Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
155
deer-flow/scripts/config-upgrade.sh
Executable file
155
deer-flow/scripts/config-upgrade.sh
Executable file
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# config-upgrade.sh - Upgrade config.yaml to match config.example.yaml
|
||||
#
|
||||
# 1. Runs version-specific migrations (value replacements, renames, etc.)
|
||||
# 2. Merges missing fields from the example into the user config
|
||||
# 3. Backs up config.yaml to config.yaml.bak before modifying.
|
||||
|
||||
set -e
|
||||
|
||||
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
EXAMPLE="$REPO_ROOT/config.example.yaml"
|
||||
|
||||
# Resolve config.yaml location: env var > backend/ > repo root
|
||||
if [ -n "$DEER_FLOW_CONFIG_PATH" ] && [ -f "$DEER_FLOW_CONFIG_PATH" ]; then
|
||||
CONFIG="$DEER_FLOW_CONFIG_PATH"
|
||||
elif [ -f "$REPO_ROOT/backend/config.yaml" ]; then
|
||||
CONFIG="$REPO_ROOT/backend/config.yaml"
|
||||
elif [ -f "$REPO_ROOT/config.yaml" ]; then
|
||||
CONFIG="$REPO_ROOT/config.yaml"
|
||||
else
|
||||
CONFIG=""
|
||||
fi
|
||||
|
||||
if [ ! -f "$EXAMPLE" ]; then
|
||||
echo "✗ config.example.yaml not found at $EXAMPLE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$CONFIG" ]; then
|
||||
echo "No config.yaml found — creating from example..."
|
||||
cp "$EXAMPLE" "$REPO_ROOT/config.yaml"
|
||||
echo "OK config.yaml created. Please review and set your API keys."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Use inline Python to do migrations + recursive merge with PyYAML
|
||||
if command -v cygpath >/dev/null 2>&1; then
|
||||
CONFIG_WIN="$(cygpath -w "$CONFIG")"
|
||||
EXAMPLE_WIN="$(cygpath -w "$EXAMPLE")"
|
||||
else
|
||||
CONFIG_WIN="$CONFIG"
|
||||
EXAMPLE_WIN="$EXAMPLE"
|
||||
fi
|
||||
|
||||
cd "$REPO_ROOT/backend" && CONFIG_WIN_PATH="$CONFIG_WIN" EXAMPLE_WIN_PATH="$EXAMPLE_WIN" uv run python -c "
|
||||
import os
|
||||
import sys, shutil, copy, re
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
config_path = Path(os.environ['CONFIG_WIN_PATH'])
|
||||
example_path = Path(os.environ['EXAMPLE_WIN_PATH'])
|
||||
|
||||
with open(config_path, encoding='utf-8') as f:
|
||||
raw_text = f.read()
|
||||
user = yaml.safe_load(raw_text) or {}
|
||||
|
||||
with open(example_path, encoding='utf-8') as f:
|
||||
example = yaml.safe_load(f) or {}
|
||||
|
||||
user_version = user.get('config_version', 0)
|
||||
example_version = example.get('config_version', 0)
|
||||
|
||||
if user_version >= example_version:
|
||||
print(f'OK config.yaml is already up to date (version {user_version}).')
|
||||
sys.exit(0)
|
||||
|
||||
print(f'Upgrading config.yaml: version {user_version} -> {example_version}')
|
||||
print()
|
||||
|
||||
# ── Migrations ───────────────────────────────────────────────────────────
|
||||
# Each migration targets a specific version upgrade.
|
||||
# 'replacements': list of (old_string, new_string) applied to the raw YAML text.
|
||||
# This handles value changes that a dict merge cannot catch.
|
||||
|
||||
MIGRATIONS = {
|
||||
1: {
|
||||
'description': 'Rename src.* module paths to deerflow.*',
|
||||
'replacements': [
|
||||
('src.community.', 'deerflow.community.'),
|
||||
('src.sandbox.', 'deerflow.sandbox.'),
|
||||
('src.models.', 'deerflow.models.'),
|
||||
('src.tools.', 'deerflow.tools.'),
|
||||
],
|
||||
},
|
||||
# Future migrations go here:
|
||||
# 2: {
|
||||
# 'description': '...',
|
||||
# 'replacements': [('old', 'new')],
|
||||
# },
|
||||
}
|
||||
|
||||
# Apply migrations in order for versions (user_version, example_version]
|
||||
migrated = []
|
||||
for version in range(user_version + 1, example_version + 1):
|
||||
migration = MIGRATIONS.get(version)
|
||||
if not migration:
|
||||
continue
|
||||
desc = migration.get('description', f'Migration to v{version}')
|
||||
for old, new in migration.get('replacements', []):
|
||||
if old in raw_text:
|
||||
raw_text = raw_text.replace(old, new)
|
||||
migrated.append(f'{old} -> {new}')
|
||||
|
||||
# Re-parse after text migrations
|
||||
user = yaml.safe_load(raw_text) or {}
|
||||
|
||||
if migrated:
|
||||
print(f'Applied {len(migrated)} migration(s):')
|
||||
for m in migrated:
|
||||
print(f' ~ {m}')
|
||||
print()
|
||||
|
||||
# ── Merge missing fields ─────────────────────────────────────────────────
|
||||
|
||||
added = []
|
||||
|
||||
def merge(target, source, path=''):
|
||||
\"\"\"Recursively merge source into target, adding missing keys only.\"\"\"
|
||||
for key, value in source.items():
|
||||
key_path = f'{path}.{key}' if path else key
|
||||
if key not in target:
|
||||
target[key] = copy.deepcopy(value)
|
||||
added.append(key_path)
|
||||
elif isinstance(value, dict) and isinstance(target[key], dict):
|
||||
merge(target[key], value, key_path)
|
||||
|
||||
merge(user, example)
|
||||
|
||||
# Always update config_version
|
||||
user['config_version'] = example_version
|
||||
|
||||
# ── Write ─────────────────────────────────────────────────────────────────
|
||||
|
||||
backup = config_path.with_suffix('.yaml.bak')
|
||||
shutil.copy2(config_path, backup)
|
||||
print(f'Backed up to {backup.name}')
|
||||
|
||||
with open(config_path, 'w', encoding='utf-8') as f:
|
||||
yaml.dump(user, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
||||
|
||||
if added:
|
||||
print(f'Added {len(added)} new field(s):')
|
||||
for a in added:
|
||||
print(f' + {a}')
|
||||
|
||||
if not migrated and not added:
|
||||
print('No changes needed (version bumped only).')
|
||||
|
||||
print()
|
||||
print(f'OK config.yaml upgraded to version {example_version}.')
|
||||
print(' Please review the changes and set any new required values.')
|
||||
"
|
||||
Reference in New Issue
Block a user