Files
deerflow-factory/deer-flow/scripts/config-upgrade.sh
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

156 lines
5.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# config-upgrade.sh - Upgrade config.yaml to match config.example.yaml
#
# 1. Runs version-specific migrations (value replacements, renames, etc.)
# 2. Merges missing fields from the example into the user config
# 3. Backs up config.yaml to config.yaml.bak before modifying.
set -e
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
EXAMPLE="$REPO_ROOT/config.example.yaml"
# Resolve config.yaml location: env var > backend/ > repo root
if [ -n "$DEER_FLOW_CONFIG_PATH" ] && [ -f "$DEER_FLOW_CONFIG_PATH" ]; then
CONFIG="$DEER_FLOW_CONFIG_PATH"
elif [ -f "$REPO_ROOT/backend/config.yaml" ]; then
CONFIG="$REPO_ROOT/backend/config.yaml"
elif [ -f "$REPO_ROOT/config.yaml" ]; then
CONFIG="$REPO_ROOT/config.yaml"
else
CONFIG=""
fi
if [ ! -f "$EXAMPLE" ]; then
echo "✗ config.example.yaml not found at $EXAMPLE"
exit 1
fi
if [ -z "$CONFIG" ]; then
echo "No config.yaml found — creating from example..."
cp "$EXAMPLE" "$REPO_ROOT/config.yaml"
echo "OK config.yaml created. Please review and set your API keys."
exit 0
fi
# Use inline Python to do migrations + recursive merge with PyYAML
if command -v cygpath >/dev/null 2>&1; then
CONFIG_WIN="$(cygpath -w "$CONFIG")"
EXAMPLE_WIN="$(cygpath -w "$EXAMPLE")"
else
CONFIG_WIN="$CONFIG"
EXAMPLE_WIN="$EXAMPLE"
fi
cd "$REPO_ROOT/backend" && CONFIG_WIN_PATH="$CONFIG_WIN" EXAMPLE_WIN_PATH="$EXAMPLE_WIN" uv run python -c "
import os
import sys, shutil, copy, re
from pathlib import Path
import yaml
config_path = Path(os.environ['CONFIG_WIN_PATH'])
example_path = Path(os.environ['EXAMPLE_WIN_PATH'])
with open(config_path, encoding='utf-8') as f:
raw_text = f.read()
user = yaml.safe_load(raw_text) or {}
with open(example_path, encoding='utf-8') as f:
example = yaml.safe_load(f) or {}
user_version = user.get('config_version', 0)
example_version = example.get('config_version', 0)
if user_version >= example_version:
print(f'OK config.yaml is already up to date (version {user_version}).')
sys.exit(0)
print(f'Upgrading config.yaml: version {user_version} -> {example_version}')
print()
# ── Migrations ───────────────────────────────────────────────────────────
# Each migration targets a specific version upgrade.
# 'replacements': list of (old_string, new_string) applied to the raw YAML text.
# This handles value changes that a dict merge cannot catch.
MIGRATIONS = {
1: {
'description': 'Rename src.* module paths to deerflow.*',
'replacements': [
('src.community.', 'deerflow.community.'),
('src.sandbox.', 'deerflow.sandbox.'),
('src.models.', 'deerflow.models.'),
('src.tools.', 'deerflow.tools.'),
],
},
# Future migrations go here:
# 2: {
# 'description': '...',
# 'replacements': [('old', 'new')],
# },
}
# Apply migrations in order for versions (user_version, example_version]
migrated = []
for version in range(user_version + 1, example_version + 1):
migration = MIGRATIONS.get(version)
if not migration:
continue
desc = migration.get('description', f'Migration to v{version}')
for old, new in migration.get('replacements', []):
if old in raw_text:
raw_text = raw_text.replace(old, new)
migrated.append(f'{old} -> {new}')
# Re-parse after text migrations
user = yaml.safe_load(raw_text) or {}
if migrated:
print(f'Applied {len(migrated)} migration(s):')
for m in migrated:
print(f' ~ {m}')
print()
# ── Merge missing fields ─────────────────────────────────────────────────
added = []
def merge(target, source, path=''):
\"\"\"Recursively merge source into target, adding missing keys only.\"\"\"
for key, value in source.items():
key_path = f'{path}.{key}' if path else key
if key not in target:
target[key] = copy.deepcopy(value)
added.append(key_path)
elif isinstance(value, dict) and isinstance(target[key], dict):
merge(target[key], value, key_path)
merge(user, example)
# Always update config_version
user['config_version'] = example_version
# ── Write ─────────────────────────────────────────────────────────────────
backup = config_path.with_suffix('.yaml.bak')
shutil.copy2(config_path, backup)
print(f'Backed up to {backup.name}')
with open(config_path, 'w', encoding='utf-8') as f:
yaml.dump(user, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
if added:
print(f'Added {len(added)} new field(s):')
for a in added:
print(f' + {a}')
if not migrated and not added:
print('No changes needed (version bumped only).')
print()
print(f'OK config.yaml upgraded to version {example_version}.')
print(' Please review the changes and set any new required values.')
"