Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
82 lines
2.1 KiB
Python
82 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Load the Memory Settings review sample into a local DeerFlow runtime."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import shutil
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
|
|
def default_source(repo_root: Path) -> Path:
    """Return the default location of the sample JSON inside *repo_root*.

    Args:
        repo_root: Root directory of the repository checkout.

    Returns:
        ``<repo_root>/backend/docs/memory-settings-sample.json``.
    """
    return repo_root.joinpath("backend", "docs", "memory-settings-sample.json")
|
|
|
|
|
|
def default_target(repo_root: Path) -> Path:
    """Return the default location of the runtime memory file inside *repo_root*.

    Args:
        repo_root: Root directory of the repository checkout.

    Returns:
        ``<repo_root>/backend/.deer-flow/memory.json``.
    """
    return repo_root.joinpath("backend", ".deer-flow", "memory.json")
|
|
|
|
|
|
def parse_args(repo_root: Path, argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the sample loader.

    Args:
        repo_root: Repository root used to compute the default values of
            ``--source`` and ``--target``.
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what existing callers get; passing
            an explicit list allows the parser to be driven programmatically.

    Returns:
        Namespace with ``source`` and ``target`` (both ``Path``) and
        ``no_backup`` (``bool``, ``False`` unless ``--no-backup`` is given).
    """
    parser = argparse.ArgumentParser(
        description="Copy the Memory Settings sample data into the local runtime memory file.",
    )
    parser.add_argument(
        "--source",
        type=Path,
        default=default_source(repo_root),
        help="Path to the sample JSON file.",
    )
    parser.add_argument(
        "--target",
        type=Path,
        default=default_target(repo_root),
        help="Path to the runtime memory.json file.",
    )
    parser.add_argument(
        "--no-backup",
        action="store_true",
        help="Overwrite the target without writing a backup copy first.",
    )
    # argparse treats args=None as "use sys.argv[1:]".
    return parser.parse_args(argv)
|
|
|
|
|
|
def validate_json_file(path: Path) -> None:
    """Check that *path* holds UTF-8 text that parses as JSON.

    The parsed value is discarded; the function only succeeds or raises.

    Args:
        path: File to read.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    json.loads(raw_text)
|
|
|
|
|
|
def main() -> int:
    """Copy the sample memory JSON over the runtime memory file.

    The repository root is taken to be the parent of the directory that
    contains this script. The source is validated as JSON before anything is
    written; an existing target is backed up next to itself (suffix
    ``.bak-<YYYYmmdd-HHMMSS>``) unless ``--no-backup`` was given.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With an explanatory message when the source is missing,
            is not a regular file, is not valid UTF-8 JSON, or is the same
            file as the target.
    """
    repo_root = Path(__file__).resolve().parents[1]
    args = parse_args(repo_root)

    source = args.source.resolve()
    target = args.target.resolve()

    if not source.exists():
        raise SystemExit(f"Sample file not found: {source}")
    if not source.is_file():
        # A directory would pass exists() and then fail later with a raw traceback.
        raise SystemExit(f"Sample path is not a file: {source}")

    # Refuse early so that no backup is written before shutil.copy2 would
    # reject the copy with SameFileError.
    if target.exists() and source.samefile(target):
        raise SystemExit(f"Source and target are the same file: {source}")

    try:
        validate_json_file(source)
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        raise SystemExit(f"Sample file is not valid UTF-8 JSON: {source} ({err})") from err

    target.parent.mkdir(parents=True, exist_ok=True)

    backup_path: Path | None = None
    if target.exists() and not args.no_backup:
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        backup_path = target.with_name(f"{target.name}.bak-{timestamp}")
        shutil.copy2(target, backup_path)

    shutil.copy2(source, target)

    print(f"Loaded sample memory into: {target}")
    if backup_path is not None:
        print(f"Backup created at: {backup_path}")
    else:
        print("No backup created.")

    return 0
|
|
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|