Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:
- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
81
deer-flow/scripts/load_memory_sample.py
Normal file
81
deer-flow/scripts/load_memory_sample.py
Normal file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Load the Memory Settings review sample into a local DeerFlow runtime."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def default_source(repo_root: Path) -> Path:
    """Return the default location of the Memory Settings sample JSON.

    Args:
        repo_root: Directory the default path is built under.

    Returns:
        ``repo_root / "backend" / "docs" / "memory-settings-sample.json"``.
        The path is only composed here; it is not checked for existence.
    """
    return repo_root / "backend" / "docs" / "memory-settings-sample.json"
|
||||
|
||||
|
||||
def default_target(repo_root: Path) -> Path:
    """Return the default location of the runtime ``memory.json`` file.

    Args:
        repo_root: Directory the default path is built under.

    Returns:
        ``repo_root / "backend" / ".deer-flow" / "memory.json"``.
        The path is only composed here; it is not checked for existence.
    """
    return repo_root / "backend" / ".deer-flow" / "memory.json"
|
||||
|
||||
|
||||
def parse_args(repo_root: Path, argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the sample loader.

    Args:
        repo_root: Directory used to compute the default ``--source`` and
            ``--target`` paths (via ``default_source`` / ``default_target``).
        argv: Argument strings to parse. When ``None`` (the default, and the
            behavior existing callers get), argparse reads ``sys.argv[1:]``.
            Passing an explicit list lets the parser be driven
            programmatically, e.g. from tests.

    Returns:
        A namespace with ``source`` (Path), ``target`` (Path) and
        ``no_backup`` (bool) attributes.
    """
    parser = argparse.ArgumentParser(
        description="Copy the Memory Settings sample data into the local runtime memory file.",
    )
    parser.add_argument(
        "--source",
        type=Path,
        default=default_source(repo_root),
        help="Path to the sample JSON file.",
    )
    parser.add_argument(
        "--target",
        type=Path,
        default=default_target(repo_root),
        help="Path to the runtime memory.json file.",
    )
    parser.add_argument(
        "--no-backup",
        action="store_true",
        help="Overwrite the target without writing a backup copy first.",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def validate_json_file(path: Path) -> None:
    """Check that *path* holds parseable JSON.

    The file is opened as UTF-8 text and parsed; the parsed value is
    discarded because only well-formedness matters here.

    Args:
        path: File to validate.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with path.open(encoding="utf-8") as handle:
        json.load(handle)
|
||||
|
||||
|
||||
def main() -> int:
    """Copy the sample memory file over the runtime memory file.

    The repository root is taken as the second parent of this script's
    resolved path. The source is validated as JSON before anything is
    written. Unless ``--no-backup`` is given, an existing target is first
    copied to ``<target name>.bak-<YYYYmmdd-HHMMSS>`` next to it.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With a message if the source is not an existing regular
            file, if source and target resolve to the same path, or if the
            source is not valid UTF-8 JSON.
    """
    repo_root = Path(__file__).resolve().parents[1]
    args = parse_args(repo_root)

    source = args.source.resolve()
    target = args.target.resolve()

    # is_file() rather than exists(): a directory would pass an existence
    # check and then fail later with a raw traceback when opened.
    if not source.is_file():
        raise SystemExit(f"Sample file not found: {source}")

    # Catch this before a backup is written; shutil.copy2 would otherwise
    # raise SameFileError only after the backup had already been created.
    if source == target:
        raise SystemExit(f"Source and target are the same file: {source}")

    try:
        validate_json_file(source)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        raise SystemExit(f"Sample file is not valid JSON: {source} ({exc})") from exc

    target.parent.mkdir(parents=True, exist_ok=True)

    backup_path: Path | None = None
    if target.exists() and not args.no_backup:
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        backup_path = target.with_name(f"{target.name}.bak-{timestamp}")
        shutil.copy2(target, backup_path)

    shutil.copy2(source, target)

    print(f"Loaded sample memory into: {target}")
    if backup_path is not None:
        print(f"Backup created at: {backup_path}")
    else:
        print("No backup created.")

    return 0
|
||||
|
||||
|
||||
# Run the loader only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user