Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:
- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap).
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters.
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output.
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too.
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py).
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives.
- Hardened runtime config.yaml references only the searx-backed tools.
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source.
See HARDENING.md for the full audit trail and verification steps.
82 lines
2.7 KiB
Python
82 lines
2.7 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from deerflow.config.app_config import get_app_config, reset_app_config
|
|
|
|
|
|
def _write_config(path: Path, *, model_name: str, supports_thinking: bool) -> None:
    """Write a minimal app-config YAML document to ``path``.

    The document holds a ``sandbox`` entry and a one-element ``models`` list.
    The model's ``name`` and ``supports_thinking`` come from the arguments;
    its ``use`` and ``model`` fields are fixed test values.

    Args:
        path: File to create or overwrite (written as UTF-8).
        model_name: Value stored under the model's ``name`` key.
        supports_thinking: Value stored under the model's
            ``supports_thinking`` key.
    """
    config = {
        "sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"},
        "models": [
            {
                "name": model_name,
                "use": "langchain_openai:ChatOpenAI",
                "model": "gpt-test",
                "supports_thinking": supports_thinking,
            }
        ],
    }
    path.write_text(yaml.safe_dump(config), encoding="utf-8")
|
|
|
|
|
|
def _write_extensions_config(path: Path) -> None:
    """Write an extensions-config JSON file with empty ``mcpServers`` and ``skills`` maps.

    Args:
        path: File to create or overwrite (written as UTF-8).
    """
    path.write_text(json.dumps({"mcpServers": {}, "skills": {}}), encoding="utf-8")
|
|
|
|
|
|
def test_get_app_config_reloads_when_file_changes(tmp_path, monkeypatch):
    """Check that ``get_app_config`` returns a new object after the config file is rewritten.

    The config file is first written with ``supports_thinking=False``, loaded,
    then rewritten with ``supports_thinking=True`` and given a later mtime.
    The second ``get_app_config()`` call must reflect the new value and must
    not be the same object as the first result.
    """
    config_path = tmp_path / "config.yaml"
    extensions_path = tmp_path / "extensions_config.json"
    _write_extensions_config(extensions_path)
    _write_config(config_path, model_name="first-model", supports_thinking=False)

    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_path))
    monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
    reset_app_config()

    try:
        initial = get_app_config()
        assert initial.models[0].supports_thinking is False

        _write_config(config_path, model_name="first-model", supports_thinking=True)
        # Move the file's timestamps 5 seconds past the current mtime.
        # NOTE(review): presumably get_app_config detects changes via mtime,
        # and the bump guards against both writes sharing a timestamp — confirm.
        next_mtime = config_path.stat().st_mtime + 5
        os.utime(config_path, (next_mtime, next_mtime))

        reloaded = get_app_config()
        assert reloaded.models[0].supports_thinking is True
        assert reloaded is not initial
    finally:
        # Runs whether or not the assertions above passed.
        reset_app_config()
|
|
|
|
|
|
def test_get_app_config_reloads_when_config_path_changes(tmp_path, monkeypatch):
    """Check that ``get_app_config`` follows a change of ``DEER_FLOW_CONFIG_PATH``.

    Two config files with different model names are written. After the first
    load, the environment variable is repointed at the second file. The next
    ``get_app_config()`` call must return the second file's model and must
    not be the same object as the first result.
    """
    config_a = tmp_path / "config-a.yaml"
    config_b = tmp_path / "config-b.yaml"
    extensions_path = tmp_path / "extensions_config.json"
    _write_extensions_config(extensions_path)
    _write_config(config_a, model_name="model-a", supports_thinking=False)
    _write_config(config_b, model_name="model-b", supports_thinking=True)

    monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_a))
    reset_app_config()

    try:
        first = get_app_config()
        assert first.models[0].name == "model-a"

        # Repoint the environment at the second file; no reset in between.
        monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_b))
        second = get_app_config()
        assert second.models[0].name == "model-b"
        assert second is not first
    finally:
        # Runs whether or not the assertions above passed.
        reset_app_config()
|