Files
deerflow-factory/deer-flow/backend/tests/test_app_config_reload.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

82 lines
2.7 KiB
Python

from __future__ import annotations
import json
import os
from pathlib import Path
import yaml
from deerflow.config.app_config import get_app_config, reset_app_config
def _write_config(path: Path, *, model_name: str, supports_thinking: bool) -> None:
path.write_text(
yaml.safe_dump(
{
"sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"},
"models": [
{
"name": model_name,
"use": "langchain_openai:ChatOpenAI",
"model": "gpt-test",
"supports_thinking": supports_thinking,
}
],
}
),
encoding="utf-8",
)
def _write_extensions_config(path: Path) -> None:
path.write_text(json.dumps({"mcpServers": {}, "skills": {}}), encoding="utf-8")
def test_get_app_config_reloads_when_file_changes(tmp_path, monkeypatch):
config_path = tmp_path / "config.yaml"
extensions_path = tmp_path / "extensions_config.json"
_write_extensions_config(extensions_path)
_write_config(config_path, model_name="first-model", supports_thinking=False)
monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_path))
monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
reset_app_config()
try:
initial = get_app_config()
assert initial.models[0].supports_thinking is False
_write_config(config_path, model_name="first-model", supports_thinking=True)
next_mtime = config_path.stat().st_mtime + 5
os.utime(config_path, (next_mtime, next_mtime))
reloaded = get_app_config()
assert reloaded.models[0].supports_thinking is True
assert reloaded is not initial
finally:
reset_app_config()
def test_get_app_config_reloads_when_config_path_changes(tmp_path, monkeypatch):
config_a = tmp_path / "config-a.yaml"
config_b = tmp_path / "config-b.yaml"
extensions_path = tmp_path / "extensions_config.json"
_write_extensions_config(extensions_path)
_write_config(config_a, model_name="model-a", supports_thinking=False)
_write_config(config_b, model_name="model-b", supports_thinking=True)
monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_a))
reset_app_config()
try:
first = get_app_config()
assert first.models[0].name == "model-a"
monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_b))
second = get_app_config()
assert second.models[0].name == "model-b"
assert second is not first
finally:
reset_app_config()