Files
deerflow-factory/deer-flow/backend/tests/test_config_version.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

126 lines
4.6 KiB
Python

"""Tests for config version check and upgrade logic."""
from __future__ import annotations
import logging
import tempfile
from pathlib import Path
import yaml
from deerflow.config.app_config import AppConfig
def _make_config_files(tmpdir: Path, user_config: dict, example_config: dict) -> Path:
    """Write ``config.yaml`` and ``config.example.yaml`` into *tmpdir*.

    Both files are filled in with a default ``sandbox`` entry when the given
    config does not already define one. The dicts passed in are not modified.

    Args:
        tmpdir: Directory in which both YAML files are created.
        user_config: Content for ``config.yaml``.
        example_config: Content for ``config.example.yaml``.

    Returns:
        Path to the written ``config.yaml``.
    """
    # Minimal valid config needs sandbox
    defaults = {
        "sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"},
    }
    config_path = tmpdir / "config.yaml"
    example_path = tmpdir / "config.example.yaml"

    for target, cfg in ((config_path, user_config), (example_path, example_config)):
        # Merge into a fresh dict instead of calling setdefault() on the
        # caller's dict: keys already present in cfg still win over the
        # defaults, but the arguments are left exactly as they were passed.
        merged = {**defaults, **cfg}
        with open(target, "w", encoding="utf-8") as f:
            yaml.dump(merged, f)

    return config_path
def test_missing_version_treated_as_zero(caplog):
    """A config lacking config_version should be handled as version 0 and flagged as outdated."""
    loaded_config = {"sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"}}

    with tempfile.TemporaryDirectory() as workdir:
        # The user file carries no config_version; the example declares version 1.
        user_path = _make_config_files(
            Path(workdir),
            user_config={},
            example_config={"config_version": 1},
        )

        with caplog.at_level(logging.WARNING, logger="deerflow.config.app_config"):
            AppConfig._check_config_version(loaded_config, user_path)

        captured = caplog.text
        for fragment in ("outdated", "version 0", "version is 1"):
            assert fragment in captured
def test_matching_version_no_warning(caplog):
    """When user and example versions are equal, no "outdated" warning is logged."""
    current = {"config_version": 1}

    with tempfile.TemporaryDirectory() as workdir:
        # Separate dict literals per file so the helper never sees shared objects.
        user_path = _make_config_files(
            Path(workdir),
            user_config={"config_version": 1},
            example_config={"config_version": 1},
        )

        with caplog.at_level(logging.WARNING, logger="deerflow.config.app_config"):
            AppConfig._check_config_version(current, user_path)

        captured = caplog.text
        assert "outdated" not in captured
def test_outdated_version_emits_warning(caplog):
    """A user version below the example version should produce an "outdated" warning."""
    loaded_config = {"config_version": 1}

    with tempfile.TemporaryDirectory() as workdir:
        # User file is at version 1 while the example has moved on to version 2.
        user_path = _make_config_files(
            Path(workdir),
            user_config={"config_version": 1},
            example_config={"config_version": 2},
        )

        with caplog.at_level(logging.WARNING, logger="deerflow.config.app_config"):
            AppConfig._check_config_version(loaded_config, user_path)

        captured = caplog.text
        for fragment in ("outdated", "version 1", "version is 2"):
            assert fragment in captured
def test_no_example_file_no_warning(caplog):
    """Without a config.example.yaml next to the config, no "outdated" warning is logged."""
    with tempfile.TemporaryDirectory() as workdir:
        user_path = Path(workdir) / "config.yaml"
        # Only the user config is written; config.example.yaml is intentionally absent.
        user_path.write_text(yaml.dump({"sandbox": {"use": "test"}}), encoding="utf-8")

        with caplog.at_level(logging.WARNING, logger="deerflow.config.app_config"):
            AppConfig._check_config_version({}, user_path)

        captured = caplog.text
        assert "outdated" not in captured
def test_string_config_version_does_not_raise_type_error(caplog):
    """A config_version given as a string must not trigger a TypeError during the check."""
    # String instead of int, as YAML can produce.
    string_versioned = {"config_version": "1"}

    with tempfile.TemporaryDirectory() as workdir:
        user_path = _make_config_files(
            Path(workdir),
            user_config={"config_version": "1"},
            example_config={"config_version": 2},
        )

        # Passing is simply "no exception": the call must not raise
        # TypeError: '<' not supported between instances of 'str' and 'int'
        AppConfig._check_config_version(string_versioned, user_path)
def test_newer_user_version_no_warning(caplog):
    """Edge case: a user version above the example version logs no "outdated" warning."""
    loaded_config = {"config_version": 3}

    with tempfile.TemporaryDirectory() as workdir:
        # User file is ahead (version 3) of the example (version 2).
        user_path = _make_config_files(
            Path(workdir),
            user_config={"config_version": 3},
            example_config={"config_version": 2},
        )

        with caplog.at_level(logging.WARNING, logger="deerflow.config.app_config"):
            AppConfig._check_config_version(loaded_config, user_path)

        captured = caplog.text
        assert "outdated" not in captured