Files
deerflow-factory/deer-flow/backend/tests/test_tracing_factory.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

174 lines
5.5 KiB
Python

"""Tests for deerflow.tracing.factory."""
from __future__ import annotations
import sys
import types
import pytest
from deerflow.tracing import factory as tracing_factory
@pytest.fixture(autouse=True)
def clear_tracing_env(monkeypatch):
    """Start and finish every test with a clean tracing environment.

    Deletes the LangSmith/LangChain and Langfuse environment variables listed
    below (ignoring any that are not set) and sets the tracing config module's
    ``_tracing_config`` attribute to ``None`` both before and after the test.
    """
    from deerflow.config import tracing_config as config_module

    langsmith_env_vars = (
        "LANGSMITH_TRACING",
        "LANGCHAIN_TRACING_V2",
        "LANGCHAIN_TRACING",
        "LANGSMITH_API_KEY",
        "LANGCHAIN_API_KEY",
        "LANGSMITH_PROJECT",
        "LANGCHAIN_PROJECT",
        "LANGSMITH_ENDPOINT",
        "LANGCHAIN_ENDPOINT",
    )
    langfuse_env_vars = (
        "LANGFUSE_TRACING",
        "LANGFUSE_PUBLIC_KEY",
        "LANGFUSE_SECRET_KEY",
        "LANGFUSE_BASE_URL",
    )
    for env_var in langsmith_env_vars + langfuse_env_vars:
        monkeypatch.delenv(env_var, raising=False)

    # Presumably a module-level cache of the parsed config; reset it so each
    # test re-reads the (now cleared) environment.
    config_module._tracing_config = None
    yield
    # Reset again so state set during the test cannot leak into the next one.
    config_module._tracing_config = None
def test_build_tracing_callbacks_returns_empty_list_when_disabled(monkeypatch):
    """With no enabled providers, ``build_tracing_callbacks`` returns ``[]``."""
    # Validation is stubbed to a no-op and the provider list to empty.
    factory_stubs = {
        "validate_enabled_tracing_providers": lambda: None,
        "get_enabled_tracing_providers": lambda: [],
    }
    for attr_name, stub in factory_stubs.items():
        monkeypatch.setattr(tracing_factory, attr_name, stub)

    assert tracing_factory.build_tracing_callbacks() == []
def test_build_tracing_callbacks_creates_langsmith_and_langfuse(monkeypatch):
    """Both enabled providers yield one callback each, LangSmith first."""

    class FakeLangSmithTracer:
        def __init__(self, *, project_name: str):
            self.project_name = project_name

    class FakeLangfuseHandler:
        def __init__(self, *, public_key: str):
            self.public_key = public_key

    def fake_get_tracing_config():
        # Build a fresh attribute-only config object on every call.
        class LangSmithCfg:
            project = "smith-project"

        class LangfuseCfg:
            secret_key = "sk-lf-test"
            public_key = "pk-lf-test"
            host = "https://langfuse.example.com"

        class Cfg:
            langsmith = LangSmithCfg()
            langfuse = LangfuseCfg()

        return Cfg()

    def fake_create_langsmith_tracer(cfg):
        return FakeLangSmithTracer(project_name=cfg.project)

    def fake_create_langfuse_handler(cfg):
        return FakeLangfuseHandler(public_key=cfg.public_key)

    monkeypatch.setattr(tracing_factory, "get_enabled_tracing_providers", lambda: ["langsmith", "langfuse"])
    monkeypatch.setattr(tracing_factory, "validate_enabled_tracing_providers", lambda: None)
    monkeypatch.setattr(tracing_factory, "get_tracing_config", fake_get_tracing_config)
    monkeypatch.setattr(tracing_factory, "_create_langsmith_tracer", fake_create_langsmith_tracer)
    monkeypatch.setattr(tracing_factory, "_create_langfuse_handler", fake_create_langfuse_handler)

    callbacks = tracing_factory.build_tracing_callbacks()

    assert len(callbacks) == 2
    langsmith_callback, langfuse_callback = callbacks
    assert langsmith_callback.project_name == "smith-project"
    assert langfuse_callback.public_key == "pk-lf-test"
def test_build_tracing_callbacks_raises_when_enabled_provider_fails(monkeypatch):
    """A failing Langfuse handler factory surfaces as a ``RuntimeError``.

    The error message is expected to match
    "Langfuse tracing initialization failed".
    """

    def fake_get_tracing_config():
        class LangfuseCfg:
            secret_key = "sk-lf-test"
            public_key = "pk-lf-test"
            host = "https://langfuse.example.com"

        class Cfg:
            langfuse = LangfuseCfg()

        return Cfg()

    def exploding_langfuse_handler(cfg):
        # Simulate the provider SDK blowing up during handler construction.
        raise RuntimeError("boom")

    monkeypatch.setattr(tracing_factory, "get_enabled_tracing_providers", lambda: ["langfuse"])
    monkeypatch.setattr(tracing_factory, "validate_enabled_tracing_providers", lambda: None)
    monkeypatch.setattr(tracing_factory, "get_tracing_config", fake_get_tracing_config)
    monkeypatch.setattr(tracing_factory, "_create_langfuse_handler", exploding_langfuse_handler)

    with pytest.raises(RuntimeError, match="Langfuse tracing initialization failed"):
        tracing_factory.build_tracing_callbacks()
def test_build_tracing_callbacks_raises_for_explicitly_enabled_misconfigured_provider(monkeypatch):
    """Langfuse enabled without a public key raises a ``ValueError``.

    The error message is expected to mention ``LANGFUSE_PUBLIC_KEY``.
    """
    from deerflow.config import tracing_config as config_module

    # Enable Langfuse and supply only the secret key; the public key is absent.
    monkeypatch.delenv("LANGFUSE_PUBLIC_KEY", raising=False)
    monkeypatch.setenv("LANGFUSE_TRACING", "true")
    monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-test")

    # Reset the module-level config so the environment above is what gets read
    # (presumably this attribute caches the parsed config).
    config_module._tracing_config = None

    with pytest.raises(ValueError, match="LANGFUSE_PUBLIC_KEY"):
        tracing_factory.build_tracing_callbacks()
def test_create_langfuse_handler_initializes_client_before_handler(monkeypatch):
    """The Langfuse client is constructed before the callback handler.

    Stub ``langfuse`` and ``langfuse.langchain`` modules are placed in
    ``sys.modules``; their constructors record the order and keyword
    arguments of each instantiation.
    """
    events: list[tuple[str, dict]] = []

    class FakeLangfuse:
        def __init__(self, **kwargs):
            events.append(("client", kwargs))

    class FakeCallbackHandler:
        def __init__(self, **kwargs):
            events.append(("handler", kwargs))

    langfuse_stub = types.ModuleType("langfuse")
    langfuse_stub.Langfuse = FakeLangfuse
    langchain_stub = types.ModuleType("langfuse.langchain")
    langchain_stub.CallbackHandler = FakeCallbackHandler

    stub_modules = (
        ("langfuse", langfuse_stub),
        ("langfuse.langchain", langchain_stub),
    )
    for module_name, stub_module in stub_modules:
        monkeypatch.setitem(sys.modules, module_name, stub_module)

    class LangfuseCfg:
        secret_key = "sk-lf-test"
        public_key = "pk-lf-test"
        host = "https://langfuse.example.com"

    tracing_factory._create_langfuse_handler(LangfuseCfg())

    expected_client_kwargs = {
        "secret_key": "sk-lf-test",
        "public_key": "pk-lf-test",
        "host": "https://langfuse.example.com",
    }
    expected_handler_kwargs = {"public_key": "pk-lf-test"}
    assert events == [
        ("client", expected_client_kwargs),
        ("handler", expected_handler_kwargs),
    ]