Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
from unittest.mock import patch
import pytest
from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient
from app.gateway.routers import threads
from deerflow.config.paths import Paths
def test_delete_thread_data_removes_thread_directory(tmp_path):
paths = Paths(tmp_path)
thread_dir = paths.thread_dir("thread-cleanup")
workspace = paths.sandbox_work_dir("thread-cleanup")
uploads = paths.sandbox_uploads_dir("thread-cleanup")
outputs = paths.sandbox_outputs_dir("thread-cleanup")
for directory in [workspace, uploads, outputs]:
directory.mkdir(parents=True, exist_ok=True)
(workspace / "notes.txt").write_text("hello", encoding="utf-8")
(uploads / "report.pdf").write_bytes(b"pdf")
(outputs / "result.json").write_text("{}", encoding="utf-8")
assert thread_dir.exists()
response = threads._delete_thread_data("thread-cleanup", paths=paths)
assert response.success is True
assert not thread_dir.exists()
def test_delete_thread_data_is_idempotent_for_missing_directory(tmp_path):
paths = Paths(tmp_path)
response = threads._delete_thread_data("missing-thread", paths=paths)
assert response.success is True
assert not paths.thread_dir("missing-thread").exists()
def test_delete_thread_data_rejects_invalid_thread_id(tmp_path):
paths = Paths(tmp_path)
with pytest.raises(HTTPException) as exc_info:
threads._delete_thread_data("../escape", paths=paths)
assert exc_info.value.status_code == 422
assert "Invalid thread_id" in exc_info.value.detail
def test_delete_thread_route_cleans_thread_directory(tmp_path):
paths = Paths(tmp_path)
thread_dir = paths.thread_dir("thread-route")
paths.sandbox_work_dir("thread-route").mkdir(parents=True, exist_ok=True)
(paths.sandbox_work_dir("thread-route") / "notes.txt").write_text("hello", encoding="utf-8")
app = FastAPI()
app.include_router(threads.router)
with patch("app.gateway.routers.threads.get_paths", return_value=paths):
with TestClient(app) as client:
response = client.delete("/api/threads/thread-route")
assert response.status_code == 200
assert response.json() == {"success": True, "message": "Deleted local thread data for thread-route"}
assert not thread_dir.exists()
def test_delete_thread_route_rejects_invalid_thread_id(tmp_path):
paths = Paths(tmp_path)
app = FastAPI()
app.include_router(threads.router)
with patch("app.gateway.routers.threads.get_paths", return_value=paths):
with TestClient(app) as client:
response = client.delete("/api/threads/../escape")
assert response.status_code == 404
def test_delete_thread_route_returns_422_for_route_safe_invalid_id(tmp_path):
paths = Paths(tmp_path)
app = FastAPI()
app.include_router(threads.router)
with patch("app.gateway.routers.threads.get_paths", return_value=paths):
with TestClient(app) as client:
response = client.delete("/api/threads/thread.with.dot")
assert response.status_code == 422
assert "Invalid thread_id" in response.json()["detail"]
def test_delete_thread_data_returns_generic_500_error(tmp_path):
paths = Paths(tmp_path)
with (
patch.object(paths, "delete_thread_dir", side_effect=OSError("/secret/path")),
patch.object(threads.logger, "exception") as log_exception,
):
with pytest.raises(HTTPException) as exc_info:
threads._delete_thread_data("thread-cleanup", paths=paths)
assert exc_info.value.status_code == 500
assert exc_info.value.detail == "Failed to delete local thread data."
assert "/secret/path" not in exc_info.value.detail
log_exception.assert_called_once_with("Failed to delete thread data for %s", "thread-cleanup")