Files
deerflow-factory/deer-flow/backend/tests/test_artifacts_router.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

105 lines
4.4 KiB
Python

import asyncio
import zipfile
from pathlib import Path
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from starlette.requests import Request
from starlette.responses import FileResponse
import app.gateway.routers.artifacts as artifacts_router
# (filename, file content) pairs used to parametrize the "active content"
# tests below: one HTML, one XHTML and one SVG document.
ACTIVE_ARTIFACT_CASES = [
    (
        "poc.html",
        "<html><body><script>alert('xss')</script></body></html>",
    ),
    (
        "page.xhtml",
        '<?xml version="1.0"?><html xmlns="http://www.w3.org/1999/xhtml"><body>hello</body></html>',
    ),
    (
        "image.svg",
        '<svg xmlns="http://www.w3.org/2000/svg"><script>alert("xss")</script></svg>',
    ),
]
def _make_request(query_string: bytes = b"") -> Request:
    """Build a bare GET ``Request`` for path ``/`` carrying ``query_string``.

    The request has no headers; it exists so the route handler can be
    called directly without going through an HTTP client.
    """
    scope = {
        "type": "http",
        "method": "GET",
        "path": "/",
        "headers": [],
        "query_string": query_string,
    }
    return Request(scope)
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
    """A UTF-8 text artifact comes back intact when ``Path.read_text`` defaults to GBK."""
    expected = "Curly quotes: \u201cutf8\u201d"
    note_file = tmp_path / "note.txt"
    note_file.write_text(expected, encoding="utf-8")

    real_read_text = Path.read_text

    def gbk_unless_specified(self, *args, **kwargs):
        # Any caller that omits ``encoding`` gets GBK, so the handler only
        # passes if it asks for an encoding that can decode the file.
        kwargs.setdefault("encoding", "gbk")
        return real_read_text(self, *args, **kwargs)

    monkeypatch.setattr(Path, "read_text", gbk_unless_specified)
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: note_file,
    )

    pending = artifacts_router.get_artifact(
        "thread-1",
        "mnt/user-data/outputs/note.txt",
        _make_request(),
    )
    response = asyncio.run(pending)

    assert response.media_type == "text/plain"
    assert bytes(response.body).decode("utf-8") == expected
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content(tmp_path, monkeypatch, filename: str, content: str) -> None:
    """Each active-content file is returned as a ``FileResponse`` marked as an attachment."""
    target = tmp_path / filename
    target.write_text(content, encoding="utf-8")
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: target,
    )

    virtual_path = f"mnt/user-data/outputs/{filename}"
    response = asyncio.run(artifacts_router.get_artifact("thread-1", virtual_path, _make_request()))

    assert isinstance(response, FileResponse)
    disposition = response.headers.get("content-disposition", "")
    assert disposition.startswith("attachment;")
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content_in_skill_archive(tmp_path, monkeypatch, filename: str, content: str) -> None:
    """Active-content members of a ``.skill`` zip are returned as attachments with their exact bytes."""
    archive = tmp_path / "sample.skill"
    with zipfile.ZipFile(archive, "w") as bundle:
        bundle.writestr(filename, content)
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: archive,
    )

    member_path = f"mnt/user-data/outputs/sample.skill/{filename}"
    response = asyncio.run(artifacts_router.get_artifact("thread-1", member_path, _make_request()))

    disposition = response.headers.get("content-disposition", "")
    assert disposition.startswith("attachment;")
    assert bytes(response.body) == content.encode("utf-8")
def test_get_artifact_download_false_does_not_force_attachment(tmp_path, monkeypatch) -> None:
    """With ``?download=false`` a plain text artifact is served without a content-disposition header."""
    note_file = tmp_path / "note.txt"
    note_file.write_text("hello", encoding="utf-8")
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: note_file,
    )

    # Mount only the artifacts router on a throwaway application.
    api = FastAPI()
    api.include_router(artifacts_router.router)

    url = "/api/threads/thread-1/artifacts/mnt/user-data/outputs/note.txt?download=false"
    with TestClient(api) as client:
        response = client.get(url)

    assert response.status_code == 200
    assert response.text == "hello"
    assert "content-disposition" not in response.headers
def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path, monkeypatch) -> None:
    """With ``?download=true`` a text member of a ``.skill`` zip is served as an attachment."""
    archive = tmp_path / "sample.skill"
    with zipfile.ZipFile(archive, "w") as bundle:
        bundle.writestr("notes.txt", "hello")
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: archive,
    )

    # Mount only the artifacts router on a throwaway application.
    api = FastAPI()
    api.include_router(artifacts_router.router)

    url = "/api/threads/thread-1/artifacts/mnt/user-data/outputs/sample.skill/notes.txt?download=true"
    with TestClient(api) as client:
        response = client.get(url)

    assert response.status_code == 200
    assert response.text == "hello"
    disposition = response.headers.get("content-disposition", "")
    assert disposition.startswith("attachment;")