Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:

- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
196 lines
7.7 KiB
Python
196 lines
7.7 KiB
Python
import asyncio
|
|
import stat
|
|
from io import BytesIO
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from fastapi import UploadFile
|
|
|
|
from app.gateway.routers import uploads
|
|
|
|
|
|
def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_path):
    """Upload with a "local" sandbox id: bytes reach disk, sandbox is not synced.

    The mocked provider hands back "local" from ``acquire``. The test checks
    that the uploaded content is written under the patched uploads directory
    and that ``sandbox.update_file`` is never invoked.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    # Build the sandbox first, then the provider that returns it.
    sandbox = MagicMock()
    provider = MagicMock()
    provider.acquire.return_value = "local"
    provider.get.return_value = sandbox

    payload = b"hello uploads"

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "get_sandbox_provider", return_value=provider),
    ):
        upload = UploadFile(filename="notes.txt", file=BytesIO(payload))
        response = asyncio.run(uploads.upload_files("thread-local", files=[upload]))

    assert response.success is True
    assert len(response.files) == 1
    assert response.files[0]["filename"] == "notes.txt"

    stored_file = uploads_dir / "notes.txt"
    assert stored_file.read_bytes() == payload

    sandbox.update_file.assert_not_called()
|
|
|
|
|
|
def test_upload_files_syncs_non_local_sandbox_and_marks_markdown_file(tmp_path):
    """Upload with sandbox id "aio-1": both the PDF and its markdown are synced.

    ``convert_file_to_markdown`` is replaced by a stub that writes a sibling
    ``.md`` file. The test checks the response's ``markdown_file`` entry, the
    files on disk, and the ``sandbox.update_file`` calls for both paths.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    sandbox = MagicMock()
    provider = MagicMock()
    provider.acquire.return_value = "aio-1"
    provider.get.return_value = sandbox

    async def write_markdown_sibling(file_path: Path) -> Path:
        # Stand-in converter: drop a fixed-content .md next to the upload.
        converted = file_path.with_suffix(".md")
        converted.write_text("converted", encoding="utf-8")
        return converted

    converter = AsyncMock(side_effect=write_markdown_sibling)

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "get_sandbox_provider", return_value=provider),
        patch.object(uploads, "convert_file_to_markdown", converter),
    ):
        upload = UploadFile(filename="report.pdf", file=BytesIO(b"pdf-bytes"))
        response = asyncio.run(uploads.upload_files("thread-aio", files=[upload]))

    assert response.success is True
    assert len(response.files) == 1
    entry = response.files[0]
    assert entry["filename"] == "report.pdf"
    assert entry["markdown_file"] == "report.md"

    pdf_on_disk = uploads_dir / "report.pdf"
    md_on_disk = uploads_dir / "report.md"
    assert pdf_on_disk.read_bytes() == b"pdf-bytes"
    assert md_on_disk.read_text(encoding="utf-8") == "converted"

    # Both the original and the converted file must have been pushed to the sandbox.
    sandbox.update_file.assert_any_call("/mnt/user-data/uploads/report.pdf", b"pdf-bytes")
    sandbox.update_file.assert_any_call("/mnt/user-data/uploads/report.md", b"converted")
|
|
|
|
|
|
def test_upload_files_makes_non_local_files_sandbox_writable(tmp_path):
    """Upload with sandbox id "aio-1": the permission helper runs on both files.

    ``_make_file_sandbox_writable`` is patched out, and the test checks it was
    called with the uploaded PDF path and with the generated markdown path.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    sandbox = MagicMock()
    provider = MagicMock()
    provider.acquire.return_value = "aio-1"
    provider.get.return_value = sandbox

    async def write_markdown_sibling(file_path: Path) -> Path:
        # Stand-in converter: drop a fixed-content .md next to the upload.
        converted = file_path.with_suffix(".md")
        converted.write_text("converted", encoding="utf-8")
        return converted

    converter = AsyncMock(side_effect=write_markdown_sibling)

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "get_sandbox_provider", return_value=provider),
        patch.object(uploads, "convert_file_to_markdown", converter),
        patch.object(uploads, "_make_file_sandbox_writable") as writable_spy,
    ):
        upload = UploadFile(filename="report.pdf", file=BytesIO(b"pdf-bytes"))
        response = asyncio.run(uploads.upload_files("thread-aio", files=[upload]))

    assert response.success is True
    writable_spy.assert_any_call(uploads_dir / "report.pdf")
    writable_spy.assert_any_call(uploads_dir / "report.md")
|
|
|
|
|
|
def test_upload_files_does_not_adjust_permissions_for_local_sandbox(tmp_path):
    """Upload with a "local" sandbox id: the permission helper is never called.

    ``_make_file_sandbox_writable`` is patched out so the test can check that
    a successful upload leaves it uncalled.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    sandbox = MagicMock()
    provider = MagicMock()
    provider.acquire.return_value = "local"
    provider.get.return_value = sandbox

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "get_sandbox_provider", return_value=provider),
        patch.object(uploads, "_make_file_sandbox_writable") as writable_spy,
    ):
        upload = UploadFile(filename="notes.txt", file=BytesIO(b"hello uploads"))
        response = asyncio.run(uploads.upload_files("thread-local", files=[upload]))

    assert response.success is True
    writable_spy.assert_not_called()
|
|
|
|
|
|
def test_make_file_sandbox_writable_adds_write_bits_for_regular_files(tmp_path):
    """A read-only regular file gains user, group and other write bits."""
    target = tmp_path / "report.pdf"
    target.write_bytes(b"pdf-bytes")

    # Start from read-only for everyone so any write bit seen afterwards
    # was added by the helper under test.
    read_only_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    target.chmod(read_only_mode)

    uploads._make_file_sandbox_writable(target)

    mode_after = stat.S_IMODE(target.stat().st_mode)
    for write_bit in (stat.S_IWUSR, stat.S_IWGRP, stat.S_IWOTH):
        assert mode_after & write_bit
|
|
|
|
|
|
def test_make_file_sandbox_writable_skips_symlinks(tmp_path):
    """When ``lstat`` reports a symlink mode, ``chmod`` is not called.

    The file on disk is a regular file; ``uploads.os.lstat`` is patched to
    return a stat result whose ``st_mode`` is ``stat.S_IFLNK``.
    """
    target = tmp_path / "target-link.txt"
    target.write_text("hello", encoding="utf-8")

    fake_lstat_result = MagicMock(st_mode=stat.S_IFLNK)

    with (
        patch.object(uploads.os, "lstat", return_value=fake_lstat_result),
        patch.object(uploads.os, "chmod") as chmod_spy,
    ):
        uploads._make_file_sandbox_writable(target)

    chmod_spy.assert_not_called()
|
|
|
|
|
|
def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
    """Filenames ".." and "." yield no files; "../etc/passwd" is stored as "passwd".

    Each upload still reports ``success is True``. Afterwards the uploads
    directory must contain only the single normalised file.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    sandbox = MagicMock()
    provider = MagicMock()
    provider.acquire.return_value = "local"
    provider.get.return_value = sandbox

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_dir),
        patch.object(uploads, "get_sandbox_provider", return_value=provider),
    ):
        # These filenames must be rejected outright
        for bad_name in ("..", "."):
            rejected_upload = UploadFile(filename=bad_name, file=BytesIO(b"data"))
            rejected = asyncio.run(uploads.upload_files("thread-local", files=[rejected_upload]))
            assert rejected.success is True
            assert rejected.files == [], f"Expected no files for unsafe filename {bad_name!r}"

        # Path-traversal prefixes are stripped to the basename and accepted safely
        traversal_upload = UploadFile(filename="../etc/passwd", file=BytesIO(b"data"))
        accepted = asyncio.run(uploads.upload_files("thread-local", files=[traversal_upload]))
        assert accepted.success is True
        assert len(accepted.files) == 1
        assert accepted.files[0]["filename"] == "passwd"

    # Only the safely normalised file should exist
    stored_names = [entry.name for entry in uploads_dir.iterdir()]
    assert stored_names == ["passwd"]
|
|
|
|
|
|
def test_delete_uploaded_file_removes_generated_markdown_companion(tmp_path):
    """Deleting "report.pdf" also removes the sibling "report.md".

    Both files are created up front. The test checks the returned payload and
    that neither file exists afterwards.
    """
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    pdf_file = uploads_dir / "report.pdf"
    markdown_file = uploads_dir / "report.md"
    pdf_file.write_bytes(b"pdf-bytes")
    markdown_file.write_text("converted", encoding="utf-8")

    with patch.object(uploads, "get_uploads_dir", return_value=uploads_dir):
        outcome = asyncio.run(uploads.delete_uploaded_file("thread-aio", "report.pdf"))

    assert outcome == {"success": True, "message": "Deleted report.pdf"}
    assert not pdf_file.exists()
    assert not markdown_file.exists()
|