Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
106
deer-flow/backend/tests/test_docker_sandbox_mode_detection.py
Normal file
106
deer-flow/backend/tests/test_docker_sandbox_mode_detection.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Regression tests for docker sandbox mode detection logic."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from shutil import which
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
SCRIPT_PATH = REPO_ROOT / "scripts" / "docker.sh"
|
||||
BASH_CANDIDATES = [
|
||||
Path(r"C:\Program Files\Git\bin\bash.exe"),
|
||||
Path(which("bash")) if which("bash") else None,
|
||||
]
|
||||
BASH_EXECUTABLE = next(
|
||||
(str(path) for path in BASH_CANDIDATES if path is not None and path.exists() and "WindowsApps" not in str(path)),
|
||||
None,
|
||||
)
|
||||
|
||||
if BASH_EXECUTABLE is None:
|
||||
pytestmark = pytest.mark.skip(reason="bash is required for docker.sh detection tests")
|
||||
|
||||
|
||||
def _detect_mode_with_config(config_content: str) -> str:
|
||||
"""Write config content into a temp project root and execute detect_sandbox_mode."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmp_root = Path(tmpdir)
|
||||
(tmp_root / "config.yaml").write_text(config_content, encoding="utf-8")
|
||||
|
||||
command = f"source '{SCRIPT_PATH}' && PROJECT_ROOT='{tmp_root}' && detect_sandbox_mode"
|
||||
|
||||
output = subprocess.check_output(
|
||||
[BASH_EXECUTABLE, "-lc", command],
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
).strip()
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def test_detect_mode_defaults_to_local_when_config_missing():
|
||||
"""No config file should default to local mode."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
command = f"source '{SCRIPT_PATH}' && PROJECT_ROOT='{tmpdir}' && detect_sandbox_mode"
|
||||
output = subprocess.check_output(
|
||||
[BASH_EXECUTABLE, "-lc", command],
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
).strip()
|
||||
|
||||
assert output == "local"
|
||||
|
||||
|
||||
def test_detect_mode_local_provider():
|
||||
"""Local sandbox provider should map to local mode."""
|
||||
config = """
|
||||
sandbox:
|
||||
use: deerflow.sandbox.local:LocalSandboxProvider
|
||||
""".strip()
|
||||
|
||||
assert _detect_mode_with_config(config) == "local"
|
||||
|
||||
|
||||
def test_detect_mode_aio_without_provisioner_url():
|
||||
"""AIO sandbox without provisioner_url should map to aio mode."""
|
||||
config = """
|
||||
sandbox:
|
||||
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
||||
""".strip()
|
||||
|
||||
assert _detect_mode_with_config(config) == "aio"
|
||||
|
||||
|
||||
def test_detect_mode_provisioner_with_url():
|
||||
"""AIO sandbox with provisioner_url should map to provisioner mode."""
|
||||
config = """
|
||||
sandbox:
|
||||
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
||||
provisioner_url: http://provisioner:8002
|
||||
""".strip()
|
||||
|
||||
assert _detect_mode_with_config(config) == "provisioner"
|
||||
|
||||
|
||||
def test_detect_mode_ignores_commented_provisioner_url():
|
||||
"""Commented provisioner_url should not activate provisioner mode."""
|
||||
config = """
|
||||
sandbox:
|
||||
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
||||
# provisioner_url: http://provisioner:8002
|
||||
""".strip()
|
||||
|
||||
assert _detect_mode_with_config(config) == "aio"
|
||||
|
||||
|
||||
def test_detect_mode_unknown_provider_falls_back_to_local():
|
||||
"""Unknown sandbox provider should default to local mode."""
|
||||
config = """
|
||||
sandbox:
|
||||
use: custom.module:UnknownProvider
|
||||
""".strip()
|
||||
|
||||
assert _detect_mode_with_config(config) == "local"
|
||||
Reference in New Issue
Block a user