Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
107 lines
3.1 KiB
Python
107 lines
3.1 KiB
Python
"""Regression tests for docker sandbox mode detection logic."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
import tempfile
|
|
from pathlib import Path
|
|
from shutil import which
|
|
|
|
import pytest
|
|
|
|
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
SCRIPT_PATH = REPO_ROOT / "scripts" / "docker.sh"
|
|
BASH_CANDIDATES = [
|
|
Path(r"C:\Program Files\Git\bin\bash.exe"),
|
|
Path(which("bash")) if which("bash") else None,
|
|
]
|
|
BASH_EXECUTABLE = next(
|
|
(str(path) for path in BASH_CANDIDATES if path is not None and path.exists() and "WindowsApps" not in str(path)),
|
|
None,
|
|
)
|
|
|
|
if BASH_EXECUTABLE is None:
|
|
pytestmark = pytest.mark.skip(reason="bash is required for docker.sh detection tests")
|
|
|
|
|
|
def _detect_mode_with_config(config_content: str) -> str:
|
|
"""Write config content into a temp project root and execute detect_sandbox_mode."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
tmp_root = Path(tmpdir)
|
|
(tmp_root / "config.yaml").write_text(config_content, encoding="utf-8")
|
|
|
|
command = f"source '{SCRIPT_PATH}' && PROJECT_ROOT='{tmp_root}' && detect_sandbox_mode"
|
|
|
|
output = subprocess.check_output(
|
|
[BASH_EXECUTABLE, "-lc", command],
|
|
text=True,
|
|
encoding="utf-8",
|
|
).strip()
|
|
|
|
return output
|
|
|
|
|
|
def test_detect_mode_defaults_to_local_when_config_missing():
|
|
"""No config file should default to local mode."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
command = f"source '{SCRIPT_PATH}' && PROJECT_ROOT='{tmpdir}' && detect_sandbox_mode"
|
|
output = subprocess.check_output(
|
|
[BASH_EXECUTABLE, "-lc", command],
|
|
text=True,
|
|
encoding="utf-8",
|
|
).strip()
|
|
|
|
assert output == "local"
|
|
|
|
|
|
def test_detect_mode_local_provider():
|
|
"""Local sandbox provider should map to local mode."""
|
|
config = """
|
|
sandbox:
|
|
use: deerflow.sandbox.local:LocalSandboxProvider
|
|
""".strip()
|
|
|
|
assert _detect_mode_with_config(config) == "local"
|
|
|
|
|
|
def test_detect_mode_aio_without_provisioner_url():
|
|
"""AIO sandbox without provisioner_url should map to aio mode."""
|
|
config = """
|
|
sandbox:
|
|
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
|
""".strip()
|
|
|
|
assert _detect_mode_with_config(config) == "aio"
|
|
|
|
|
|
def test_detect_mode_provisioner_with_url():
|
|
"""AIO sandbox with provisioner_url should map to provisioner mode."""
|
|
config = """
|
|
sandbox:
|
|
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
|
provisioner_url: http://provisioner:8002
|
|
""".strip()
|
|
|
|
assert _detect_mode_with_config(config) == "provisioner"
|
|
|
|
|
|
def test_detect_mode_ignores_commented_provisioner_url():
|
|
"""Commented provisioner_url should not activate provisioner mode."""
|
|
config = """
|
|
sandbox:
|
|
use: deerflow.community.aio_sandbox:AioSandboxProvider
|
|
# provisioner_url: http://provisioner:8002
|
|
""".strip()
|
|
|
|
assert _detect_mode_with_config(config) == "aio"
|
|
|
|
|
|
def test_detect_mode_unknown_provider_falls_back_to_local():
|
|
"""Unknown sandbox provider should default to local mode."""
|
|
config = """
|
|
sandbox:
|
|
use: custom.module:UnknownProvider
|
|
""".strip()
|
|
|
|
assert _detect_mode_with_config(config) == "local"
|