Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
165 lines
5.7 KiB
Python
165 lines
5.7 KiB
Python
import builtins
|
|
from types import SimpleNamespace
|
|
|
|
import deerflow.sandbox.local.local_sandbox as local_sandbox
|
|
from deerflow.sandbox.local.local_sandbox import LocalSandbox
|
|
|
|
|
|
def _open(base, file, mode="r", *args, **kwargs):
|
|
if "b" in mode:
|
|
return base(file, mode, *args, **kwargs)
|
|
return base(file, mode, *args, encoding=kwargs.pop("encoding", "gbk"), **kwargs)
|
|
|
|
|
|
def test_read_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
|
|
path = tmp_path / "utf8.txt"
|
|
text = "\u201cutf8\u201d"
|
|
path.write_text(text, encoding="utf-8")
|
|
base = builtins.open
|
|
|
|
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
|
|
|
|
assert LocalSandbox("t").read_file(str(path)) == text
|
|
|
|
|
|
def test_write_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
|
|
path = tmp_path / "utf8.txt"
|
|
text = "emoji \U0001f600"
|
|
base = builtins.open
|
|
|
|
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
|
|
|
|
LocalSandbox("t").write_file(str(path), text)
|
|
|
|
assert path.read_text(encoding="utf-8") == text
|
|
|
|
|
|
def test_get_shell_prefers_posix_shell_from_path_before_windows_fallback(monkeypatch):
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", lambda candidates: r"C:\Program Files\Git\bin\sh.exe" if candidates == ("/bin/zsh", "/bin/bash", "/bin/sh", "sh") else None)
|
|
|
|
assert LocalSandbox._get_shell() == r"C:\Program Files\Git\bin\sh.exe"
|
|
|
|
|
|
def test_get_shell_uses_powershell_fallback_on_windows(monkeypatch):
|
|
calls: list[tuple[str, ...]] = []
|
|
|
|
def fake_find(candidates: tuple[str, ...]) -> str | None:
|
|
calls.append(candidates)
|
|
if candidates == ("/bin/zsh", "/bin/bash", "/bin/sh", "sh"):
|
|
return None
|
|
return r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe"
|
|
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(local_sandbox.os, "environ", {"SystemRoot": r"C:\Windows"})
|
|
monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", fake_find)
|
|
|
|
assert LocalSandbox._get_shell() == r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe"
|
|
assert calls[1] == (
|
|
"pwsh",
|
|
"pwsh.exe",
|
|
"powershell",
|
|
"powershell.exe",
|
|
r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
|
|
"cmd.exe",
|
|
)
|
|
|
|
|
|
def test_get_shell_uses_cmd_as_last_windows_fallback(monkeypatch):
|
|
def fake_find(candidates: tuple[str, ...]) -> str | None:
|
|
if candidates == ("/bin/zsh", "/bin/bash", "/bin/sh", "sh"):
|
|
return None
|
|
return r"C:\Windows\System32\cmd.exe"
|
|
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(local_sandbox.os, "environ", {"SystemRoot": r"C:\Windows"})
|
|
monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", fake_find)
|
|
|
|
assert LocalSandbox._get_shell() == r"C:\Windows\System32\cmd.exe"
|
|
|
|
|
|
def test_execute_command_uses_powershell_command_mode_on_windows(monkeypatch):
|
|
calls: list[tuple[object, dict]] = []
|
|
|
|
def fake_run(*args, **kwargs):
|
|
calls.append((args[0], kwargs))
|
|
return SimpleNamespace(stdout="ok", stderr="", returncode=0)
|
|
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe"))
|
|
monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)
|
|
|
|
output = LocalSandbox("t").execute_command("Write-Output hello")
|
|
|
|
assert output == "ok"
|
|
assert calls == [
|
|
(
|
|
[
|
|
r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
|
|
"-NoProfile",
|
|
"-Command",
|
|
"Write-Output hello",
|
|
],
|
|
{
|
|
"shell": False,
|
|
"capture_output": True,
|
|
"text": True,
|
|
"timeout": 600,
|
|
},
|
|
)
|
|
]
|
|
|
|
|
|
def test_execute_command_uses_posix_shell_command_mode_on_windows(monkeypatch):
|
|
calls: list[tuple[object, dict]] = []
|
|
|
|
def fake_run(*args, **kwargs):
|
|
calls.append((args[0], kwargs))
|
|
return SimpleNamespace(stdout="ok", stderr="", returncode=0)
|
|
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: r"C:\Program Files\Git\bin\sh.exe"))
|
|
monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)
|
|
|
|
output = LocalSandbox("t").execute_command("echo hello")
|
|
|
|
assert output == "ok"
|
|
assert calls == [
|
|
(
|
|
[r"C:\Program Files\Git\bin\sh.exe", "-c", "echo hello"],
|
|
{
|
|
"shell": False,
|
|
"capture_output": True,
|
|
"text": True,
|
|
"timeout": 600,
|
|
},
|
|
)
|
|
]
|
|
|
|
|
|
def test_execute_command_uses_cmd_command_mode_on_windows(monkeypatch):
|
|
calls: list[tuple[object, dict]] = []
|
|
|
|
def fake_run(*args, **kwargs):
|
|
calls.append((args[0], kwargs))
|
|
return SimpleNamespace(stdout="ok", stderr="", returncode=0)
|
|
|
|
monkeypatch.setattr(local_sandbox.os, "name", "nt")
|
|
monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: r"C:\Windows\System32\cmd.exe"))
|
|
monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)
|
|
|
|
output = LocalSandbox("t").execute_command("echo hello")
|
|
|
|
assert output == "ok"
|
|
assert calls == [
|
|
(
|
|
[r"C:\Windows\System32\cmd.exe", "/c", "echo hello"],
|
|
{
|
|
"shell": False,
|
|
"capture_output": True,
|
|
"text": True,
|
|
"timeout": 600,
|
|
},
|
|
)
|
|
]
|