Files
deerflow-factory/deer-flow/backend/tests/test_local_sandbox_encoding.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

165 lines
5.7 KiB
Python

import builtins
from types import SimpleNamespace
import deerflow.sandbox.local.local_sandbox as local_sandbox
from deerflow.sandbox.local.local_sandbox import LocalSandbox
def _open(base, file, mode="r", *args, **kwargs):
if "b" in mode:
return base(file, mode, *args, **kwargs)
return base(file, mode, *args, encoding=kwargs.pop("encoding", "gbk"), **kwargs)
def test_read_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
    """read_file must return UTF-8 text even when ``open`` defaults to GBK.

    The ``open`` name in the local_sandbox module is shadowed with a wrapper
    that applies a GBK default encoding, so the assertion only holds if
    ``read_file`` does not rely on the default text encoding.
    """
    real_open = builtins.open

    def gbk_default_open(file, mode="r", *args, **kwargs):
        return _open(real_open, file, mode, *args, **kwargs)

    expected = "\u201cutf8\u201d"
    target = tmp_path / "utf8.txt"
    target.write_text(expected, encoding="utf-8")

    # raising=False: the module may not define its own ``open`` attribute.
    monkeypatch.setattr(local_sandbox, "open", gbk_default_open, raising=False)

    sandbox = LocalSandbox("t")
    assert sandbox.read_file(str(target)) == expected
def test_write_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
    """write_file must produce UTF-8 bytes even when ``open`` defaults to GBK.

    The ``open`` name in the local_sandbox module is shadowed with a wrapper
    that applies a GBK default encoding; the written file is then read back
    as UTF-8 and compared with the original text.
    """
    real_open = builtins.open

    def gbk_default_open(file, mode="r", *args, **kwargs):
        return _open(real_open, file, mode, *args, **kwargs)

    payload = "emoji \U0001f600"
    target = tmp_path / "utf8.txt"

    # raising=False: the module may not define its own ``open`` attribute.
    monkeypatch.setattr(local_sandbox, "open", gbk_default_open, raising=False)

    sandbox = LocalSandbox("t")
    sandbox.write_file(str(target), payload)

    assert target.read_text(encoding="utf-8") == payload
def test_get_shell_prefers_posix_shell_from_path_before_windows_fallback(monkeypatch):
    """With os.name set to "nt", a shell found among the POSIX candidates wins.

    The shell finder is faked so that only the POSIX candidate tuple resolves
    to a path; ``_get_shell`` must return that path.
    """
    posix_candidates = ("/bin/zsh", "/bin/bash", "/bin/sh", "sh")
    git_sh = r"C:\Program Files\Git\bin\sh.exe"

    def fake_find(candidates):
        if candidates == posix_candidates:
            return git_sh
        return None

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", fake_find)

    assert LocalSandbox._get_shell() == git_sh
def test_get_shell_uses_powershell_fallback_on_windows(monkeypatch):
    """When no POSIX shell is found on "nt", the Windows candidates are tried.

    The fake finder records every candidate tuple it receives, returns None
    for the POSIX tuple and a PowerShell path for anything else. The test
    checks both the returned shell and the exact second candidate tuple.
    """
    powershell = r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe"
    posix_candidates = ("/bin/zsh", "/bin/bash", "/bin/sh", "sh")
    seen: list[tuple[str, ...]] = []

    def fake_find(candidates: tuple[str, ...]) -> str | None:
        seen.append(candidates)
        return None if candidates == posix_candidates else powershell

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    # Replace the environment with a dict that only carries SystemRoot.
    monkeypatch.setattr(local_sandbox.os, "environ", {"SystemRoot": r"C:\Windows"})
    monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", fake_find)

    expected_windows_candidates = (
        "pwsh",
        "pwsh.exe",
        "powershell",
        "powershell.exe",
        r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
        "cmd.exe",
    )

    assert LocalSandbox._get_shell() == powershell
    assert seen[1] == expected_windows_candidates
def test_get_shell_uses_cmd_as_last_windows_fallback(monkeypatch):
    """On "nt" with no POSIX shell, the finder's cmd.exe result is returned.

    The fake finder yields None for the POSIX candidate tuple and a cmd.exe
    path for any other tuple.
    """
    cmd_exe = r"C:\Windows\System32\cmd.exe"
    posix_candidates = ("/bin/zsh", "/bin/bash", "/bin/sh", "sh")

    def fake_find(candidates: tuple[str, ...]) -> str | None:
        return None if candidates == posix_candidates else cmd_exe

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    # Replace the environment with a dict that only carries SystemRoot.
    monkeypatch.setattr(local_sandbox.os, "environ", {"SystemRoot": r"C:\Windows"})
    monkeypatch.setattr(LocalSandbox, "_find_first_available_shell", fake_find)

    assert LocalSandbox._get_shell() == cmd_exe
def test_execute_command_uses_powershell_command_mode_on_windows(monkeypatch):
    """A PowerShell shell is invoked with ``-NoProfile -Command <command>``.

    ``subprocess.run`` is replaced by a recorder, and the test checks the
    returned output plus the exact argv list and keyword arguments of the
    single recorded call.
    """
    powershell = r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe"
    recorded: list[tuple[object, dict]] = []

    def fake_run(*args, **kwargs):
        # Keep only the first positional argument alongside the keywords.
        recorded.append((args[0], kwargs))
        return SimpleNamespace(stdout="ok", stderr="", returncode=0)

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: powershell))
    monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)

    result = LocalSandbox("t").execute_command("Write-Output hello")

    expected_argv = [powershell, "-NoProfile", "-Command", "Write-Output hello"]
    expected_kwargs = {
        "shell": False,
        "capture_output": True,
        "text": True,
        "timeout": 600,
    }

    assert result == "ok"
    assert recorded == [(expected_argv, expected_kwargs)]
def test_execute_command_uses_posix_shell_command_mode_on_windows(monkeypatch):
    """A POSIX-style ``sh.exe`` shell is invoked with ``-c <command>``.

    ``subprocess.run`` is replaced by a recorder, and the test checks the
    returned output plus the exact argv list and keyword arguments of the
    single recorded call.
    """
    git_sh = r"C:\Program Files\Git\bin\sh.exe"
    recorded: list[tuple[object, dict]] = []

    def fake_run(*args, **kwargs):
        # Keep only the first positional argument alongside the keywords.
        recorded.append((args[0], kwargs))
        return SimpleNamespace(stdout="ok", stderr="", returncode=0)

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: git_sh))
    monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)

    result = LocalSandbox("t").execute_command("echo hello")

    expected_argv = [git_sh, "-c", "echo hello"]
    expected_kwargs = {
        "shell": False,
        "capture_output": True,
        "text": True,
        "timeout": 600,
    }

    assert result == "ok"
    assert recorded == [(expected_argv, expected_kwargs)]
def test_execute_command_uses_cmd_command_mode_on_windows(monkeypatch):
    """A ``cmd.exe`` shell is invoked with ``/c <command>``.

    ``subprocess.run`` is replaced by a recorder, and the test checks the
    returned output plus the exact argv list and keyword arguments of the
    single recorded call.
    """
    cmd_exe = r"C:\Windows\System32\cmd.exe"
    recorded: list[tuple[object, dict]] = []

    def fake_run(*args, **kwargs):
        # Keep only the first positional argument alongside the keywords.
        recorded.append((args[0], kwargs))
        return SimpleNamespace(stdout="ok", stderr="", returncode=0)

    monkeypatch.setattr(local_sandbox.os, "name", "nt")
    monkeypatch.setattr(LocalSandbox, "_get_shell", staticmethod(lambda: cmd_exe))
    monkeypatch.setattr(local_sandbox.subprocess, "run", fake_run)

    result = LocalSandbox("t").execute_command("echo hello")

    expected_argv = [cmd_exe, "/c", "echo hello"]
    expected_kwargs = {
        "shell": False,
        "capture_output": True,
        "text": True,
        "timeout": 600,
    }

    assert result == "ok"
    assert recorded == [(expected_argv, expected_kwargs)]