Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
480
deer-flow/backend/tests/test_local_sandbox_provider_mounts.py
Normal file
480
deer-flow/backend/tests/test_local_sandbox_provider_mounts.py
Normal file
@@ -0,0 +1,480 @@
|
||||
import errno
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping
|
||||
from deerflow.sandbox.local.local_sandbox_provider import LocalSandboxProvider
|
||||
|
||||
|
||||
class TestPathMapping:
|
||||
def test_path_mapping_dataclass(self):
|
||||
mapping = PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True)
|
||||
assert mapping.container_path == "/mnt/skills"
|
||||
assert mapping.local_path == "/home/user/skills"
|
||||
assert mapping.read_only is True
|
||||
|
||||
def test_path_mapping_defaults_to_false(self):
|
||||
mapping = PathMapping(container_path="/mnt/data", local_path="/home/user/data")
|
||||
assert mapping.read_only is False
|
||||
|
||||
|
||||
class TestLocalSandboxPathResolution:
|
||||
def test_resolve_path_exact_match(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._resolve_path("/mnt/skills")
|
||||
assert resolved == "/home/user/skills"
|
||||
|
||||
def test_resolve_path_nested_path(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._resolve_path("/mnt/skills/agent/prompt.py")
|
||||
assert resolved == "/home/user/skills/agent/prompt.py"
|
||||
|
||||
def test_resolve_path_no_mapping(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._resolve_path("/mnt/other/file.txt")
|
||||
assert resolved == "/mnt/other/file.txt"
|
||||
|
||||
def test_resolve_path_longest_prefix_first(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"),
|
||||
PathMapping(container_path="/mnt", local_path="/var/mnt"),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._resolve_path("/mnt/skills/file.py")
|
||||
# Should match /mnt/skills first (longer prefix)
|
||||
assert resolved == "/home/user/skills/file.py"
|
||||
|
||||
def test_reverse_resolve_path_exact_match(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path=str(skills_dir)),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._reverse_resolve_path(str(skills_dir))
|
||||
assert resolved == "/mnt/skills"
|
||||
|
||||
def test_reverse_resolve_path_nested(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
file_path = skills_dir / "agent" / "prompt.py"
|
||||
file_path.parent.mkdir()
|
||||
file_path.write_text("test")
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path=str(skills_dir)),
|
||||
],
|
||||
)
|
||||
resolved = sandbox._reverse_resolve_path(str(file_path))
|
||||
assert resolved == "/mnt/skills/agent/prompt.py"
|
||||
|
||||
|
||||
class TestReadOnlyPath:
|
||||
def test_is_read_only_true(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True),
|
||||
],
|
||||
)
|
||||
assert sandbox._is_read_only_path("/home/user/skills/file.py") is True
|
||||
|
||||
def test_is_read_only_false_for_writable(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path="/home/user/data", read_only=False),
|
||||
],
|
||||
)
|
||||
assert sandbox._is_read_only_path("/home/user/data/file.txt") is False
|
||||
|
||||
def test_is_read_only_false_for_unmapped_path(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True),
|
||||
],
|
||||
)
|
||||
# Path not under any mapping
|
||||
assert sandbox._is_read_only_path("/tmp/other/file.txt") is False
|
||||
|
||||
def test_is_read_only_true_for_exact_match(self):
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True),
|
||||
],
|
||||
)
|
||||
assert sandbox._is_read_only_path("/home/user/skills") is True
|
||||
|
||||
def test_write_file_blocked_on_read_only(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True),
|
||||
],
|
||||
)
|
||||
# Skills dir is read-only, write should be blocked
|
||||
with pytest.raises(OSError) as exc_info:
|
||||
sandbox.write_file("/mnt/skills/new_file.py", "content")
|
||||
assert exc_info.value.errno == errno.EROFS
|
||||
|
||||
def test_write_file_allowed_on_writable_mount(self, tmp_path):
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir), read_only=False),
|
||||
],
|
||||
)
|
||||
sandbox.write_file("/mnt/data/file.txt", "content")
|
||||
assert (data_dir / "file.txt").read_text() == "content"
|
||||
|
||||
def test_update_file_blocked_on_read_only(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
existing_file = skills_dir / "existing.py"
|
||||
existing_file.write_bytes(b"original")
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True),
|
||||
],
|
||||
)
|
||||
with pytest.raises(OSError) as exc_info:
|
||||
sandbox.update_file("/mnt/skills/existing.py", b"updated")
|
||||
assert exc_info.value.errno == errno.EROFS
|
||||
|
||||
|
||||
class TestMultipleMounts:
|
||||
def test_multiple_read_write_mounts(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
external_dir = tmp_path / "external"
|
||||
external_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True),
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir), read_only=False),
|
||||
PathMapping(container_path="/mnt/external", local_path=str(external_dir), read_only=True),
|
||||
],
|
||||
)
|
||||
|
||||
# Skills is read-only
|
||||
with pytest.raises(OSError):
|
||||
sandbox.write_file("/mnt/skills/file.py", "content")
|
||||
|
||||
# Data is writable
|
||||
sandbox.write_file("/mnt/data/file.txt", "data content")
|
||||
assert (data_dir / "file.txt").read_text() == "data content"
|
||||
|
||||
# External is read-only
|
||||
with pytest.raises(OSError):
|
||||
sandbox.write_file("/mnt/external/file.txt", "content")
|
||||
|
||||
def test_nested_mounts_writable_under_readonly(self, tmp_path):
|
||||
"""A writable mount nested under a read-only mount should allow writes."""
|
||||
ro_dir = tmp_path / "ro"
|
||||
ro_dir.mkdir()
|
||||
rw_dir = ro_dir / "writable"
|
||||
rw_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/repo", local_path=str(ro_dir), read_only=True),
|
||||
PathMapping(container_path="/mnt/repo/writable", local_path=str(rw_dir), read_only=False),
|
||||
],
|
||||
)
|
||||
|
||||
# Parent mount is read-only
|
||||
with pytest.raises(OSError):
|
||||
sandbox.write_file("/mnt/repo/file.txt", "content")
|
||||
|
||||
# Nested writable mount should allow writes
|
||||
sandbox.write_file("/mnt/repo/writable/file.txt", "content")
|
||||
assert (rw_dir / "file.txt").read_text() == "content"
|
||||
|
||||
def test_execute_command_path_replacement(self, tmp_path, monkeypatch):
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
test_file = data_dir / "test.txt"
|
||||
test_file.write_text("hello")
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
|
||||
# Mock subprocess to capture the resolved command
|
||||
captured = {}
|
||||
original_run = __import__("subprocess").run
|
||||
|
||||
def mock_run(*args, **kwargs):
|
||||
if len(args) > 0:
|
||||
captured["command"] = args[0]
|
||||
return original_run(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr("deerflow.sandbox.local.local_sandbox.subprocess.run", mock_run)
|
||||
monkeypatch.setattr("deerflow.sandbox.local.local_sandbox.LocalSandbox._get_shell", lambda self: "/bin/sh")
|
||||
|
||||
sandbox.execute_command("cat /mnt/data/test.txt")
|
||||
# Verify the command received the resolved local path
|
||||
assert str(data_dir) in captured.get("command", "")
|
||||
|
||||
def test_reverse_resolve_path_does_not_match_partial_prefix(self, tmp_path):
|
||||
foo_dir = tmp_path / "foo"
|
||||
foo_dir.mkdir()
|
||||
foobar_dir = tmp_path / "foobar"
|
||||
foobar_dir.mkdir()
|
||||
target = foobar_dir / "file.txt"
|
||||
target.write_text("test")
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/foo", local_path=str(foo_dir)),
|
||||
],
|
||||
)
|
||||
|
||||
resolved = sandbox._reverse_resolve_path(str(target))
|
||||
assert resolved == str(target.resolve())
|
||||
|
||||
def test_reverse_resolve_paths_in_output_supports_backslash_separator(self, tmp_path):
|
||||
mount_dir = tmp_path / "mount"
|
||||
mount_dir.mkdir()
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(mount_dir)),
|
||||
],
|
||||
)
|
||||
|
||||
output = f"Copied: {mount_dir}\\file.txt"
|
||||
masked = sandbox._reverse_resolve_paths_in_output(output)
|
||||
|
||||
assert "/mnt/data/file.txt" in masked
|
||||
assert str(mount_dir) not in masked
|
||||
|
||||
|
||||
class TestLocalSandboxProviderMounts:
|
||||
def test_setup_path_mappings_uses_configured_skills_container_path_as_reserved_prefix(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
custom_dir = tmp_path / "custom"
|
||||
custom_dir.mkdir()
|
||||
|
||||
from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
use="deerflow.sandbox.local:LocalSandboxProvider",
|
||||
mounts=[
|
||||
VolumeMountConfig(host_path=str(custom_dir), container_path="/custom-skills/nested", read_only=False),
|
||||
],
|
||||
)
|
||||
config = SimpleNamespace(
|
||||
skills=SimpleNamespace(container_path="/custom-skills", get_skills_path=lambda: skills_dir),
|
||||
sandbox=sandbox_config,
|
||||
)
|
||||
|
||||
with patch("deerflow.config.get_app_config", return_value=config):
|
||||
provider = LocalSandboxProvider()
|
||||
|
||||
assert [m.container_path for m in provider._path_mappings] == ["/custom-skills"]
|
||||
|
||||
def test_setup_path_mappings_skips_relative_host_path(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
|
||||
from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
use="deerflow.sandbox.local:LocalSandboxProvider",
|
||||
mounts=[
|
||||
VolumeMountConfig(host_path="relative/path", container_path="/mnt/data", read_only=False),
|
||||
],
|
||||
)
|
||||
config = SimpleNamespace(
|
||||
skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir),
|
||||
sandbox=sandbox_config,
|
||||
)
|
||||
|
||||
with patch("deerflow.config.get_app_config", return_value=config):
|
||||
provider = LocalSandboxProvider()
|
||||
|
||||
assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
|
||||
|
||||
def test_setup_path_mappings_skips_non_absolute_container_path(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
custom_dir = tmp_path / "custom"
|
||||
custom_dir.mkdir()
|
||||
|
||||
from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
use="deerflow.sandbox.local:LocalSandboxProvider",
|
||||
mounts=[
|
||||
VolumeMountConfig(host_path=str(custom_dir), container_path="mnt/data", read_only=False),
|
||||
],
|
||||
)
|
||||
config = SimpleNamespace(
|
||||
skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir),
|
||||
sandbox=sandbox_config,
|
||||
)
|
||||
|
||||
with patch("deerflow.config.get_app_config", return_value=config):
|
||||
provider = LocalSandboxProvider()
|
||||
|
||||
assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
|
||||
|
||||
def test_write_file_resolves_container_paths_in_content(self, tmp_path):
|
||||
"""write_file should replace container paths in file content with local paths."""
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
sandbox.write_file(
|
||||
"/mnt/data/script.py",
|
||||
'import pathlib\npath = "/mnt/data/output"\nprint(path)',
|
||||
)
|
||||
written = (data_dir / "script.py").read_text()
|
||||
# Container path should be resolved to local path (forward slashes)
|
||||
assert str(data_dir).replace("\\", "/") in written
|
||||
assert "/mnt/data/output" not in written
|
||||
|
||||
def test_write_file_uses_forward_slashes_on_windows_paths(self, tmp_path):
|
||||
"""Resolved paths in content should always use forward slashes."""
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
sandbox.write_file(
|
||||
"/mnt/data/config.py",
|
||||
'DATA_DIR = "/mnt/data/files"',
|
||||
)
|
||||
written = (data_dir / "config.py").read_text()
|
||||
# Must not contain backslashes that could break escape sequences
|
||||
assert "\\" not in written.split("DATA_DIR = ")[1].split("\n")[0]
|
||||
|
||||
def test_read_file_reverse_resolves_local_paths_in_agent_written_files(self, tmp_path):
|
||||
"""read_file should convert local paths back to container paths in agent-written files."""
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
# Use write_file so the path is tracked as agent-written
|
||||
sandbox.write_file("/mnt/data/info.txt", "File located at: /mnt/data/info.txt")
|
||||
|
||||
content = sandbox.read_file("/mnt/data/info.txt")
|
||||
assert "/mnt/data/info.txt" in content
|
||||
|
||||
def test_read_file_does_not_reverse_resolve_non_agent_files(self, tmp_path):
|
||||
"""read_file should NOT rewrite paths in user-uploaded or external files."""
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
# Write directly to filesystem (simulates user upload or external tool output)
|
||||
local_path = str(data_dir).replace("\\", "/")
|
||||
(data_dir / "config.yml").write_text(f"output_dir: {local_path}/outputs")
|
||||
|
||||
content = sandbox.read_file("/mnt/data/config.yml")
|
||||
# Content should be returned as-is, NOT reverse-resolved
|
||||
assert local_path in content
|
||||
|
||||
def test_write_then_read_roundtrip(self, tmp_path):
|
||||
"""Container paths survive a write → read roundtrip."""
|
||||
data_dir = tmp_path / "data"
|
||||
data_dir.mkdir()
|
||||
|
||||
sandbox = LocalSandbox(
|
||||
"test",
|
||||
[
|
||||
PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
|
||||
],
|
||||
)
|
||||
original = 'cfg = {"path": "/mnt/data/config.json", "flag": true}'
|
||||
sandbox.write_file("/mnt/data/settings.py", original)
|
||||
result = sandbox.read_file("/mnt/data/settings.py")
|
||||
# The container path should be preserved through roundtrip
|
||||
assert "/mnt/data/config.json" in result
|
||||
|
||||
def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
custom_dir = tmp_path / "custom"
|
||||
custom_dir.mkdir()
|
||||
|
||||
from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
use="deerflow.sandbox.local:LocalSandboxProvider",
|
||||
mounts=[
|
||||
VolumeMountConfig(host_path=str(custom_dir), container_path="/mnt/data/", read_only=False),
|
||||
],
|
||||
)
|
||||
config = SimpleNamespace(
|
||||
skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir),
|
||||
sandbox=sandbox_config,
|
||||
)
|
||||
|
||||
with patch("deerflow.config.get_app_config", return_value=config):
|
||||
provider = LocalSandboxProvider()
|
||||
|
||||
assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills", "/mnt/data"]
|
||||
Reference in New Issue
Block a user