Files
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loudly rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

94 lines
2.4 KiB
Python

from abc import ABC, abstractmethod
from deerflow.sandbox.search import GrepMatch
class Sandbox(ABC):
    """Abstract base class for sandbox environments.

    The base class only stores the sandbox identifier and exposes it through
    the read-only :attr:`id` property. Every other operation is abstract and
    must be supplied by a concrete subclass before it can be instantiated.
    """

    _id: str

    def __init__(self, id: str) -> None:
        """Store the sandbox identifier.

        Args:
            id: Identifier of this sandbox. The parameter name shadows the
                builtin ``id``; it is kept so keyword callers keep working.
        """
        self._id = id

    @property
    def id(self) -> str:
        """Return the identifier passed to the constructor."""
        return self._id

    @abstractmethod
    def execute_command(self, command: str) -> str:
        """Execute bash command in sandbox.

        Args:
            command: The command to execute.

        Returns:
            The standard or error output of the command.
        """

    @abstractmethod
    def read_file(self, path: str) -> str:
        """Read the content of a file.

        Args:
            path: The absolute path of the file to read.

        Returns:
            The content of the file.
        """

    @abstractmethod
    def list_dir(self, path: str, max_depth: int = 2) -> list[str]:
        """List the contents of a directory.

        Args:
            path: The absolute path of the directory to list.
            max_depth: The maximum depth to traverse. Default is 2.

        Returns:
            The contents of the directory.
        """

    @abstractmethod
    def write_file(self, path: str, content: str, append: bool = False) -> None:
        """Write content to a file.

        Args:
            path: The absolute path of the file to write to.
            content: The text content to write to the file.
            append: Whether to append the content to the file. If False, the
                file will be created or overwritten.
        """

    @abstractmethod
    def glob(
        self,
        path: str,
        pattern: str,
        *,
        include_dirs: bool = False,
        max_results: int = 200,
    ) -> tuple[list[str], bool]:
        """Find paths that match a glob pattern under a root directory.

        Args:
            path: The root directory to search under.
            pattern: The glob pattern to match.
            include_dirs: Keyword-only. Presumably whether directories are
                returned alongside files -- confirm against implementations.
            max_results: Keyword-only. Upper bound on returned paths
                (default 200).

        Returns:
            A ``(paths, flag)`` tuple. NOTE(review): the boolean most likely
            signals that results were truncated at ``max_results``; this base
            class does not define it, so verify against a concrete sandbox.
        """

    @abstractmethod
    def grep(
        self,
        path: str,
        pattern: str,
        *,
        glob: str | None = None,
        literal: bool = False,
        case_sensitive: bool = False,
        max_results: int = 100,
    ) -> tuple[list[GrepMatch], bool]:
        """Search for matches inside text files under a directory.

        Args:
            path: The directory to search under.
            pattern: The pattern to search for.
            glob: Keyword-only. Optional glob; presumably restricts which
                files are searched -- confirm against implementations.
            literal: Keyword-only. Presumably treats ``pattern`` as a plain
                string instead of a regular expression -- confirm.
            case_sensitive: Keyword-only. Defaults to False.
            max_results: Keyword-only. Upper bound on returned matches
                (default 100).

        Returns:
            A ``(matches, flag)`` tuple. NOTE(review): the boolean most likely
            signals that results were truncated at ``max_results``; this base
            class does not define it, so verify against a concrete sandbox.
        """

    @abstractmethod
    def update_file(self, path: str, content: bytes) -> None:
        """Update a file with binary content.

        Args:
            path: The absolute path of the file to update.
            content: The binary content to write to the file.
        """