Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from deerflow.sandbox.search import GrepMatch
|
||||
|
||||
|
||||
class Sandbox(ABC):
|
||||
"""Abstract base class for sandbox environments"""
|
||||
|
||||
_id: str
|
||||
|
||||
def __init__(self, id: str):
|
||||
self._id = id
|
||||
|
||||
@property
|
||||
def id(self) -> str:
|
||||
return self._id
|
||||
|
||||
@abstractmethod
|
||||
def execute_command(self, command: str) -> str:
|
||||
"""Execute bash command in sandbox.
|
||||
|
||||
Args:
|
||||
command: The command to execute.
|
||||
|
||||
Returns:
|
||||
The standard or error output of the command.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def read_file(self, path: str) -> str:
|
||||
"""Read the content of a file.
|
||||
|
||||
Args:
|
||||
path: The absolute path of the file to read.
|
||||
|
||||
Returns:
|
||||
The content of the file.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def list_dir(self, path: str, max_depth=2) -> list[str]:
|
||||
"""List the contents of a directory.
|
||||
|
||||
Args:
|
||||
path: The absolute path of the directory to list.
|
||||
max_depth: The maximum depth to traverse. Default is 2.
|
||||
|
||||
Returns:
|
||||
The contents of the directory.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
||||
"""Write content to a file.
|
||||
|
||||
Args:
|
||||
path: The absolute path of the file to write to.
|
||||
content: The text content to write to the file.
|
||||
append: Whether to append the content to the file. If False, the file will be created or overwritten.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
|
||||
"""Find paths that match a glob pattern under a root directory."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def grep(
|
||||
self,
|
||||
path: str,
|
||||
pattern: str,
|
||||
*,
|
||||
glob: str | None = None,
|
||||
literal: bool = False,
|
||||
case_sensitive: bool = False,
|
||||
max_results: int = 100,
|
||||
) -> tuple[list[GrepMatch], bool]:
|
||||
"""Search for matches inside text files under a directory."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def update_file(self, path: str, content: bytes) -> None:
|
||||
"""Update a file with binary content.
|
||||
|
||||
Args:
|
||||
path: The absolute path of the file to update.
|
||||
content: The binary content to write to the file.
|
||||
"""
|
||||
pass
|
||||
Reference in New Issue
Block a user