Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
90 lines
2.7 KiB
Python
90 lines
2.7 KiB
Python
"""Subagent registry for managing available subagents."""
|
|
|
|
import logging
|
|
from dataclasses import replace
|
|
|
|
from deerflow.sandbox.security import is_host_bash_allowed
|
|
from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
|
|
from deerflow.subagents.config import SubagentConfig
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_subagent_config(name: str) -> SubagentConfig | None:
|
|
"""Get a subagent configuration by name, with config.yaml overrides applied.
|
|
|
|
Args:
|
|
name: The name of the subagent.
|
|
|
|
Returns:
|
|
SubagentConfig if found (with any config.yaml overrides applied), None otherwise.
|
|
"""
|
|
config = BUILTIN_SUBAGENTS.get(name)
|
|
if config is None:
|
|
return None
|
|
|
|
# Apply timeout override from config.yaml (lazy import to avoid circular deps)
|
|
from deerflow.config.subagents_config import get_subagents_app_config
|
|
|
|
app_config = get_subagents_app_config()
|
|
effective_timeout = app_config.get_timeout_for(name)
|
|
effective_max_turns = app_config.get_max_turns_for(name, config.max_turns)
|
|
|
|
overrides = {}
|
|
if effective_timeout != config.timeout_seconds:
|
|
logger.debug(
|
|
"Subagent '%s': timeout overridden by config.yaml (%ss -> %ss)",
|
|
name,
|
|
config.timeout_seconds,
|
|
effective_timeout,
|
|
)
|
|
overrides["timeout_seconds"] = effective_timeout
|
|
if effective_max_turns != config.max_turns:
|
|
logger.debug(
|
|
"Subagent '%s': max_turns overridden by config.yaml (%s -> %s)",
|
|
name,
|
|
config.max_turns,
|
|
effective_max_turns,
|
|
)
|
|
overrides["max_turns"] = effective_max_turns
|
|
if overrides:
|
|
config = replace(config, **overrides)
|
|
|
|
return config
|
|
|
|
|
|
def list_subagents() -> list[SubagentConfig]:
|
|
"""List all available subagent configurations (with config.yaml overrides applied).
|
|
|
|
Returns:
|
|
List of all registered SubagentConfig instances.
|
|
"""
|
|
return [get_subagent_config(name) for name in BUILTIN_SUBAGENTS]
|
|
|
|
|
|
def get_subagent_names() -> list[str]:
|
|
"""Get all available subagent names.
|
|
|
|
Returns:
|
|
List of subagent names.
|
|
"""
|
|
return list(BUILTIN_SUBAGENTS.keys())
|
|
|
|
|
|
def get_available_subagent_names() -> list[str]:
|
|
"""Get subagent names that should be exposed to the active runtime.
|
|
|
|
Returns:
|
|
List of subagent names visible to the current sandbox configuration.
|
|
"""
|
|
names = list(BUILTIN_SUBAGENTS.keys())
|
|
try:
|
|
host_bash_allowed = is_host_bash_allowed()
|
|
except Exception:
|
|
logger.debug("Could not determine host bash availability; exposing all built-in subagents")
|
|
return names
|
|
|
|
if not host_bash_allowed:
|
|
names = [name for name in names if name != "bash"]
|
|
return names
|