Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
from .app_config import get_app_config
from .extensions_config import ExtensionsConfig, get_extensions_config
from .memory_config import MemoryConfig, get_memory_config
from .paths import Paths, get_paths
from .skill_evolution_config import SkillEvolutionConfig
from .skills_config import SkillsConfig
from .tracing_config import (
get_enabled_tracing_providers,
get_explicitly_enabled_tracing_providers,
get_tracing_config,
is_tracing_enabled,
validate_enabled_tracing_providers,
)
__all__ = [
"get_app_config",
"SkillEvolutionConfig",
"Paths",
"get_paths",
"SkillsConfig",
"ExtensionsConfig",
"get_extensions_config",
"MemoryConfig",
"get_memory_config",
"get_tracing_config",
"get_explicitly_enabled_tracing_providers",
"get_enabled_tracing_providers",
"is_tracing_enabled",
"validate_enabled_tracing_providers",
]

View File

@@ -0,0 +1,51 @@
"""ACP (Agent Client Protocol) agent configuration loaded from config.yaml."""
import logging
from collections.abc import Mapping
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
class ACPAgentConfig(BaseModel):
"""Configuration for a single ACP-compatible agent."""
command: str = Field(description="Command to launch the ACP agent subprocess")
args: list[str] = Field(default_factory=list, description="Additional command arguments")
env: dict[str, str] = Field(default_factory=dict, description="Environment variables to inject into the agent subprocess. Values starting with $ are resolved from host environment variables.")
description: str = Field(description="Description of the agent's capabilities (shown in tool description)")
model: str | None = Field(default=None, description="Model hint passed to the agent (optional)")
auto_approve_permissions: bool = Field(
default=False,
description=(
"When True, DeerFlow automatically approves all ACP permission requests from this agent "
"(allow_once preferred over allow_always). When False (default), all permission requests "
"are denied — the agent must be configured to operate without requesting permissions."
),
)
_acp_agents: dict[str, ACPAgentConfig] = {}
def get_acp_agents() -> dict[str, ACPAgentConfig]:
"""Get the currently configured ACP agents.
Returns:
Mapping of agent name -> ACPAgentConfig. Empty dict if no ACP agents are configured.
"""
return _acp_agents
def load_acp_config_from_dict(config_dict: Mapping[str, Mapping[str, object]] | None) -> None:
"""Load ACP agent configuration from a dictionary (typically from config.yaml).
Args:
config_dict: Mapping of agent name -> config fields.
"""
global _acp_agents
if config_dict is None:
config_dict = {}
_acp_agents = {name: ACPAgentConfig(**cfg) for name, cfg in config_dict.items()}
logger.info("ACP config loaded: %d agent(s): %s", len(_acp_agents), list(_acp_agents.keys()))

View File

@@ -0,0 +1,125 @@
"""Configuration and loaders for custom agents."""
import logging
import re
from typing import Any
import yaml
from pydantic import BaseModel
from deerflow.config.paths import get_paths
logger = logging.getLogger(__name__)
SOUL_FILENAME = "SOUL.md"
AGENT_NAME_PATTERN = re.compile(r"^[A-Za-z0-9-]+$")
class AgentConfig(BaseModel):
"""Configuration for a custom agent."""
name: str
description: str = ""
model: str | None = None
tool_groups: list[str] | None = None
# skills controls which skills are loaded into the agent's prompt:
# - None (or omitted): load all enabled skills (default fallback behavior)
# - [] (explicit empty list): disable all skills
# - ["skill1", "skill2"]: load only the specified skills
skills: list[str] | None = None
def load_agent_config(name: str | None) -> AgentConfig | None:
"""Load the custom or default agent's config from its directory.
Args:
name: The agent name.
Returns:
AgentConfig instance.
Raises:
FileNotFoundError: If the agent directory or config.yaml does not exist.
ValueError: If config.yaml cannot be parsed.
"""
if name is None:
return None
if not AGENT_NAME_PATTERN.match(name):
raise ValueError(f"Invalid agent name '{name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")
agent_dir = get_paths().agent_dir(name)
config_file = agent_dir / "config.yaml"
if not agent_dir.exists():
raise FileNotFoundError(f"Agent directory not found: {agent_dir}")
if not config_file.exists():
raise FileNotFoundError(f"Agent config not found: {config_file}")
try:
with open(config_file, encoding="utf-8") as f:
data: dict[str, Any] = yaml.safe_load(f) or {}
except yaml.YAMLError as e:
raise ValueError(f"Failed to parse agent config {config_file}: {e}") from e
# Ensure name is set from directory name if not in file
if "name" not in data:
data["name"] = name
# Strip unknown fields before passing to Pydantic (e.g. legacy prompt_file)
known_fields = set(AgentConfig.model_fields.keys())
data = {k: v for k, v in data.items() if k in known_fields}
return AgentConfig(**data)
def load_agent_soul(agent_name: str | None) -> str | None:
"""Read the SOUL.md file for a custom agent, if it exists.
SOUL.md defines the agent's personality, values, and behavioral guardrails.
It is injected into the lead agent's system prompt as additional context.
Args:
agent_name: The name of the agent or None for the default agent.
Returns:
The SOUL.md content as a string, or None if the file does not exist.
"""
agent_dir = get_paths().agent_dir(agent_name) if agent_name else get_paths().base_dir
soul_path = agent_dir / SOUL_FILENAME
if not soul_path.exists():
return None
content = soul_path.read_text(encoding="utf-8").strip()
return content or None
def list_custom_agents() -> list[AgentConfig]:
"""Scan the agents directory and return all valid custom agents.
Returns:
List of AgentConfig for each valid agent directory found.
"""
agents_dir = get_paths().agents_dir
if not agents_dir.exists():
return []
agents: list[AgentConfig] = []
for entry in sorted(agents_dir.iterdir()):
if not entry.is_dir():
continue
config_file = entry / "config.yaml"
if not config_file.exists():
logger.debug(f"Skipping {entry.name}: no config.yaml")
continue
try:
agent_cfg = load_agent_config(entry.name)
agents.append(agent_cfg)
except Exception as e:
logger.warning(f"Skipping agent '{entry.name}': {e}")
return agents

View File

@@ -0,0 +1,379 @@
import logging
import os
from contextvars import ContextVar
from pathlib import Path
from typing import Any, Self
import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, Field
from deerflow.config.acp_config import load_acp_config_from_dict
from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
from deerflow.config.extensions_config import ExtensionsConfig
from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
from deerflow.config.model_config import ModelConfig
from deerflow.config.sandbox_config import SandboxConfig
from deerflow.config.skill_evolution_config import SkillEvolutionConfig
from deerflow.config.skills_config import SkillsConfig
from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
from deerflow.config.token_usage_config import TokenUsageConfig
from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
load_dotenv()
logger = logging.getLogger(__name__)
def _default_config_candidates() -> tuple[Path, ...]:
"""Return deterministic config.yaml locations without relying on cwd."""
backend_dir = Path(__file__).resolve().parents[4]
repo_root = backend_dir.parent
return (backend_dir / "config.yaml", repo_root / "config.yaml")
class AppConfig(BaseModel):
"""Config for the DeerFlow application"""
log_level: str = Field(default="info", description="Logging level for deerflow modules (debug/info/warning/error)")
token_usage: TokenUsageConfig = Field(default_factory=TokenUsageConfig, description="Token usage tracking configuration")
models: list[ModelConfig] = Field(default_factory=list, description="Available models")
sandbox: SandboxConfig = Field(description="Sandbox configuration")
tools: list[ToolConfig] = Field(default_factory=list, description="Available tools")
tool_groups: list[ToolGroupConfig] = Field(default_factory=list, description="Available tool groups")
skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
skill_evolution: SkillEvolutionConfig = Field(default_factory=SkillEvolutionConfig, description="Agent-managed skill evolution configuration")
extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")
tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig, description="Tool search / deferred loading configuration")
title: TitleConfig = Field(default_factory=TitleConfig, description="Automatic title generation configuration")
summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
model_config = ConfigDict(extra="allow", frozen=False)
checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")
@classmethod
def resolve_config_path(cls, config_path: str | None = None) -> Path:
"""Resolve the config file path.
Priority:
1. If provided `config_path` argument, use it.
2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.
"""
if config_path:
path = Path(config_path)
if not Path.exists(path):
raise FileNotFoundError(f"Config file specified by param `config_path` not found at {path}")
return path
elif os.getenv("DEER_FLOW_CONFIG_PATH"):
path = Path(os.getenv("DEER_FLOW_CONFIG_PATH"))
if not Path.exists(path):
raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
return path
else:
for path in _default_config_candidates():
if path.exists():
return path
raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")
@classmethod
def from_file(cls, config_path: str | None = None) -> Self:
"""Load config from YAML file.
See `resolve_config_path` for more details.
Args:
config_path: Path to the config file.
Returns:
AppConfig: The loaded config.
"""
resolved_path = cls.resolve_config_path(config_path)
with open(resolved_path, encoding="utf-8") as f:
config_data = yaml.safe_load(f) or {}
# Check config version before processing
cls._check_config_version(config_data, resolved_path)
config_data = cls.resolve_env_variables(config_data)
# Load title config if present
if "title" in config_data:
load_title_config_from_dict(config_data["title"])
# Load summarization config if present
if "summarization" in config_data:
load_summarization_config_from_dict(config_data["summarization"])
# Load memory config if present
if "memory" in config_data:
load_memory_config_from_dict(config_data["memory"])
# Load subagents config if present
if "subagents" in config_data:
load_subagents_config_from_dict(config_data["subagents"])
# Load tool_search config if present
if "tool_search" in config_data:
load_tool_search_config_from_dict(config_data["tool_search"])
# Load guardrails config if present
if "guardrails" in config_data:
load_guardrails_config_from_dict(config_data["guardrails"])
# Load checkpointer config if present
if "checkpointer" in config_data:
load_checkpointer_config_from_dict(config_data["checkpointer"])
# Load stream bridge config if present
if "stream_bridge" in config_data:
load_stream_bridge_config_from_dict(config_data["stream_bridge"])
# Always refresh ACP agent config so removed entries do not linger across reloads.
load_acp_config_from_dict(config_data.get("acp_agents", {}))
# Load extensions config separately (it's in a different file)
extensions_config = ExtensionsConfig.from_file()
config_data["extensions"] = extensions_config.model_dump()
result = cls.model_validate(config_data)
return result
@classmethod
def _check_config_version(cls, config_data: dict, config_path: Path) -> None:
"""Check if the user's config.yaml is outdated compared to config.example.yaml.
Emits a warning if the user's config_version is lower than the example's.
Missing config_version is treated as version 0 (pre-versioning).
"""
try:
user_version = int(config_data.get("config_version", 0))
except (TypeError, ValueError):
user_version = 0
# Find config.example.yaml by searching config.yaml's directory and its parents
example_path = None
search_dir = config_path.parent
for _ in range(5): # search up to 5 levels
candidate = search_dir / "config.example.yaml"
if candidate.exists():
example_path = candidate
break
parent = search_dir.parent
if parent == search_dir:
break
search_dir = parent
if example_path is None:
return
try:
with open(example_path, encoding="utf-8") as f:
example_data = yaml.safe_load(f)
raw = example_data.get("config_version", 0) if example_data else 0
try:
example_version = int(raw)
except (TypeError, ValueError):
example_version = 0
except Exception:
return
if user_version < example_version:
logger.warning(
"Your config.yaml (version %d) is outdated — the latest version is %d. Run `make config-upgrade` to merge new fields into your config.",
user_version,
example_version,
)
@classmethod
def resolve_env_variables(cls, config: Any) -> Any:
"""Recursively resolve environment variables in the config.
Environment variables are resolved using the `os.getenv` function. Example: $OPENAI_API_KEY
Args:
config: The config to resolve environment variables in.
Returns:
The config with environment variables resolved.
"""
if isinstance(config, str):
if config.startswith("$"):
env_value = os.getenv(config[1:])
if env_value is None:
raise ValueError(f"Environment variable {config[1:]} not found for config value {config}")
return env_value
return config
elif isinstance(config, dict):
return {k: cls.resolve_env_variables(v) for k, v in config.items()}
elif isinstance(config, list):
return [cls.resolve_env_variables(item) for item in config]
return config
def get_model_config(self, name: str) -> ModelConfig | None:
"""Get the model config by name.
Args:
name: The name of the model to get the config for.
Returns:
The model config if found, otherwise None.
"""
return next((model for model in self.models if model.name == name), None)
def get_tool_config(self, name: str) -> ToolConfig | None:
"""Get the tool config by name.
Args:
name: The name of the tool to get the config for.
Returns:
The tool config if found, otherwise None.
"""
return next((tool for tool in self.tools if tool.name == name), None)
def get_tool_group_config(self, name: str) -> ToolGroupConfig | None:
"""Get the tool group config by name.
Args:
name: The name of the tool group to get the config for.
Returns:
The tool group config if found, otherwise None.
"""
return next((group for group in self.tool_groups if group.name == name), None)
_app_config: AppConfig | None = None
_app_config_path: Path | None = None
_app_config_mtime: float | None = None
_app_config_is_custom = False
_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
def _get_config_mtime(config_path: Path) -> float | None:
"""Get the modification time of a config file if it exists."""
try:
return config_path.stat().st_mtime
except OSError:
return None
def _load_and_cache_app_config(config_path: str | None = None) -> AppConfig:
"""Load config from disk and refresh cache metadata."""
global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
resolved_path = AppConfig.resolve_config_path(config_path)
_app_config = AppConfig.from_file(str(resolved_path))
_app_config_path = resolved_path
_app_config_mtime = _get_config_mtime(resolved_path)
_app_config_is_custom = False
return _app_config
def get_app_config() -> AppConfig:
"""Get the DeerFlow config instance.
Returns a cached singleton instance and automatically reloads it when the
underlying config file path or modification time changes. Use
`reload_app_config()` to force a reload, or `reset_app_config()` to clear
the cache.
"""
global _app_config, _app_config_path, _app_config_mtime
runtime_override = _current_app_config.get()
if runtime_override is not None:
return runtime_override
if _app_config is not None and _app_config_is_custom:
return _app_config
resolved_path = AppConfig.resolve_config_path()
current_mtime = _get_config_mtime(resolved_path)
should_reload = _app_config is None or _app_config_path != resolved_path or _app_config_mtime != current_mtime
if should_reload:
if _app_config_path == resolved_path and _app_config_mtime is not None and current_mtime is not None and _app_config_mtime != current_mtime:
logger.info(
"Config file has been modified (mtime: %s -> %s), reloading AppConfig",
_app_config_mtime,
current_mtime,
)
_load_and_cache_app_config(str(resolved_path))
return _app_config
def reload_app_config(config_path: str | None = None) -> AppConfig:
"""Reload the config from file and update the cached instance.
This is useful when the config file has been modified and you want
to pick up the changes without restarting the application.
Args:
config_path: Optional path to config file. If not provided,
uses the default resolution strategy.
Returns:
The newly loaded AppConfig instance.
"""
return _load_and_cache_app_config(config_path)
def reset_app_config() -> None:
"""Reset the cached config instance.
This clears the singleton cache, causing the next call to
`get_app_config()` to reload from file. Useful for testing
or when switching between different configurations.
"""
global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
_app_config = None
_app_config_path = None
_app_config_mtime = None
_app_config_is_custom = False
def set_app_config(config: AppConfig) -> None:
"""Set a custom config instance.
This allows injecting a custom or mock config for testing purposes.
Args:
config: The AppConfig instance to use.
"""
global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
_app_config = config
_app_config_path = None
_app_config_mtime = None
_app_config_is_custom = True
def peek_current_app_config() -> AppConfig | None:
"""Return the runtime-scoped AppConfig override, if one is active."""
return _current_app_config.get()
def push_current_app_config(config: AppConfig) -> None:
"""Push a runtime-scoped AppConfig override for the current execution context."""
stack = _current_app_config_stack.get()
_current_app_config_stack.set(stack + (_current_app_config.get(),))
_current_app_config.set(config)
def pop_current_app_config() -> None:
"""Pop the latest runtime-scoped AppConfig override for the current execution context."""
stack = _current_app_config_stack.get()
if not stack:
_current_app_config.set(None)
return
previous = stack[-1]
_current_app_config_stack.set(stack[:-1])
_current_app_config.set(previous)

View File

@@ -0,0 +1,46 @@
"""Configuration for LangGraph checkpointer."""
from typing import Literal
from pydantic import BaseModel, Field
CheckpointerType = Literal["memory", "sqlite", "postgres"]
class CheckpointerConfig(BaseModel):
"""Configuration for LangGraph state persistence checkpointer."""
type: CheckpointerType = Field(
description="Checkpointer backend type. "
"'memory' is in-process only (lost on restart). "
"'sqlite' persists to a local file (requires langgraph-checkpoint-sqlite). "
"'postgres' persists to PostgreSQL (requires langgraph-checkpoint-postgres)."
)
connection_string: str | None = Field(
default=None,
description="Connection string for sqlite (file path) or postgres (DSN). "
"Required for sqlite and postgres types. "
"For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
"For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'.",
)
# Global configuration instance — None means no checkpointer is configured.
_checkpointer_config: CheckpointerConfig | None = None
def get_checkpointer_config() -> CheckpointerConfig | None:
"""Get the current checkpointer configuration, or None if not configured."""
return _checkpointer_config
def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
"""Set the checkpointer configuration."""
global _checkpointer_config
_checkpointer_config = config
def load_checkpointer_config_from_dict(config_dict: dict) -> None:
"""Load checkpointer configuration from a dictionary."""
global _checkpointer_config
_checkpointer_config = CheckpointerConfig(**config_dict)

View File

@@ -0,0 +1,256 @@
"""Unified extensions configuration for MCP servers and skills."""
import json
import os
from pathlib import Path
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
class McpOAuthConfig(BaseModel):
"""OAuth configuration for an MCP server (HTTP/SSE transports)."""
enabled: bool = Field(default=True, description="Whether OAuth token injection is enabled")
token_url: str = Field(description="OAuth token endpoint URL")
grant_type: Literal["client_credentials", "refresh_token"] = Field(
default="client_credentials",
description="OAuth grant type",
)
client_id: str | None = Field(default=None, description="OAuth client ID")
client_secret: str | None = Field(default=None, description="OAuth client secret")
refresh_token: str | None = Field(default=None, description="OAuth refresh token (for refresh_token grant)")
scope: str | None = Field(default=None, description="OAuth scope")
audience: str | None = Field(default=None, description="OAuth audience (provider-specific)")
token_field: str = Field(default="access_token", description="Field name containing access token in token response")
token_type_field: str = Field(default="token_type", description="Field name containing token type in token response")
expires_in_field: str = Field(default="expires_in", description="Field name containing expiry (seconds) in token response")
default_token_type: str = Field(default="Bearer", description="Default token type when missing in token response")
refresh_skew_seconds: int = Field(default=60, description="Refresh token this many seconds before expiry")
extra_token_params: dict[str, str] = Field(default_factory=dict, description="Additional form params sent to token endpoint")
model_config = ConfigDict(extra="allow")
class McpServerConfig(BaseModel):
"""Configuration for a single MCP server."""
enabled: bool = Field(default=True, description="Whether this MCP server is enabled")
type: str = Field(default="stdio", description="Transport type: 'stdio', 'sse', or 'http'")
command: str | None = Field(default=None, description="Command to execute to start the MCP server (for stdio type)")
args: list[str] = Field(default_factory=list, description="Arguments to pass to the command (for stdio type)")
env: dict[str, str] = Field(default_factory=dict, description="Environment variables for the MCP server")
url: str | None = Field(default=None, description="URL of the MCP server (for sse or http type)")
headers: dict[str, str] = Field(default_factory=dict, description="HTTP headers to send (for sse or http type)")
oauth: McpOAuthConfig | None = Field(default=None, description="OAuth configuration (for sse or http type)")
description: str = Field(default="", description="Human-readable description of what this MCP server provides")
model_config = ConfigDict(extra="allow")
class SkillStateConfig(BaseModel):
"""Configuration for a single skill's state."""
enabled: bool = Field(default=True, description="Whether this skill is enabled")
class ExtensionsConfig(BaseModel):
"""Unified configuration for MCP servers and skills."""
mcp_servers: dict[str, McpServerConfig] = Field(
default_factory=dict,
description="Map of MCP server name to configuration",
alias="mcpServers",
)
skills: dict[str, SkillStateConfig] = Field(
default_factory=dict,
description="Map of skill name to state configuration",
)
model_config = ConfigDict(extra="allow", populate_by_name=True)
@classmethod
def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
"""Resolve the extensions config file path.
Priority:
1. If provided `config_path` argument, use it.
2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
3. Otherwise, check for `extensions_config.json` in the current directory, then in the parent directory.
4. For backward compatibility, also check for `mcp_config.json` if `extensions_config.json` is not found.
5. If not found, return None (extensions are optional).
Args:
config_path: Optional path to extensions config file.
Resolution order:
1. If provided `config_path` argument, use it.
2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
3. Otherwise, search backend/repository-root defaults for
`extensions_config.json`, then legacy `mcp_config.json`.
Returns:
Path to the extensions config file if found, otherwise None.
"""
if config_path:
path = Path(config_path)
if not path.exists():
raise FileNotFoundError(f"Extensions config file specified by param `config_path` not found at {path}")
return path
elif os.getenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH"):
path = Path(os.getenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH"))
if not path.exists():
raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
return path
else:
backend_dir = Path(__file__).resolve().parents[4]
repo_root = backend_dir.parent
for path in (
backend_dir / "extensions_config.json",
repo_root / "extensions_config.json",
backend_dir / "mcp_config.json",
repo_root / "mcp_config.json",
):
if path.exists():
return path
# Extensions are optional, so return None if not found
return None
@classmethod
def from_file(cls, config_path: str | None = None) -> "ExtensionsConfig":
"""Load extensions config from JSON file.
See `resolve_config_path` for more details.
Args:
config_path: Path to the extensions config file.
Returns:
ExtensionsConfig: The loaded config, or empty config if file not found.
"""
resolved_path = cls.resolve_config_path(config_path)
if resolved_path is None:
# Return empty config if extensions config file is not found
return cls(mcp_servers={}, skills={})
try:
with open(resolved_path, encoding="utf-8") as f:
config_data = json.load(f)
cls.resolve_env_variables(config_data)
return cls.model_validate(config_data)
except json.JSONDecodeError as e:
raise ValueError(f"Extensions config file at {resolved_path} is not valid JSON: {e}") from e
except Exception as e:
raise RuntimeError(f"Failed to load extensions config from {resolved_path}: {e}") from e
@classmethod
def resolve_env_variables(cls, config: dict[str, Any]) -> dict[str, Any]:
"""Recursively resolve environment variables in the config.
Environment variables are resolved using the `os.getenv` function. Example: $OPENAI_API_KEY
Args:
config: The config to resolve environment variables in.
Returns:
The config with environment variables resolved.
"""
for key, value in config.items():
if isinstance(value, str):
if value.startswith("$"):
env_value = os.getenv(value[1:])
if env_value is None:
# Unresolved placeholder — store empty string so downstream
# consumers (e.g. MCP servers) don't receive the literal "$VAR"
# token as an actual environment value.
config[key] = ""
else:
config[key] = env_value
else:
config[key] = value
elif isinstance(value, dict):
config[key] = cls.resolve_env_variables(value)
elif isinstance(value, list):
config[key] = [cls.resolve_env_variables(item) if isinstance(item, dict) else item for item in value]
return config
def get_enabled_mcp_servers(self) -> dict[str, McpServerConfig]:
"""Get only the enabled MCP servers.
Returns:
Dictionary of enabled MCP servers.
"""
return {name: config for name, config in self.mcp_servers.items() if config.enabled}
def is_skill_enabled(self, skill_name: str, skill_category: str) -> bool:
"""Check if a skill is enabled.
Args:
skill_name: Name of the skill
skill_category: Category of the skill
Returns:
True if enabled, False otherwise
"""
skill_config = self.skills.get(skill_name)
if skill_config is None:
# Default to enable for public & custom skill
return skill_category in ("public", "custom")
return skill_config.enabled
_extensions_config: ExtensionsConfig | None = None
def get_extensions_config() -> ExtensionsConfig:
"""Get the extensions config instance.
Returns a cached singleton instance. Use `reload_extensions_config()` to reload
from file, or `reset_extensions_config()` to clear the cache.
Returns:
The cached ExtensionsConfig instance.
"""
global _extensions_config
if _extensions_config is None:
_extensions_config = ExtensionsConfig.from_file()
return _extensions_config
def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
"""Reload the extensions config from file and update the cached instance.
This is useful when the config file has been modified and you want
to pick up the changes without restarting the application.
Args:
config_path: Optional path to extensions config file. If not provided,
uses the default resolution strategy.
Returns:
The newly loaded ExtensionsConfig instance.
"""
global _extensions_config
_extensions_config = ExtensionsConfig.from_file(config_path)
return _extensions_config
def reset_extensions_config() -> None:
"""Reset the cached extensions config instance.
This clears the singleton cache, causing the next call to
`get_extensions_config()` to reload from file. Useful for testing
or when switching between different configurations.
"""
global _extensions_config
_extensions_config = None
def set_extensions_config(config: ExtensionsConfig) -> None:
"""Set a custom extensions config instance.
This allows injecting a custom or mock config for testing purposes.
Args:
config: The ExtensionsConfig instance to use.
"""
global _extensions_config
_extensions_config = config

View File

@@ -0,0 +1,48 @@
"""Configuration for pre-tool-call authorization."""
from pydantic import BaseModel, Field
class GuardrailProviderConfig(BaseModel):
"""Configuration for a guardrail provider."""
use: str = Field(description="Class path (e.g. 'deerflow.guardrails.builtin:AllowlistProvider')")
config: dict = Field(default_factory=dict, description="Provider-specific settings passed as kwargs")
class GuardrailsConfig(BaseModel):
"""Configuration for pre-tool-call authorization.
When enabled, every tool call passes through the configured provider
before execution. The provider receives tool name, arguments, and the
agent's passport reference, and returns an allow/deny decision.
"""
enabled: bool = Field(default=False, description="Enable guardrail middleware")
fail_closed: bool = Field(default=True, description="Block tool calls if provider errors")
passport: str | None = Field(default=None, description="OAP passport path or hosted agent ID")
provider: GuardrailProviderConfig | None = Field(default=None, description="Guardrail provider configuration")
_guardrails_config: GuardrailsConfig | None = None
def get_guardrails_config() -> GuardrailsConfig:
"""Get the guardrails config, returning defaults if not loaded."""
global _guardrails_config
if _guardrails_config is None:
_guardrails_config = GuardrailsConfig()
return _guardrails_config
def load_guardrails_config_from_dict(data: dict) -> GuardrailsConfig:
"""Load guardrails config from a dict (called during AppConfig loading)."""
global _guardrails_config
_guardrails_config = GuardrailsConfig.model_validate(data)
return _guardrails_config
def reset_guardrails_config() -> None:
"""Reset the cached config instance. Used in tests to prevent singleton leaks."""
global _guardrails_config
_guardrails_config = None

View File

@@ -0,0 +1,82 @@
"""Configuration for memory mechanism."""
from pydantic import BaseModel, Field
class MemoryConfig(BaseModel):
"""Configuration for global memory mechanism."""
enabled: bool = Field(
default=True,
description="Whether to enable memory mechanism",
)
storage_path: str = Field(
default="",
description=(
"Path to store memory data. "
"If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). "
"Absolute paths are used as-is. "
"Relative paths are resolved against `Paths.base_dir` "
"(not the backend working directory). "
"Note: if you previously set this to `.deer-flow/memory.json`, "
"the file will now be resolved as `{base_dir}/.deer-flow/memory.json`; "
"migrate existing data or use an absolute path to preserve the old location."
),
)
storage_class: str = Field(
default="deerflow.agents.memory.storage.FileMemoryStorage",
description="The class path for memory storage provider",
)
debounce_seconds: int = Field(
default=30,
ge=1,
le=300,
description="Seconds to wait before processing queued updates (debounce)",
)
model_name: str | None = Field(
default=None,
description="Model name to use for memory updates (None = use default model)",
)
max_facts: int = Field(
default=100,
ge=10,
le=500,
description="Maximum number of facts to store",
)
fact_confidence_threshold: float = Field(
default=0.7,
ge=0.0,
le=1.0,
description="Minimum confidence threshold for storing facts",
)
injection_enabled: bool = Field(
default=True,
description="Whether to inject memory into system prompt",
)
max_injection_tokens: int = Field(
default=2000,
ge=100,
le=8000,
description="Maximum tokens to use for memory injection",
)
# Global configuration instance
_memory_config: MemoryConfig = MemoryConfig()
def get_memory_config() -> MemoryConfig:
"""Get the current memory configuration."""
return _memory_config
def set_memory_config(config: MemoryConfig) -> None:
"""Set the memory configuration."""
global _memory_config
_memory_config = config
def load_memory_config_from_dict(config_dict: dict) -> None:
"""Load memory configuration from a dictionary."""
global _memory_config
_memory_config = MemoryConfig(**config_dict)

View File

@@ -0,0 +1,41 @@
from pydantic import BaseModel, ConfigDict, Field
class ModelConfig(BaseModel):
"""Config section for a model"""
name: str = Field(..., description="Unique name for the model")
display_name: str | None = Field(..., default_factory=lambda: None, description="Display name for the model")
description: str | None = Field(..., default_factory=lambda: None, description="Description for the model")
use: str = Field(
...,
description="Class path of the model provider(e.g. langchain_openai.ChatOpenAI)",
)
model: str = Field(..., description="Model name")
model_config = ConfigDict(extra="allow")
use_responses_api: bool | None = Field(
default=None,
description="Whether to route OpenAI ChatOpenAI calls through the /v1/responses API",
)
output_version: str | None = Field(
default=None,
description="Structured output version for OpenAI responses content, e.g. responses/v1",
)
supports_thinking: bool = Field(default_factory=lambda: False, description="Whether the model supports thinking")
supports_reasoning_effort: bool = Field(default_factory=lambda: False, description="Whether the model supports reasoning effort")
when_thinking_enabled: dict | None = Field(
default_factory=lambda: None,
description="Extra settings to be passed to the model when thinking is enabled",
)
when_thinking_disabled: dict | None = Field(
default_factory=lambda: None,
description="Extra settings to be passed to the model when thinking is disabled",
)
supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs")
thinking: dict | None = Field(
default_factory=lambda: None,
description=(
"Thinking settings for the model. If provided, these settings will be passed to the model when thinking is enabled. "
"This is a shortcut for `when_thinking_enabled` and will be merged with `when_thinking_enabled` if both are provided."
),
)

View File

@@ -0,0 +1,306 @@
import os
import re
import shutil
from pathlib import Path, PureWindowsPath
# Virtual path prefix seen by agents inside the sandbox
VIRTUAL_PATH_PREFIX = "/mnt/user-data"
_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
def _default_local_base_dir() -> Path:
"""Return the repo-local DeerFlow state directory without relying on cwd."""
backend_dir = Path(__file__).resolve().parents[4]
return backend_dir / ".deer-flow"
def _validate_thread_id(thread_id: str) -> str:
"""Validate a thread ID before using it in filesystem paths."""
if not _SAFE_THREAD_ID_RE.match(thread_id):
raise ValueError(f"Invalid thread_id {thread_id!r}: only alphanumeric characters, hyphens, and underscores are allowed.")
return thread_id
def _join_host_path(base: str, *parts: str) -> str:
"""Join host filesystem path segments while preserving native style.
Docker Desktop on Windows expects bind mount sources to stay in Windows
path form (for example ``C:\\repo\\backend\\.deer-flow``). Using
``Path(base) / ...`` on a POSIX host can accidentally rewrite those paths
with mixed separators, so this helper preserves the original style.
"""
if not parts:
return base
if re.match(r"^[A-Za-z]:[\\/]", base) or base.startswith("\\\\") or "\\" in base:
result = PureWindowsPath(base)
for part in parts:
result /= part
return str(result)
result = Path(base)
for part in parts:
result /= part
return str(result)
def join_host_path(base: str, *parts: str) -> str:
"""Join host filesystem path segments while preserving native style."""
return _join_host_path(base, *parts)
class Paths:
"""
Centralized path configuration for DeerFlow application data.
Directory layout (host side):
{base_dir}/
├── memory.json
├── USER.md <-- global user profile (injected into all agents)
├── agents/
│ └── {agent_name}/
│ ├── config.yaml
│ ├── SOUL.md <-- agent personality/identity (injected alongside lead prompt)
│ └── memory.json
└── threads/
└── {thread_id}/
└── user-data/ <-- mounted as /mnt/user-data/ inside sandbox
├── workspace/ <-- /mnt/user-data/workspace/
├── uploads/ <-- /mnt/user-data/uploads/
└── outputs/ <-- /mnt/user-data/outputs/
BaseDir resolution (in priority order):
1. Constructor argument `base_dir`
2. DEER_FLOW_HOME environment variable
3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
"""
def __init__(self, base_dir: str | Path | None = None) -> None:
self._base_dir = Path(base_dir).resolve() if base_dir is not None else None
@property
def host_base_dir(self) -> Path:
"""Host-visible base dir for Docker volume mount sources.
When running inside Docker with a mounted Docker socket (DooD), the Docker
daemon runs on the host and resolves mount paths against the host filesystem.
Set DEER_FLOW_HOST_BASE_DIR to the host-side path that corresponds to this
container's base_dir so that sandbox container volume mounts work correctly.
Falls back to base_dir when the env var is not set (native/local execution).
"""
if env := os.getenv("DEER_FLOW_HOST_BASE_DIR"):
return Path(env)
return self.base_dir
def _host_base_dir_str(self) -> str:
"""Return the host base dir as a raw string for bind mounts."""
if env := os.getenv("DEER_FLOW_HOST_BASE_DIR"):
return env
return str(self.base_dir)
@property
def base_dir(self) -> Path:
"""Root directory for all application data."""
if self._base_dir is not None:
return self._base_dir
if env_home := os.getenv("DEER_FLOW_HOME"):
return Path(env_home).resolve()
return _default_local_base_dir()
@property
def memory_file(self) -> Path:
"""Path to the persisted memory file: `{base_dir}/memory.json`."""
return self.base_dir / "memory.json"
@property
def user_md_file(self) -> Path:
"""Path to the global user profile file: `{base_dir}/USER.md`."""
return self.base_dir / "USER.md"
@property
def agents_dir(self) -> Path:
"""Root directory for all custom agents: `{base_dir}/agents/`."""
return self.base_dir / "agents"
def agent_dir(self, name: str) -> Path:
"""Directory for a specific agent: `{base_dir}/agents/{name}/`."""
return self.agents_dir / name.lower()
def agent_memory_file(self, name: str) -> Path:
"""Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
return self.agent_dir(name) / "memory.json"
def thread_dir(self, thread_id: str) -> Path:
"""
Host path for a thread's data: `{base_dir}/threads/{thread_id}/`
This directory contains a `user-data/` subdirectory that is mounted
as `/mnt/user-data/` inside the sandbox.
Raises:
ValueError: If `thread_id` contains unsafe characters (path separators
or `..`) that could cause directory traversal.
"""
return self.base_dir / "threads" / _validate_thread_id(thread_id)
def sandbox_work_dir(self, thread_id: str) -> Path:
"""
Host path for the agent's workspace directory.
Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
Sandbox: `/mnt/user-data/workspace/`
"""
return self.thread_dir(thread_id) / "user-data" / "workspace"
def sandbox_uploads_dir(self, thread_id: str) -> Path:
"""
Host path for user-uploaded files.
Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
Sandbox: `/mnt/user-data/uploads/`
"""
return self.thread_dir(thread_id) / "user-data" / "uploads"
def sandbox_outputs_dir(self, thread_id: str) -> Path:
"""
Host path for agent-generated artifacts.
Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
Sandbox: `/mnt/user-data/outputs/`
"""
return self.thread_dir(thread_id) / "user-data" / "outputs"
def acp_workspace_dir(self, thread_id: str) -> Path:
"""
Host path for the ACP workspace of a specific thread.
Host: `{base_dir}/threads/{thread_id}/acp-workspace/`
Sandbox: `/mnt/acp-workspace/`
Each thread gets its own isolated ACP workspace so that concurrent
sessions cannot read each other's ACP agent outputs.
"""
return self.thread_dir(thread_id) / "acp-workspace"
def sandbox_user_data_dir(self, thread_id: str) -> Path:
"""
Host path for the user-data root.
Host: `{base_dir}/threads/{thread_id}/user-data/`
Sandbox: `/mnt/user-data/`
"""
return self.thread_dir(thread_id) / "user-data"
def host_thread_dir(self, thread_id: str) -> str:
"""Host path for a thread directory, preserving Windows path syntax."""
return _join_host_path(self._host_base_dir_str(), "threads", _validate_thread_id(thread_id))
def host_sandbox_user_data_dir(self, thread_id: str) -> str:
"""Host path for a thread's user-data root."""
return _join_host_path(self.host_thread_dir(thread_id), "user-data")
def host_sandbox_work_dir(self, thread_id: str) -> str:
"""Host path for the workspace mount source."""
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "workspace")
def host_sandbox_uploads_dir(self, thread_id: str) -> str:
"""Host path for the uploads mount source."""
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "uploads")
def host_sandbox_outputs_dir(self, thread_id: str) -> str:
"""Host path for the outputs mount source."""
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "outputs")
def host_acp_workspace_dir(self, thread_id: str) -> str:
"""Host path for the ACP workspace mount source."""
return _join_host_path(self.host_thread_dir(thread_id), "acp-workspace")
def ensure_thread_dirs(self, thread_id: str) -> None:
"""Create all standard sandbox directories for a thread.
Directories are created with mode 0o777 so that sandbox containers
(which may run as a different UID than the host backend process) can
write to the volume-mounted paths without "Permission denied" errors.
The explicit chmod() call is necessary because Path.mkdir(mode=...) is
subject to the process umask and may not yield the intended permissions.
Includes the ACP workspace directory so it can be volume-mounted into
the sandbox container at ``/mnt/acp-workspace`` even before the first
ACP agent invocation.
"""
for d in [
self.sandbox_work_dir(thread_id),
self.sandbox_uploads_dir(thread_id),
self.sandbox_outputs_dir(thread_id),
self.acp_workspace_dir(thread_id),
]:
d.mkdir(parents=True, exist_ok=True)
d.chmod(0o777)
def delete_thread_dir(self, thread_id: str) -> None:
"""Delete all persisted data for a thread.
The operation is idempotent: missing thread directories are ignored.
"""
thread_dir = self.thread_dir(thread_id)
if thread_dir.exists():
shutil.rmtree(thread_dir)
def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
"""Resolve a sandbox virtual path to the actual host filesystem path.
Args:
thread_id: The thread ID.
virtual_path: Virtual path as seen inside the sandbox, e.g.
``/mnt/user-data/outputs/report.pdf``.
Leading slashes are stripped before matching.
Returns:
The resolved absolute host filesystem path.
Raises:
ValueError: If the path does not start with the expected virtual
prefix or a path-traversal attempt is detected.
"""
stripped = virtual_path.lstrip("/")
prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
# Require an exact segment-boundary match to avoid prefix confusion
# (e.g. reject paths like "mnt/user-dataX/...").
if stripped != prefix and not stripped.startswith(prefix + "/"):
raise ValueError(f"Path must start with /{prefix}")
relative = stripped[len(prefix) :].lstrip("/")
base = self.sandbox_user_data_dir(thread_id).resolve()
actual = (base / relative).resolve()
try:
actual.relative_to(base)
except ValueError:
raise ValueError("Access denied: path traversal detected")
return actual
# ── Singleton ────────────────────────────────────────────────────────────
_paths: Paths | None = None
def get_paths() -> Paths:
"""Return the global Paths singleton (lazy-initialized)."""
global _paths
if _paths is None:
_paths = Paths()
return _paths
def resolve_path(path: str) -> Path:
"""Resolve *path* to an absolute ``Path``.
Relative paths are resolved relative to the application base directory.
Absolute paths are returned as-is (after normalisation).
"""
p = Path(path)
if not p.is_absolute():
p = get_paths().base_dir / path
return p.resolve()

View File

@@ -0,0 +1,83 @@
from pydantic import BaseModel, ConfigDict, Field
class VolumeMountConfig(BaseModel):
"""Configuration for a volume mount."""
host_path: str = Field(..., description="Path on the host machine")
container_path: str = Field(..., description="Path inside the container")
read_only: bool = Field(default=False, description="Whether the mount is read-only")
class SandboxConfig(BaseModel):
"""Config section for a sandbox.
Common options:
use: Class path of the sandbox provider (required)
allow_host_bash: Enable host-side bash execution for LocalSandboxProvider.
Dangerous and intended only for fully trusted local workflows.
AioSandboxProvider specific options:
image: Docker image to use (default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest)
port: Base port for sandbox containers (default: 8080)
replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
container_prefix: Prefix for container names (default: deer-flow-sandbox)
idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
mounts: List of volume mounts to share directories with the container
environment: Environment variables to inject into the container (values starting with $ are resolved from host env)
"""
use: str = Field(
...,
description="Class path of the sandbox provider (e.g. deerflow.sandbox.local:LocalSandboxProvider)",
)
allow_host_bash: bool = Field(
default=False,
description="Allow the bash tool to execute directly on the host when using LocalSandboxProvider. Dangerous; intended only for fully trusted local environments.",
)
image: str | None = Field(
default=None,
description="Docker image to use for the sandbox container",
)
port: int | None = Field(
default=None,
description="Base port for sandbox containers",
)
replicas: int | None = Field(
default=None,
description="Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.",
)
container_prefix: str | None = Field(
default=None,
description="Prefix for container names",
)
idle_timeout: int | None = Field(
default=None,
description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
)
mounts: list[VolumeMountConfig] = Field(
default_factory=list,
description="List of volume mounts to share directories between host and container",
)
environment: dict[str, str] = Field(
default_factory=dict,
description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
)
bash_output_max_chars: int = Field(
default=20000,
ge=0,
description="Maximum characters to keep from bash tool output. Output exceeding this limit is middle-truncated (head + tail), preserving the first and last half. Set to 0 to disable truncation.",
)
read_file_output_max_chars: int = Field(
default=50000,
ge=0,
description="Maximum characters to keep from read_file tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
)
ls_output_max_chars: int = Field(
default=20000,
ge=0,
description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
)
model_config = ConfigDict(extra="allow")

View File

@@ -0,0 +1,14 @@
from pydantic import BaseModel, Field
class SkillEvolutionConfig(BaseModel):
"""Configuration for agent-managed skill evolution."""
enabled: bool = Field(
default=False,
description="Whether the agent can create and modify skills under skills/custom.",
)
moderation_model_name: str | None = Field(
default=None,
description="Optional model name for skill security moderation. Defaults to the primary chat model.",
)

View File

@@ -0,0 +1,54 @@
from pathlib import Path
from pydantic import BaseModel, Field
def _default_repo_root() -> Path:
"""Resolve the repo root without relying on the current working directory."""
return Path(__file__).resolve().parents[5]
class SkillsConfig(BaseModel):
"""Configuration for skills system"""
path: str | None = Field(
default=None,
description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
)
container_path: str = Field(
default="/mnt/skills",
description="Path where skills are mounted in the sandbox container",
)
def get_skills_path(self) -> Path:
"""
Get the resolved skills directory path.
Returns:
Path to the skills directory
"""
if self.path:
# Use configured path (can be absolute or relative)
path = Path(self.path)
if not path.is_absolute():
# If relative, resolve from the repo root for deterministic behavior.
path = _default_repo_root() / path
return path.resolve()
else:
# Default: ../skills relative to backend directory
from deerflow.skills.loader import get_skills_root_path
return get_skills_root_path()
def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
"""
Get the full container path for a specific skill.
Args:
skill_name: Name of the skill (directory name)
category: Category of the skill (public or custom)
Returns:
Full path to the skill in the container
"""
return f"{self.container_path}/{category}/{skill_name}"

View File

@@ -0,0 +1,46 @@
"""Configuration for stream bridge."""
from typing import Literal
from pydantic import BaseModel, Field
StreamBridgeType = Literal["memory", "redis"]
class StreamBridgeConfig(BaseModel):
"""Configuration for the stream bridge that connects agent workers to SSE endpoints."""
type: StreamBridgeType = Field(
default="memory",
description="Stream bridge backend type. 'memory' uses in-process asyncio.Queue (single-process only). 'redis' uses Redis Streams (planned for Phase 2, not yet implemented).",
)
redis_url: str | None = Field(
default=None,
description="Redis URL for the redis stream bridge type. Example: 'redis://localhost:6379/0'.",
)
queue_maxsize: int = Field(
default=256,
description="Maximum number of events buffered per run in the memory bridge.",
)
# Global configuration instance — None means no stream bridge is configured
# (falls back to memory with defaults).
_stream_bridge_config: StreamBridgeConfig | None = None
def get_stream_bridge_config() -> StreamBridgeConfig | None:
"""Get the current stream bridge configuration, or None if not configured."""
return _stream_bridge_config
def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
"""Set the stream bridge configuration."""
global _stream_bridge_config
_stream_bridge_config = config
def load_stream_bridge_config_from_dict(config_dict: dict) -> None:
"""Load stream bridge configuration from a dictionary."""
global _stream_bridge_config
_stream_bridge_config = StreamBridgeConfig(**config_dict)

View File

@@ -0,0 +1,102 @@
"""Configuration for the subagent system loaded from config.yaml."""
import logging
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
class SubagentOverrideConfig(BaseModel):
"""Per-agent configuration overrides."""
timeout_seconds: int | None = Field(
default=None,
ge=1,
description="Timeout in seconds for this subagent (None = use global default)",
)
max_turns: int | None = Field(
default=None,
ge=1,
description="Maximum turns for this subagent (None = use global or builtin default)",
)
class SubagentsAppConfig(BaseModel):
"""Configuration for the subagent system."""
timeout_seconds: int = Field(
default=900,
ge=1,
description="Default timeout in seconds for all subagents (default: 900 = 15 minutes)",
)
max_turns: int | None = Field(
default=None,
ge=1,
description="Optional default max-turn override for all subagents (None = keep builtin defaults)",
)
agents: dict[str, SubagentOverrideConfig] = Field(
default_factory=dict,
description="Per-agent configuration overrides keyed by agent name",
)
def get_timeout_for(self, agent_name: str) -> int:
"""Get the effective timeout for a specific agent.
Args:
agent_name: The name of the subagent.
Returns:
The timeout in seconds, using per-agent override if set, otherwise global default.
"""
override = self.agents.get(agent_name)
if override is not None and override.timeout_seconds is not None:
return override.timeout_seconds
return self.timeout_seconds
def get_max_turns_for(self, agent_name: str, builtin_default: int) -> int:
"""Get the effective max_turns for a specific agent."""
override = self.agents.get(agent_name)
if override is not None and override.max_turns is not None:
return override.max_turns
if self.max_turns is not None:
return self.max_turns
return builtin_default
_subagents_config: SubagentsAppConfig = SubagentsAppConfig()
def get_subagents_app_config() -> SubagentsAppConfig:
"""Get the current subagents configuration."""
return _subagents_config
def load_subagents_config_from_dict(config_dict: dict) -> None:
"""Load subagents configuration from a dictionary."""
global _subagents_config
_subagents_config = SubagentsAppConfig(**config_dict)
overrides_summary = {}
for name, override in _subagents_config.agents.items():
parts = []
if override.timeout_seconds is not None:
parts.append(f"timeout={override.timeout_seconds}s")
if override.max_turns is not None:
parts.append(f"max_turns={override.max_turns}")
if parts:
overrides_summary[name] = ", ".join(parts)
if overrides_summary:
logger.info(
"Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s",
_subagents_config.timeout_seconds,
_subagents_config.max_turns,
overrides_summary,
)
else:
logger.info(
"Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
_subagents_config.timeout_seconds,
_subagents_config.max_turns,
)

View File

@@ -0,0 +1,74 @@
"""Configuration for conversation summarization."""
from typing import Literal
from pydantic import BaseModel, Field
ContextSizeType = Literal["fraction", "tokens", "messages"]
class ContextSize(BaseModel):
"""Context size specification for trigger or keep parameters."""
type: ContextSizeType = Field(description="Type of context size specification")
value: int | float = Field(description="Value for the context size specification")
def to_tuple(self) -> tuple[ContextSizeType, int | float]:
"""Convert to tuple format expected by SummarizationMiddleware."""
return (self.type, self.value)
class SummarizationConfig(BaseModel):
"""Configuration for automatic conversation summarization."""
enabled: bool = Field(
default=False,
description="Whether to enable automatic conversation summarization",
)
model_name: str | None = Field(
default=None,
description="Model name to use for summarization (None = use a lightweight model)",
)
trigger: ContextSize | list[ContextSize] | None = Field(
default=None,
description="One or more thresholds that trigger summarization. When any threshold is met, summarization runs. "
"Examples: {'type': 'messages', 'value': 50} triggers at 50 messages, "
"{'type': 'tokens', 'value': 4000} triggers at 4000 tokens, "
"{'type': 'fraction', 'value': 0.8} triggers at 80% of model's max input tokens",
)
keep: ContextSize = Field(
default_factory=lambda: ContextSize(type="messages", value=20),
description="Context retention policy after summarization. Specifies how much history to preserve. "
"Examples: {'type': 'messages', 'value': 20} keeps 20 messages, "
"{'type': 'tokens', 'value': 3000} keeps 3000 tokens, "
"{'type': 'fraction', 'value': 0.3} keeps 30% of model's max input tokens",
)
trim_tokens_to_summarize: int | None = Field(
default=4000,
description="Maximum tokens to keep when preparing messages for summarization. Pass null to skip trimming.",
)
summary_prompt: str | None = Field(
default=None,
description="Custom prompt template for generating summaries. If not provided, uses the default LangChain prompt.",
)
# Global configuration instance
_summarization_config: SummarizationConfig = SummarizationConfig()
def get_summarization_config() -> SummarizationConfig:
"""Get the current summarization configuration."""
return _summarization_config
def set_summarization_config(config: SummarizationConfig) -> None:
"""Set the summarization configuration."""
global _summarization_config
_summarization_config = config
def load_summarization_config_from_dict(config_dict: dict) -> None:
"""Load summarization configuration from a dictionary."""
global _summarization_config
_summarization_config = SummarizationConfig(**config_dict)

View File

@@ -0,0 +1,53 @@
"""Configuration for automatic thread title generation."""
from pydantic import BaseModel, Field
class TitleConfig(BaseModel):
"""Configuration for automatic thread title generation."""
enabled: bool = Field(
default=True,
description="Whether to enable automatic title generation",
)
max_words: int = Field(
default=6,
ge=1,
le=20,
description="Maximum number of words in the generated title",
)
max_chars: int = Field(
default=60,
ge=10,
le=200,
description="Maximum number of characters in the generated title",
)
model_name: str | None = Field(
default=None,
description="Model name to use for title generation (None = use default model)",
)
prompt_template: str = Field(
default=("Generate a concise title (max {max_words} words) for this conversation.\nUser: {user_msg}\nAssistant: {assistant_msg}\n\nReturn ONLY the title, no quotes, no explanation."),
description="Prompt template for title generation",
)
# Global configuration instance
_title_config: TitleConfig = TitleConfig()
def get_title_config() -> TitleConfig:
"""Get the current title configuration."""
return _title_config
def set_title_config(config: TitleConfig) -> None:
"""Set the title configuration."""
global _title_config
_title_config = config
def load_title_config_from_dict(config_dict: dict) -> None:
"""Load title configuration from a dictionary."""
global _title_config
_title_config = TitleConfig(**config_dict)

View File

@@ -0,0 +1,7 @@
from pydantic import BaseModel, Field
class TokenUsageConfig(BaseModel):
"""Configuration for token usage tracking."""
enabled: bool = Field(default=False, description="Enable token usage tracking middleware")

View File

@@ -0,0 +1,20 @@
from pydantic import BaseModel, ConfigDict, Field
class ToolGroupConfig(BaseModel):
"""Config section for a tool group"""
name: str = Field(..., description="Unique name for the tool group")
model_config = ConfigDict(extra="allow")
class ToolConfig(BaseModel):
"""Config section for a tool"""
name: str = Field(..., description="Unique name for the tool")
group: str = Field(..., description="Group name for the tool")
use: str = Field(
...,
description="Variable name of the tool provider(e.g. deerflow.sandbox.tools:bash_tool)",
)
model_config = ConfigDict(extra="allow")

View File

@@ -0,0 +1,35 @@
"""Configuration for deferred tool loading via tool_search."""
from pydantic import BaseModel, Field
class ToolSearchConfig(BaseModel):
"""Configuration for deferred tool loading via tool_search.
When enabled, MCP tools are not loaded into the agent's context directly.
Instead, they are listed by name in the system prompt and discoverable
via the tool_search tool at runtime.
"""
enabled: bool = Field(
default=False,
description="Defer tools and enable tool_search",
)
_tool_search_config: ToolSearchConfig | None = None
def get_tool_search_config() -> ToolSearchConfig:
"""Get the tool search config, loading from AppConfig if needed."""
global _tool_search_config
if _tool_search_config is None:
_tool_search_config = ToolSearchConfig()
return _tool_search_config
def load_tool_search_config_from_dict(data: dict) -> ToolSearchConfig:
"""Load tool search config from a dict (called during AppConfig loading)."""
global _tool_search_config
_tool_search_config = ToolSearchConfig.model_validate(data)
return _tool_search_config

View File

@@ -0,0 +1,149 @@
import os
import threading
from pydantic import BaseModel, Field
_config_lock = threading.Lock()
class LangSmithTracingConfig(BaseModel):
"""Configuration for LangSmith tracing."""
enabled: bool = Field(...)
api_key: str | None = Field(...)
project: str = Field(...)
endpoint: str = Field(...)
@property
def is_configured(self) -> bool:
return self.enabled and bool(self.api_key)
def validate(self) -> None:
if self.enabled and not self.api_key:
raise ValueError("LangSmith tracing is enabled but LANGSMITH_API_KEY (or LANGCHAIN_API_KEY) is not set.")
class LangfuseTracingConfig(BaseModel):
"""Configuration for Langfuse tracing."""
enabled: bool = Field(...)
public_key: str | None = Field(...)
secret_key: str | None = Field(...)
host: str = Field(...)
@property
def is_configured(self) -> bool:
return self.enabled and bool(self.public_key) and bool(self.secret_key)
def validate(self) -> None:
if not self.enabled:
return
missing: list[str] = []
if not self.public_key:
missing.append("LANGFUSE_PUBLIC_KEY")
if not self.secret_key:
missing.append("LANGFUSE_SECRET_KEY")
if missing:
raise ValueError(f"Langfuse tracing is enabled but required settings are missing: {', '.join(missing)}")
class TracingConfig(BaseModel):
"""Tracing configuration for supported providers."""
langsmith: LangSmithTracingConfig = Field(...)
langfuse: LangfuseTracingConfig = Field(...)
@property
def is_configured(self) -> bool:
return bool(self.enabled_providers)
@property
def explicitly_enabled_providers(self) -> list[str]:
enabled: list[str] = []
if self.langsmith.enabled:
enabled.append("langsmith")
if self.langfuse.enabled:
enabled.append("langfuse")
return enabled
@property
def enabled_providers(self) -> list[str]:
enabled: list[str] = []
if self.langsmith.is_configured:
enabled.append("langsmith")
if self.langfuse.is_configured:
enabled.append("langfuse")
return enabled
def validate_enabled(self) -> None:
self.langsmith.validate()
self.langfuse.validate()
_tracing_config: TracingConfig | None = None
_TRUTHY_VALUES = {"1", "true", "yes", "on"}
def _env_flag_preferred(*names: str) -> bool:
"""Return the boolean value of the first env var that is present and non-empty."""
for name in names:
value = os.environ.get(name)
if value is not None and value.strip():
return value.strip().lower() in _TRUTHY_VALUES
return False
def _first_env_value(*names: str) -> str | None:
"""Return the first non-empty environment value from candidate names."""
for name in names:
value = os.environ.get(name)
if value and value.strip():
return value.strip()
return None
def get_tracing_config() -> TracingConfig:
"""Get the current tracing configuration from environment variables."""
global _tracing_config
if _tracing_config is not None:
return _tracing_config
with _config_lock:
if _tracing_config is not None:
return _tracing_config
_tracing_config = TracingConfig(
langsmith=LangSmithTracingConfig(
enabled=_env_flag_preferred("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2", "LANGCHAIN_TRACING"),
api_key=_first_env_value("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"),
project=_first_env_value("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT") or "deer-flow",
endpoint=_first_env_value("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT") or "https://api.smith.langchain.com",
),
langfuse=LangfuseTracingConfig(
enabled=_env_flag_preferred("LANGFUSE_TRACING"),
public_key=_first_env_value("LANGFUSE_PUBLIC_KEY"),
secret_key=_first_env_value("LANGFUSE_SECRET_KEY"),
host=_first_env_value("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com",
),
)
return _tracing_config
def get_enabled_tracing_providers() -> list[str]:
"""Return the configured tracing providers that are enabled and complete."""
return get_tracing_config().enabled_providers
def get_explicitly_enabled_tracing_providers() -> list[str]:
"""Return tracing providers explicitly enabled by config, even if incomplete."""
return get_tracing_config().explicitly_enabled_providers
def validate_enabled_tracing_providers() -> None:
"""Validate that any explicitly enabled providers are fully configured."""
get_tracing_config().validate_enabled()
def is_tracing_enabled() -> bool:
"""Check if any tracing provider is enabled and fully configured."""
return get_tracing_config().is_configured