Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:

- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
# Re-export the configuration entry points so callers can import them
# directly from the package instead of from the individual submodules.
from .app_config import get_app_config
from .extensions_config import ExtensionsConfig, get_extensions_config
from .memory_config import MemoryConfig, get_memory_config
from .paths import Paths, get_paths
from .skill_evolution_config import SkillEvolutionConfig
from .skills_config import SkillsConfig
from .tracing_config import (
    get_enabled_tracing_providers,
    get_explicitly_enabled_tracing_providers,
    get_tracing_config,
    is_tracing_enabled,
    validate_enabled_tracing_providers,
)

# Names exported by ``from <package> import *``; every import above is listed.
__all__ = [
    # Application config
    "get_app_config",
    # Skills and filesystem layout
    "SkillEvolutionConfig",
    "Paths",
    "get_paths",
    "SkillsConfig",
    # Extensions (MCP servers / skill state)
    "ExtensionsConfig",
    "get_extensions_config",
    # Memory
    "MemoryConfig",
    "get_memory_config",
    # Tracing
    "get_tracing_config",
    "get_explicitly_enabled_tracing_providers",
    "get_enabled_tracing_providers",
    "is_tracing_enabled",
    "validate_enabled_tracing_providers",
]
|
||||
@@ -0,0 +1,51 @@
|
||||
"""ACP (Agent Client Protocol) agent configuration loaded from config.yaml."""
|
||||
|
||||
import logging
|
||||
from collections.abc import Mapping
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ACPAgentConfig(BaseModel):
    """Settings for one ACP-compatible agent.

    Only ``command`` and ``description`` are required; every other field has a default.
    """

    # How the agent subprocess is launched.
    command: str = Field(
        description="Command to launch the ACP agent subprocess",
    )
    args: list[str] = Field(
        default_factory=list,
        description="Additional command arguments",
    )
    env: dict[str, str] = Field(
        default_factory=dict,
        description="Environment variables to inject into the agent subprocess. Values starting with $ are resolved from host environment variables.",
    )

    # What the agent is and which model it should prefer.
    description: str = Field(
        description="Description of the agent's capabilities (shown in tool description)",
    )
    model: str | None = Field(
        default=None,
        description="Model hint passed to the agent (optional)",
    )

    # Permission handling; off by default, so requests are denied unless enabled.
    auto_approve_permissions: bool = Field(
        default=False,
        description=(
            "When True, DeerFlow automatically approves all ACP permission requests from this agent "
            "(allow_once preferred over allow_always). When False (default), all permission requests "
            "are denied — the agent must be configured to operate without requesting permissions."
        ),
    )
|
||||
|
||||
|
||||
# Module-level registry of configured ACP agents, keyed by agent name.
# Replaced wholesale by load_acp_config_from_dict().
_acp_agents: dict[str, ACPAgentConfig] = {}


def get_acp_agents() -> dict[str, ACPAgentConfig]:
    """Return the registry of ACP agents that is currently loaded.

    Returns:
        The module-level mapping of agent name -> ACPAgentConfig (the same
        object, not a copy). It is empty when no ACP agents are configured.
    """
    return _acp_agents
|
||||
|
||||
|
||||
def load_acp_config_from_dict(config_dict: Mapping[str, Mapping[str, object]] | None) -> None:
    """Rebuild the ACP agent registry from a plain mapping (typically from config.yaml).

    The previous registry is discarded, so agents missing from ``config_dict``
    are no longer configured afterwards.

    Args:
        config_dict: Mapping of agent name -> config fields. ``None`` is
            treated the same as an empty mapping.
    """
    global _acp_agents

    raw_agents = {} if config_dict is None else config_dict

    # Validate every entry before swapping the registry, so a bad entry
    # leaves the previously loaded agents untouched.
    parsed: dict[str, ACPAgentConfig] = {}
    for agent_name, fields in raw_agents.items():
        parsed[agent_name] = ACPAgentConfig(**fields)

    _acp_agents = parsed
    logger.info("ACP config loaded: %d agent(s): %s", len(parsed), list(parsed))
|
||||
@@ -0,0 +1,125 @@
|
||||
"""Configuration and loaders for custom agents."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from deerflow.config.paths import get_paths
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SOUL_FILENAME = "SOUL.md"
|
||||
AGENT_NAME_PATTERN = re.compile(r"^[A-Za-z0-9-]+$")
|
||||
|
||||
|
||||
class AgentConfig(BaseModel):
    """Settings that describe one custom agent."""

    # Only ``name`` is required.
    name: str
    description: str = ""
    model: str | None = None
    tool_groups: list[str] | None = None

    # Which skills are loaded into the agent's prompt:
    #   None / omitted    -> every enabled skill (default fallback behavior)
    #   []                -> no skills at all
    #   ["a", "b", ...]   -> only the listed skills
    skills: list[str] | None = None
|
||||
|
||||
|
||||
def load_agent_config(name: str | None) -> AgentConfig | None:
    """Load a custom agent's config from ``config.yaml`` in its agent directory.

    Args:
        name: The agent name. ``None`` means no custom agent was requested.

    Returns:
        The parsed AgentConfig, or ``None`` when ``name`` is ``None``.

    Raises:
        FileNotFoundError: If the agent directory or config.yaml does not exist.
        ValueError: If ``name`` does not match ``AGENT_NAME_PATTERN``, or if
            config.yaml cannot be parsed or is not a YAML mapping.
    """
    if name is None:
        return None

    # fullmatch() rather than match(): with match(), the pattern's trailing
    # ``$`` also accepts a name that ends in a newline, and the name is used
    # to build a filesystem path below.
    if not AGENT_NAME_PATTERN.fullmatch(name):
        raise ValueError(f"Invalid agent name '{name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")

    agent_dir = get_paths().agent_dir(name)
    config_file = agent_dir / "config.yaml"

    if not agent_dir.exists():
        raise FileNotFoundError(f"Agent directory not found: {agent_dir}")

    if not config_file.exists():
        raise FileNotFoundError(f"Agent config not found: {config_file}")

    try:
        with open(config_file, encoding="utf-8") as f:
            # An empty file parses to None; treat it as an empty mapping.
            data: Any = yaml.safe_load(f) or {}
    except yaml.YAMLError as e:
        raise ValueError(f"Failed to parse agent config {config_file}: {e}") from e

    # A top-level list or scalar is syntactically valid YAML but not a usable
    # config; report it as the documented ValueError instead of letting a
    # TypeError/AttributeError escape from the dict operations below.
    if not isinstance(data, dict):
        raise ValueError(f"Failed to parse agent config {config_file}: expected a YAML mapping, got {type(data).__name__}")

    # Fall back to the directory name when the file does not set a name.
    data.setdefault("name", name)

    # Strip unknown fields before passing to Pydantic (e.g. legacy prompt_file).
    known_fields = set(AgentConfig.model_fields.keys())
    data = {k: v for k, v in data.items() if k in known_fields}

    return AgentConfig(**data)
|
||||
|
||||
|
||||
def load_agent_soul(agent_name: str | None) -> str | None:
    """Return the text of an agent's SOUL.md, if there is one.

    SOUL.md defines the agent's personality, values, and behavioral guardrails.
    It is injected into the lead agent's system prompt as additional context.

    Args:
        agent_name: Name of the custom agent. A falsy value (``None`` or an
            empty string) selects the default agent, whose SOUL.md is looked
            up in the base directory.

    Returns:
        The stripped file content, or ``None`` when the file is missing or
        contains only whitespace.
    """
    if agent_name:
        root = get_paths().agent_dir(agent_name)
    else:
        root = get_paths().base_dir

    soul_file = root / SOUL_FILENAME
    if soul_file.exists():
        text = soul_file.read_text(encoding="utf-8").strip()
        return text if text else None
    return None
|
||||
|
||||
|
||||
def list_custom_agents() -> list[AgentConfig]:
    """Collect the configs of all loadable custom agents.

    Each sub-directory of the agents directory is visited in sorted order.
    Directories without a config.yaml are skipped (debug log). Directories
    whose config fails to load are skipped with a warning, never an error.

    Returns:
        One AgentConfig per agent directory that loaded successfully. The
        list is empty when the agents directory does not exist.
    """
    agents_dir = get_paths().agents_dir
    if not agents_dir.exists():
        return []

    agents: list[AgentConfig] = []
    for entry in sorted(agents_dir.iterdir()):
        if not entry.is_dir():
            continue

        if not (entry / "config.yaml").exists():
            logger.debug(f"Skipping {entry.name}: no config.yaml")
            continue

        try:
            agent_cfg = load_agent_config(entry.name)
        except Exception as e:
            # Best effort: one broken agent must not hide the others.
            logger.warning(f"Skipping agent '{entry.name}': {e}")
        else:
            agents.append(agent_cfg)

    return agents
|
||||
379
deer-flow/backend/packages/harness/deerflow/config/app_config.py
Normal file
379
deer-flow/backend/packages/harness/deerflow/config/app_config.py
Normal file
@@ -0,0 +1,379 @@
|
||||
import logging
|
||||
import os
|
||||
from contextvars import ContextVar
|
||||
from pathlib import Path
|
||||
from typing import Any, Self
|
||||
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from deerflow.config.acp_config import load_acp_config_from_dict
|
||||
from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
|
||||
from deerflow.config.extensions_config import ExtensionsConfig
|
||||
from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
|
||||
from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
|
||||
from deerflow.config.model_config import ModelConfig
|
||||
from deerflow.config.sandbox_config import SandboxConfig
|
||||
from deerflow.config.skill_evolution_config import SkillEvolutionConfig
|
||||
from deerflow.config.skills_config import SkillsConfig
|
||||
from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
|
||||
from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
|
||||
from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
|
||||
from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
|
||||
from deerflow.config.token_usage_config import TokenUsageConfig
|
||||
from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
|
||||
from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
|
||||
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _default_config_candidates() -> tuple[Path, ...]:
|
||||
"""Return deterministic config.yaml locations without relying on cwd."""
|
||||
backend_dir = Path(__file__).resolve().parents[4]
|
||||
repo_root = backend_dir.parent
|
||||
return (backend_dir / "config.yaml", repo_root / "config.yaml")
|
||||
|
||||
|
||||
class AppConfig(BaseModel):
    """Config for the DeerFlow application.

    Instances are normally built with `from_file`, which also refreshes the
    module-level sub-configs (title, memory, ACP agents, ...) as a side effect.
    """

    log_level: str = Field(default="info", description="Logging level for deerflow modules (debug/info/warning/error)")
    token_usage: TokenUsageConfig = Field(default_factory=TokenUsageConfig, description="Token usage tracking configuration")
    models: list[ModelConfig] = Field(default_factory=list, description="Available models")
    sandbox: SandboxConfig = Field(description="Sandbox configuration")
    tools: list[ToolConfig] = Field(default_factory=list, description="Available tools")
    tool_groups: list[ToolGroupConfig] = Field(default_factory=list, description="Available tool groups")
    skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
    skill_evolution: SkillEvolutionConfig = Field(default_factory=SkillEvolutionConfig, description="Agent-managed skill evolution configuration")
    extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")
    tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig, description="Tool search / deferred loading configuration")
    title: TitleConfig = Field(default_factory=TitleConfig, description="Automatic title generation configuration")
    summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
    memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
    subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
    # Unknown top-level keys (e.g. ``acp_agents``, ``config_version``) are kept rather than rejected.
    model_config = ConfigDict(extra="allow", frozen=False)
    checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
    stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path:
        """Resolve the config file path.

        Priority:
        1. If provided `config_path` argument, use it.
        2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
        3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.

        Args:
            config_path: Explicit path to the config file, if any.

        Returns:
            The path of an existing config file.

        Raises:
            FileNotFoundError: If the explicitly requested file (argument or
                environment variable) does not exist, or if none of the
                default locations exists.
        """
        if config_path:
            path = Path(config_path)
            if not path.exists():
                raise FileNotFoundError(f"Config file specified by param `config_path` not found at {path}")
            return path

        # Read the variable once so the check and the use see the same value.
        env_path = os.getenv("DEER_FLOW_CONFIG_PATH")
        if env_path:
            path = Path(env_path)
            if not path.exists():
                raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
            return path

        for path in _default_config_candidates():
            if path.exists():
                return path
        raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")

    @classmethod
    def from_file(cls, config_path: str | None = None) -> Self:
        """Load config from YAML file.

        See `resolve_config_path` for how the file is located.

        Besides building the returned instance, this refreshes module-level
        sub-configs: each section present in the file is passed to its
        ``load_*_config_from_dict`` function, and the ACP agent registry is
        always reloaded. The ``extensions`` section is not read from this
        file; it comes from `ExtensionsConfig.from_file()`.

        Args:
            config_path: Path to the config file.

        Returns:
            AppConfig: The loaded config.

        Raises:
            FileNotFoundError: If no config file can be resolved.
            ValueError: If the file's top level is not a YAML mapping, or if a
                referenced environment variable is not set.
        """
        resolved_path = cls.resolve_config_path(config_path)
        with open(resolved_path, encoding="utf-8") as f:
            # An empty file parses to None; treat it as an empty mapping.
            config_data = yaml.safe_load(f) or {}

        # A top-level list or scalar is valid YAML but not a usable config.
        # Fail here with a clear message instead of an AttributeError from
        # the first ``.get`` call further down.
        if not isinstance(config_data, dict):
            raise ValueError(f"Config file {resolved_path} must contain a YAML mapping at the top level, got {type(config_data).__name__}")

        # Check config version before processing
        cls._check_config_version(config_data, resolved_path)

        config_data = cls.resolve_env_variables(config_data)

        # Sections whose module-level config is loaded only when the section
        # is present in the file. Order matches the original load order.
        section_loaders = (
            ("title", load_title_config_from_dict),
            ("summarization", load_summarization_config_from_dict),
            ("memory", load_memory_config_from_dict),
            ("subagents", load_subagents_config_from_dict),
            ("tool_search", load_tool_search_config_from_dict),
            ("guardrails", load_guardrails_config_from_dict),
            ("checkpointer", load_checkpointer_config_from_dict),
            ("stream_bridge", load_stream_bridge_config_from_dict),
        )
        for section, loader in section_loaders:
            if section in config_data:
                loader(config_data[section])

        # Always refresh ACP agent config so removed entries do not linger across reloads.
        load_acp_config_from_dict(config_data.get("acp_agents", {}))

        # Load extensions config separately (it's in a different file)
        extensions_config = ExtensionsConfig.from_file()
        config_data["extensions"] = extensions_config.model_dump()

        return cls.model_validate(config_data)

    @classmethod
    def _check_config_version(cls, config_data: dict, config_path: Path) -> None:
        """Check if the user's config.yaml is outdated compared to config.example.yaml.

        Emits a warning if the user's config_version is lower than the example's.
        Missing or non-integer config_version is treated as version 0 (pre-versioning).
        The check is best effort: if no example file is found, or it cannot be
        read, the method returns silently.

        Args:
            config_data: The parsed user config.
            config_path: Path of the user config; the search for
                config.example.yaml starts in its directory.
        """
        try:
            user_version = int(config_data.get("config_version", 0))
        except (TypeError, ValueError):
            user_version = 0

        # Find config.example.yaml by searching config.yaml's directory and its parents
        example_path = None
        search_dir = config_path.parent
        for _ in range(5):  # search up to 5 levels
            candidate = search_dir / "config.example.yaml"
            if candidate.exists():
                example_path = candidate
                break
            parent = search_dir.parent
            if parent == search_dir:
                # Reached the filesystem root.
                break
            search_dir = parent
        if example_path is None:
            return

        try:
            with open(example_path, encoding="utf-8") as f:
                example_data = yaml.safe_load(f)
            raw = example_data.get("config_version", 0) if example_data else 0
            try:
                example_version = int(raw)
            except (TypeError, ValueError):
                example_version = 0
        except Exception:
            # Deliberately broad: a broken example file must never prevent
            # the real config from loading.
            return

        if user_version < example_version:
            logger.warning(
                "Your config.yaml (version %d) is outdated — the latest version is %d. Run `make config-upgrade` to merge new fields into your config.",
                user_version,
                example_version,
            )

    @classmethod
    def resolve_env_variables(cls, config: Any) -> Any:
        """Recursively resolve environment variables in the config.

        A string is substituted only when the whole value starts with ``$``;
        the rest of the string is the variable name looked up with
        `os.getenv`. Example: $OPENAI_API_KEY. Dicts and lists are walked
        recursively; any other value is returned unchanged.

        Args:
            config: The config to resolve environment variables in.

        Returns:
            The config with environment variables resolved.

        Raises:
            ValueError: If a referenced environment variable is not set.
        """
        if isinstance(config, str):
            if config.startswith("$"):
                env_value = os.getenv(config[1:])
                if env_value is None:
                    raise ValueError(f"Environment variable {config[1:]} not found for config value {config}")
                return env_value
            return config
        elif isinstance(config, dict):
            return {k: cls.resolve_env_variables(v) for k, v in config.items()}
        elif isinstance(config, list):
            return [cls.resolve_env_variables(item) for item in config]
        return config

    def get_model_config(self, name: str) -> ModelConfig | None:
        """Get the model config by name.

        Args:
            name: The name of the model to get the config for.

        Returns:
            The first model config with that name, otherwise None.
        """
        return next((model for model in self.models if model.name == name), None)

    def get_tool_config(self, name: str) -> ToolConfig | None:
        """Get the tool config by name.

        Args:
            name: The name of the tool to get the config for.

        Returns:
            The first tool config with that name, otherwise None.
        """
        return next((tool for tool in self.tools if tool.name == name), None)

    def get_tool_group_config(self, name: str) -> ToolGroupConfig | None:
        """Get the tool group config by name.

        Args:
            name: The name of the tool group to get the config for.

        Returns:
            The first tool group config with that name, otherwise None.
        """
        return next((group for group in self.tool_groups if group.name == name), None)
|
||||
|
||||
|
||||
# Process-wide cache used by get_app_config() and its helpers.
_app_config: AppConfig | None = None  # cached instance, None until first load
_app_config_path: Path | None = None  # file the cached instance was loaded from
_app_config_mtime: float | None = None  # that file's mtime at load time
_app_config_is_custom = False  # True while an instance injected by set_app_config() is active

# Context-scoped override, consulted by get_app_config() before the cache above.
_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
# Overrides that were active before each push; restored by pop_current_app_config().
_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
|
||||
|
||||
|
||||
def _get_config_mtime(config_path: Path) -> float | None:
|
||||
"""Get the modification time of a config file if it exists."""
|
||||
try:
|
||||
return config_path.stat().st_mtime
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _load_and_cache_app_config(config_path: str | None = None) -> AppConfig:
    """Read the config file and store both the result and its cache metadata.

    Args:
        config_path: Optional explicit config path; resolved through
            ``AppConfig.resolve_config_path``.

    Returns:
        The freshly loaded AppConfig, which is now also the cached instance.
    """
    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom

    path = AppConfig.resolve_config_path(config_path)
    loaded = AppConfig.from_file(str(path))

    # Update the cache only after a successful load, so a failing reload
    # keeps the previous instance in place.
    _app_config = loaded
    _app_config_path = path
    _app_config_mtime = _get_config_mtime(path)
    # A file-backed instance replaces any instance injected via set_app_config().
    _app_config_is_custom = False
    return loaded
|
||||
|
||||
|
||||
def get_app_config() -> AppConfig:
    """Return the AppConfig that applies to the current execution context.

    Lookup order:
    1. The context-scoped override set by ``push_current_app_config()``.
    2. An instance injected with ``set_app_config()``.
    3. The cached file-backed instance, reloaded automatically when the
       resolved config path or the file's modification time has changed.

    Use ``reload_app_config()`` to force a reload, or ``reset_app_config()``
    to clear the cache.
    """
    override = _current_app_config.get()
    if override is not None:
        return override

    if _app_config_is_custom and _app_config is not None:
        return _app_config

    resolved_path = AppConfig.resolve_config_path()
    current_mtime = _get_config_mtime(resolved_path)

    same_path = _app_config_path == resolved_path
    mtime_changed = _app_config_mtime != current_mtime

    if _app_config is None or not same_path or mtime_changed:
        # Only announce a reload when the same file really changed on disk,
        # not on first load or when the resolved path switched.
        if same_path and mtime_changed and _app_config_mtime is not None and current_mtime is not None:
            logger.info(
                "Config file has been modified (mtime: %s -> %s), reloading AppConfig",
                _app_config_mtime,
                current_mtime,
            )
        _load_and_cache_app_config(str(resolved_path))

    return _app_config
|
||||
|
||||
|
||||
def reload_app_config(config_path: str | None = None) -> AppConfig:
    """Force a fresh load of the config file and replace the cached instance.

    Use this to pick up changes to the config file without restarting the
    application.

    Args:
        config_path: Optional path to the config file. When omitted, the
            default resolution strategy is used.

    Returns:
        The newly loaded AppConfig instance.
    """
    reloaded = _load_and_cache_app_config(config_path)
    return reloaded
|
||||
|
||||
|
||||
def reset_app_config() -> None:
    """Forget the cached config instance and its metadata.

    The next ``get_app_config()`` call loads the config from file again.
    Useful for testing or when switching between different configurations.
    """
    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom

    _app_config = _app_config_path = _app_config_mtime = None
    _app_config_is_custom = False
|
||||
|
||||
|
||||
def set_app_config(config: AppConfig) -> None:
    """Install a caller-supplied config as the cached instance.

    Intended for injecting a custom or mock config, e.g. in tests. The
    instance is flagged as custom, so ``get_app_config()`` returns it without
    consulting the config file.

    Args:
        config: The AppConfig instance to use.
    """
    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom

    # An injected instance has no backing file, hence no path or mtime.
    _app_config_path = _app_config_mtime = None
    _app_config = config
    _app_config_is_custom = True
|
||||
|
||||
|
||||
def peek_current_app_config() -> AppConfig | None:
    """Return the context-scoped AppConfig override, or None when none is active."""
    active_override = _current_app_config.get()
    return active_override
|
||||
|
||||
|
||||
def push_current_app_config(config: AppConfig) -> None:
    """Activate ``config`` as the override for the current execution context.

    The override that was active before (possibly ``None``) is saved on a
    stack so that ``pop_current_app_config()`` can restore it.

    Args:
        config: The AppConfig to use until the matching pop.
    """
    previous = _current_app_config.get()
    saved = _current_app_config_stack.get()
    _current_app_config_stack.set((*saved, previous))
    _current_app_config.set(config)
|
||||
|
||||
|
||||
def pop_current_app_config() -> None:
    """Undo the most recent ``push_current_app_config()`` in this execution context.

    Restores the override that was active before the last push. When nothing
    was pushed, the override is simply cleared.
    """
    saved = _current_app_config_stack.get()
    if saved:
        *remaining, previous = saved
        _current_app_config_stack.set(tuple(remaining))
        _current_app_config.set(previous)
    else:
        _current_app_config.set(None)
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Configuration for LangGraph checkpointer."""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# The checkpointer backends that can be selected.
CheckpointerType = Literal["memory", "sqlite", "postgres"]


class CheckpointerConfig(BaseModel):
    """Settings for the LangGraph checkpointer that persists graph state."""

    # Required: which backend to use.
    type: CheckpointerType = Field(
        description=(
            "Checkpointer backend type. "
            "'memory' is in-process only (lost on restart). "
            "'sqlite' persists to a local file (requires langgraph-checkpoint-sqlite). "
            "'postgres' persists to PostgreSQL (requires langgraph-checkpoint-postgres)."
        ),
    )

    # Where the backend stores its data.
    connection_string: str | None = Field(
        default=None,
        description=(
            "Connection string for sqlite (file path) or postgres (DSN). "
            "Required for sqlite and postgres types. "
            "For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
            "For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'."
        ),
    )
|
||||
|
||||
|
||||
# Module-level checkpointer settings; None means no checkpointer is configured.
_checkpointer_config: CheckpointerConfig | None = None


def get_checkpointer_config() -> CheckpointerConfig | None:
    """Return the active checkpointer settings, or None when none are configured."""
    return _checkpointer_config
|
||||
|
||||
|
||||
def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
    """Replace the module-level checkpointer settings.

    Args:
        config: The new settings, or ``None`` to mark the checkpointer as
            not configured.
    """
    global _checkpointer_config

    _checkpointer_config = config
|
||||
|
||||
|
||||
def load_checkpointer_config_from_dict(config_dict: dict) -> None:
|
||||
"""Load checkpointer configuration from a dictionary."""
|
||||
global _checkpointer_config
|
||||
_checkpointer_config = CheckpointerConfig(**config_dict)
|
||||
@@ -0,0 +1,256 @@
|
||||
"""Unified extensions configuration for MCP servers and skills."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class McpOAuthConfig(BaseModel):
    """OAuth settings for one MCP server reached over HTTP/SSE.

    Only ``token_url`` is required. Unknown keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    enabled: bool = Field(default=True, description="Whether OAuth token injection is enabled")

    # Token endpoint and grant selection.
    token_url: str = Field(description="OAuth token endpoint URL")
    grant_type: Literal["client_credentials", "refresh_token"] = Field(
        default="client_credentials",
        description="OAuth grant type",
    )

    # Credentials and request parameters.
    client_id: str | None = Field(default=None, description="OAuth client ID")
    client_secret: str | None = Field(default=None, description="OAuth client secret")
    refresh_token: str | None = Field(default=None, description="OAuth refresh token (for refresh_token grant)")
    scope: str | None = Field(default=None, description="OAuth scope")
    audience: str | None = Field(default=None, description="OAuth audience (provider-specific)")

    # How to read the token endpoint's response.
    token_field: str = Field(default="access_token", description="Field name containing access token in token response")
    token_type_field: str = Field(default="token_type", description="Field name containing token type in token response")
    expires_in_field: str = Field(default="expires_in", description="Field name containing expiry (seconds) in token response")
    default_token_type: str = Field(default="Bearer", description="Default token type when missing in token response")

    # Refresh timing and extra request parameters.
    refresh_skew_seconds: int = Field(default=60, description="Refresh token this many seconds before expiry")
    extra_token_params: dict[str, str] = Field(default_factory=dict, description="Additional form params sent to token endpoint")
|
||||
|
||||
|
||||
class McpServerConfig(BaseModel):
    """Settings for one MCP server.

    Every field has a default. Unknown keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    enabled: bool = Field(default=True, description="Whether this MCP server is enabled")
    type: str = Field(default="stdio", description="Transport type: 'stdio', 'sse', or 'http'")

    # stdio transport: how to start the server process.
    command: str | None = Field(default=None, description="Command to execute to start the MCP server (for stdio type)")
    args: list[str] = Field(default_factory=list, description="Arguments to pass to the command (for stdio type)")
    env: dict[str, str] = Field(default_factory=dict, description="Environment variables for the MCP server")

    # sse / http transport: where and how to connect.
    url: str | None = Field(default=None, description="URL of the MCP server (for sse or http type)")
    headers: dict[str, str] = Field(default_factory=dict, description="HTTP headers to send (for sse or http type)")
    oauth: McpOAuthConfig | None = Field(default=None, description="OAuth configuration (for sse or http type)")

    description: str = Field(default="", description="Human-readable description of what this MCP server provides")
|
||||
|
||||
|
||||
class SkillStateConfig(BaseModel):
    """State of one skill; currently just whether it is switched on."""

    # Skills are enabled unless explicitly turned off.
    enabled: bool = Field(
        default=True,
        description="Whether this skill is enabled",
    )
|
||||
|
||||
|
||||
class ExtensionsConfig(BaseModel):
    """Unified configuration for MCP servers and skills.

    The model is normally built from a JSON file via `from_file`. The MCP
    server map accepts both the `mcp_servers` field name and the
    `mcpServers` alias, and unknown top-level keys are kept.
    """

    mcp_servers: dict[str, McpServerConfig] = Field(
        default_factory=dict,
        description="Map of MCP server name to configuration",
        alias="mcpServers",
    )
    skills: dict[str, SkillStateConfig] = Field(
        default_factory=dict,
        description="Map of skill name to state configuration",
    )
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
        """Locate the extensions config file.

        Resolution order:
        1. The `config_path` argument, when given.
        2. The `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, when set.
        3. `extensions_config.json` in the backend directory, then in the
           repository root (both derived from this module's location).
        4. The legacy `mcp_config.json` in the same two directories.

        Args:
            config_path: Optional explicit path to the extensions config file.

        Returns:
            The first existing candidate, or None when no default file is
            present (extensions are optional).

        Raises:
            FileNotFoundError: If an explicitly requested path (argument or
                environment variable) does not exist.
        """
        if config_path:
            explicit = Path(config_path)
            if explicit.exists():
                return explicit
            raise FileNotFoundError(f"Extensions config file specified by param `config_path` not found at {explicit}")

        env_value = os.getenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH")
        if env_value:
            from_env = Path(env_value)
            if from_env.exists():
                return from_env
            raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {from_env}")

        backend_dir = Path(__file__).resolve().parents[4]
        repo_root = backend_dir.parent
        # Current file name wins over the legacy one; within each name the
        # backend directory is checked before the repository root.
        candidates = (
            directory / filename
            for filename in ("extensions_config.json", "mcp_config.json")
            for directory in (backend_dir, repo_root)
        )
        # Extensions are optional, so fall back to None if nothing exists.
        return next((candidate for candidate in candidates if candidate.exists()), None)

    @classmethod
    def from_file(cls, config_path: str | None = None) -> "ExtensionsConfig":
        """Build an ExtensionsConfig from a JSON file.

        The file is located with `resolve_config_path`; `$VAR` placeholders
        are substituted with `resolve_env_variables` before validation.

        Args:
            config_path: Optional path to the extensions config file.

        Returns:
            The loaded config, or an empty config when no file is found.

        Raises:
            FileNotFoundError: Propagated from `resolve_config_path`.
            ValueError: If the file is not valid JSON.
            RuntimeError: For any other failure while reading or validating.
        """
        location = cls.resolve_config_path(config_path)
        if location is None:
            # No config file anywhere: behave as if nothing is configured.
            return cls(mcp_servers={}, skills={})

        try:
            with open(location, encoding="utf-8") as handle:
                raw = json.load(handle)
            cls.resolve_env_variables(raw)
            return cls.model_validate(raw)
        except json.JSONDecodeError as e:
            raise ValueError(f"Extensions config file at {location} is not valid JSON: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Failed to load extensions config from {location}: {e}") from e

    @classmethod
    def resolve_env_variables(cls, config: dict[str, Any]) -> dict[str, Any]:
        """Substitute `$NAME` placeholders with environment values, in place.

        A string value that starts with `$` is replaced by
        `os.getenv(<rest of the string>)`, e.g. `$OPENAI_API_KEY`. Nested
        dicts are processed recursively, as are dicts found inside lists.
        Plain string items inside lists are left untouched.

        Args:
            config: The config mapping to process; it is mutated.

        Returns:
            The same mapping, with placeholders resolved.
        """
        for key, value in config.items():
            if isinstance(value, dict):
                config[key] = cls.resolve_env_variables(value)
            elif isinstance(value, list):
                config[key] = [cls.resolve_env_variables(entry) if isinstance(entry, dict) else entry for entry in value]
            elif isinstance(value, str) and value.startswith("$"):
                looked_up = os.getenv(value[1:])
                # Unresolved placeholder: store an empty string so downstream
                # consumers (e.g. MCP servers) don't receive the literal
                # "$VAR" token as an actual value.
                config[key] = "" if looked_up is None else looked_up
        return config

    def get_enabled_mcp_servers(self) -> dict[str, McpServerConfig]:
        """Return the subset of MCP servers whose `enabled` flag is set.

        Returns:
            Mapping of server name to config for enabled servers only.
        """
        return {server_name: server for server_name, server in self.mcp_servers.items() if server.enabled}

    def is_skill_enabled(self, skill_name: str, skill_category: str) -> bool:
        """Tell whether a skill is enabled.

        Args:
            skill_name: Name of the skill.
            skill_category: Category of the skill.

        Returns:
            The configured flag when the skill has an entry; otherwise True
            only for the "public" and "custom" categories.
        """
        state = self.skills.get(skill_name)
        if state is not None:
            return state.enabled
        # No explicit entry: public and custom skills default to enabled.
        return skill_category in ("public", "custom")
|
||||
|
||||
|
||||
# Module-level cache; filled lazily by get_extensions_config().
_extensions_config: ExtensionsConfig | None = None


def get_extensions_config() -> ExtensionsConfig:
    """Return the cached extensions config, loading it on first use.

    The first call loads the config with `ExtensionsConfig.from_file()` and
    stores it in a module-level variable; later calls return that same
    instance. Use `reload_extensions_config()` to re-read the file, or
    `reset_extensions_config()` to clear the cache.

    Returns:
        The cached ExtensionsConfig instance.
    """
    global _extensions_config
    if _extensions_config is not None:
        return _extensions_config
    _extensions_config = ExtensionsConfig.from_file()
    return _extensions_config
|
||||
|
||||
|
||||
def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
    """Reload the extensions config from file and replace the cached instance.

    Useful when the config file has been modified and the changes should be
    picked up without restarting the application.

    Args:
        config_path: Optional path to the extensions config file. If omitted,
            `ExtensionsConfig.from_file` applies its default resolution.

    Returns:
        The newly loaded ExtensionsConfig instance, which is now the cached one.
    """
    global _extensions_config
    # The cache is overwritten unconditionally, whatever it held before.
    _extensions_config = ExtensionsConfig.from_file(config_path)
    return _extensions_config
|
||||
|
||||
|
||||
def reset_extensions_config() -> None:
    """Clear the cached extensions config instance.

    After this call the module-level cache is None, so the next call to
    `get_extensions_config()` loads the config from file again. Useful for
    tests or when switching between configurations.
    """
    global _extensions_config
    _extensions_config = None
|
||||
|
||||
|
||||
def set_extensions_config(config: ExtensionsConfig) -> None:
    """Install a specific extensions config instance as the cached one.

    This allows injecting a custom or mock config, e.g. for testing.

    Args:
        config: The ExtensionsConfig instance to use from now on.
    """
    global _extensions_config
    _extensions_config = config
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Configuration for pre-tool-call authorization."""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class GuardrailProviderConfig(BaseModel):
    """Configuration for a guardrail provider.

    Names the provider class and carries the settings handed to it.
    """

    # Required: there is no default provider class.
    use: str = Field(description="Class path (e.g. 'deerflow.guardrails.builtin:AllowlistProvider')")
    config: dict = Field(default_factory=dict, description="Provider-specific settings passed as kwargs")
|
||||
|
||||
|
||||
class GuardrailsConfig(BaseModel):
    """Configuration for pre-tool-call authorization.

    When enabled, every tool call passes through the configured provider
    before execution. The provider receives tool name, arguments, and the
    agent's passport reference, and returns an allow/deny decision.
    """

    # Guardrails are opt-in: disabled unless the config turns them on.
    enabled: bool = Field(default=False, description="Enable guardrail middleware")
    # Defaults to the strict choice: a provider error blocks the tool call.
    fail_closed: bool = Field(default=True, description="Block tool calls if provider errors")
    passport: str | None = Field(default=None, description="OAP passport path or hosted agent ID")
    provider: GuardrailProviderConfig | None = Field(default=None, description="Guardrail provider configuration")
|
||||
|
||||
|
||||
# Module-level cache; None until the config is first loaded or requested.
_guardrails_config: GuardrailsConfig | None = None


def get_guardrails_config() -> GuardrailsConfig:
    """Return the guardrails config, creating a default one if none is loaded.

    A freshly created default instance is stored in the module-level cache,
    so later calls return the same object.
    """
    global _guardrails_config
    if _guardrails_config is not None:
        return _guardrails_config
    _guardrails_config = GuardrailsConfig()
    return _guardrails_config
|
||||
|
||||
|
||||
def load_guardrails_config_from_dict(data: dict) -> GuardrailsConfig:
    """Load guardrails config from a dict (called during AppConfig loading).

    The dict is validated into a `GuardrailsConfig`, which replaces the
    cached instance.

    Args:
        data: Raw guardrails settings.

    Returns:
        The validated config that is now cached.
    """
    global _guardrails_config
    _guardrails_config = GuardrailsConfig.model_validate(data)
    return _guardrails_config
|
||||
|
||||
|
||||
def reset_guardrails_config() -> None:
    """Reset the cached config instance. Used in tests to prevent singleton leaks.

    The next `get_guardrails_config()` call creates a fresh default instance.
    """
    global _guardrails_config
    _guardrails_config = None
|
||||
@@ -0,0 +1,82 @@
|
||||
"""Configuration for memory mechanism."""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class MemoryConfig(BaseModel):
    """Configuration for global memory mechanism.

    Numeric fields carry validation bounds (`ge`/`le`), so out-of-range
    values are rejected when the model is constructed.
    """

    enabled: bool = Field(
        default=True,
        description="Whether to enable memory mechanism",
    )
    # An empty string means "use the default location"; see the description.
    storage_path: str = Field(
        default="",
        description=(
            "Path to store memory data. "
            "If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). "
            "Absolute paths are used as-is. "
            "Relative paths are resolved against `Paths.base_dir` "
            "(not the backend working directory). "
            "Note: if you previously set this to `.deer-flow/memory.json`, "
            "the file will now be resolved as `{base_dir}/.deer-flow/memory.json`; "
            "migrate existing data or use an absolute path to preserve the old location."
        ),
    )
    storage_class: str = Field(
        default="deerflow.agents.memory.storage.FileMemoryStorage",
        description="The class path for memory storage provider",
    )
    # Allowed range: 1 to 300 seconds.
    debounce_seconds: int = Field(
        default=30,
        ge=1,
        le=300,
        description="Seconds to wait before processing queued updates (debounce)",
    )
    model_name: str | None = Field(
        default=None,
        description="Model name to use for memory updates (None = use default model)",
    )
    # Allowed range: 10 to 500 facts.
    max_facts: int = Field(
        default=100,
        ge=10,
        le=500,
        description="Maximum number of facts to store",
    )
    # Allowed range: 0.0 to 1.0.
    fact_confidence_threshold: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Minimum confidence threshold for storing facts",
    )
    injection_enabled: bool = Field(
        default=True,
        description="Whether to inject memory into system prompt",
    )
    # Allowed range: 100 to 8000 tokens.
    max_injection_tokens: int = Field(
        default=2000,
        ge=100,
        le=8000,
        description="Maximum tokens to use for memory injection",
    )
|
||||
|
||||
|
||||
# Global configuration instance; created eagerly with defaults at import time.
_memory_config: MemoryConfig = MemoryConfig()


def get_memory_config() -> MemoryConfig:
    """Get the current memory configuration.

    Returns:
        The module-level MemoryConfig instance (defaults until replaced).
    """
    return _memory_config
|
||||
|
||||
|
||||
def set_memory_config(config: MemoryConfig) -> None:
    """Set the memory configuration.

    Args:
        config: The instance that replaces the module-level configuration.
    """
    global _memory_config
    _memory_config = config
|
||||
|
||||
|
||||
def load_memory_config_from_dict(config_dict: dict) -> None:
    """Load memory configuration from a dictionary.

    The dictionary's items are passed as keyword arguments to `MemoryConfig`,
    and the resulting instance replaces the module-level configuration.

    Args:
        config_dict: Raw memory settings keyed by field name.
    """
    global _memory_config
    _memory_config = MemoryConfig(**config_dict)
|
||||
@@ -0,0 +1,41 @@
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class ModelConfig(BaseModel):
    """Config section for a model.

    `name`, `use` and `model` are declared with `...` and no default
    factory. Keys not declared here are accepted as well (`extra="allow"`).
    """

    name: str = Field(..., description="Unique name for the model")
    # NOTE(review): `...` is passed together with `default_factory`; presumably
    # pydantic treats the Ellipsis as "no default" so the factory (None)
    # applies and the field is optional - confirm against the pydantic version in use.
    display_name: str | None = Field(..., default_factory=lambda: None, description="Display name for the model")
    description: str | None = Field(..., default_factory=lambda: None, description="Description for the model")
    use: str = Field(
        ...,
        description="Class path of the model provider(e.g. langchain_openai.ChatOpenAI)",
    )
    model: str = Field(..., description="Model name")
    model_config = ConfigDict(extra="allow")
    use_responses_api: bool | None = Field(
        default=None,
        description="Whether to route OpenAI ChatOpenAI calls through the /v1/responses API",
    )
    output_version: str | None = Field(
        default=None,
        description="Structured output version for OpenAI responses content, e.g. responses/v1",
    )
    supports_thinking: bool = Field(default_factory=lambda: False, description="Whether the model supports thinking")
    supports_reasoning_effort: bool = Field(default_factory=lambda: False, description="Whether the model supports reasoning effort")
    when_thinking_enabled: dict | None = Field(
        default_factory=lambda: None,
        description="Extra settings to be passed to the model when thinking is enabled",
    )
    when_thinking_disabled: dict | None = Field(
        default_factory=lambda: None,
        description="Extra settings to be passed to the model when thinking is disabled",
    )
    supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs")
    thinking: dict | None = Field(
        default_factory=lambda: None,
        description=(
            "Thinking settings for the model. If provided, these settings will be passed to the model when thinking is enabled. "
            "This is a shortcut for `when_thinking_enabled` and will be merged with `when_thinking_enabled` if both are provided."
        ),
    )
|
||||
306
deer-flow/backend/packages/harness/deerflow/config/paths.py
Normal file
306
deer-flow/backend/packages/harness/deerflow/config/paths.py
Normal file
@@ -0,0 +1,306 @@
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path, PureWindowsPath
|
||||
|
||||
# Virtual path prefix seen by agents inside the sandbox
|
||||
VIRTUAL_PATH_PREFIX = "/mnt/user-data"
|
||||
|
||||
_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
|
||||
|
||||
|
||||
def _default_local_base_dir() -> Path:
|
||||
"""Return the repo-local DeerFlow state directory without relying on cwd."""
|
||||
backend_dir = Path(__file__).resolve().parents[4]
|
||||
return backend_dir / ".deer-flow"
|
||||
|
||||
|
||||
def _validate_thread_id(thread_id: str) -> str:
|
||||
"""Validate a thread ID before using it in filesystem paths."""
|
||||
if not _SAFE_THREAD_ID_RE.match(thread_id):
|
||||
raise ValueError(f"Invalid thread_id {thread_id!r}: only alphanumeric characters, hyphens, and underscores are allowed.")
|
||||
return thread_id
|
||||
|
||||
|
||||
def _join_host_path(base: str, *parts: str) -> str:
    """Append path segments to a host path, keeping the path's own style.

    Docker Desktop on Windows expects bind mount sources to stay in Windows
    path form (for example ``C:\\repo\\backend\\.deer-flow``). Joining such a
    path with ``Path(base) / ...`` on a POSIX host can produce mixed
    separators, so Windows-looking bases are joined with ``PureWindowsPath``
    and everything else with the platform's native ``Path``.

    Args:
        base: The starting host path.
        *parts: Segments to append, in order.

    Returns:
        The joined path as a string; `base` itself, untouched, when no
        segments are given.
    """
    if not parts:
        return base

    # A base counts as Windows-style when it starts with a drive letter,
    # starts with a UNC prefix, or contains a backslash anywhere.
    has_drive_letter = re.match(r"^[A-Za-z]:[\\/]", base) is not None
    looks_windows = has_drive_letter or base.startswith("\\\\") or "\\" in base

    path_type = PureWindowsPath if looks_windows else Path
    return str(path_type(base).joinpath(*parts))


def join_host_path(base: str, *parts: str) -> str:
    """Public entry point for joining host path segments in their native style.

    Delegates to `_join_host_path` with the same arguments.
    """
    return _join_host_path(base, *parts)
|
||||
|
||||
|
||||
class Paths:
|
||||
"""
|
||||
Centralized path configuration for DeerFlow application data.
|
||||
|
||||
Directory layout (host side):
|
||||
{base_dir}/
|
||||
├── memory.json
|
||||
├── USER.md <-- global user profile (injected into all agents)
|
||||
├── agents/
|
||||
│ └── {agent_name}/
|
||||
│ ├── config.yaml
|
||||
│ ├── SOUL.md <-- agent personality/identity (injected alongside lead prompt)
|
||||
│ └── memory.json
|
||||
└── threads/
|
||||
└── {thread_id}/
|
||||
└── user-data/ <-- mounted as /mnt/user-data/ inside sandbox
|
||||
├── workspace/ <-- /mnt/user-data/workspace/
|
||||
├── uploads/ <-- /mnt/user-data/uploads/
|
||||
└── outputs/ <-- /mnt/user-data/outputs/
|
||||
|
||||
BaseDir resolution (in priority order):
|
||||
1. Constructor argument `base_dir`
|
||||
2. DEER_FLOW_HOME environment variable
|
||||
3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
|
||||
"""
|
||||
|
||||
def __init__(self, base_dir: str | Path | None = None) -> None:
|
||||
self._base_dir = Path(base_dir).resolve() if base_dir is not None else None
|
||||
|
||||
@property
|
||||
def host_base_dir(self) -> Path:
|
||||
"""Host-visible base dir for Docker volume mount sources.
|
||||
|
||||
When running inside Docker with a mounted Docker socket (DooD), the Docker
|
||||
daemon runs on the host and resolves mount paths against the host filesystem.
|
||||
Set DEER_FLOW_HOST_BASE_DIR to the host-side path that corresponds to this
|
||||
container's base_dir so that sandbox container volume mounts work correctly.
|
||||
|
||||
Falls back to base_dir when the env var is not set (native/local execution).
|
||||
"""
|
||||
if env := os.getenv("DEER_FLOW_HOST_BASE_DIR"):
|
||||
return Path(env)
|
||||
return self.base_dir
|
||||
|
||||
def _host_base_dir_str(self) -> str:
|
||||
"""Return the host base dir as a raw string for bind mounts."""
|
||||
if env := os.getenv("DEER_FLOW_HOST_BASE_DIR"):
|
||||
return env
|
||||
return str(self.base_dir)
|
||||
|
||||
@property
|
||||
def base_dir(self) -> Path:
|
||||
"""Root directory for all application data."""
|
||||
if self._base_dir is not None:
|
||||
return self._base_dir
|
||||
|
||||
if env_home := os.getenv("DEER_FLOW_HOME"):
|
||||
return Path(env_home).resolve()
|
||||
|
||||
return _default_local_base_dir()
|
||||
|
||||
@property
|
||||
def memory_file(self) -> Path:
|
||||
"""Path to the persisted memory file: `{base_dir}/memory.json`."""
|
||||
return self.base_dir / "memory.json"
|
||||
|
||||
@property
|
||||
def user_md_file(self) -> Path:
|
||||
"""Path to the global user profile file: `{base_dir}/USER.md`."""
|
||||
return self.base_dir / "USER.md"
|
||||
|
||||
@property
|
||||
def agents_dir(self) -> Path:
|
||||
"""Root directory for all custom agents: `{base_dir}/agents/`."""
|
||||
return self.base_dir / "agents"
|
||||
|
||||
def agent_dir(self, name: str) -> Path:
|
||||
"""Directory for a specific agent: `{base_dir}/agents/{name}/`."""
|
||||
return self.agents_dir / name.lower()
|
||||
|
||||
def agent_memory_file(self, name: str) -> Path:
|
||||
"""Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
|
||||
return self.agent_dir(name) / "memory.json"
|
||||
|
||||
def thread_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for a thread's data: `{base_dir}/threads/{thread_id}/`
|
||||
|
||||
This directory contains a `user-data/` subdirectory that is mounted
|
||||
as `/mnt/user-data/` inside the sandbox.
|
||||
|
||||
Raises:
|
||||
ValueError: If `thread_id` contains unsafe characters (path separators
|
||||
or `..`) that could cause directory traversal.
|
||||
"""
|
||||
return self.base_dir / "threads" / _validate_thread_id(thread_id)
|
||||
|
||||
def sandbox_work_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for the agent's workspace directory.
|
||||
Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
|
||||
Sandbox: `/mnt/user-data/workspace/`
|
||||
"""
|
||||
return self.thread_dir(thread_id) / "user-data" / "workspace"
|
||||
|
||||
def sandbox_uploads_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for user-uploaded files.
|
||||
Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
|
||||
Sandbox: `/mnt/user-data/uploads/`
|
||||
"""
|
||||
return self.thread_dir(thread_id) / "user-data" / "uploads"
|
||||
|
||||
def sandbox_outputs_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for agent-generated artifacts.
|
||||
Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
|
||||
Sandbox: `/mnt/user-data/outputs/`
|
||||
"""
|
||||
return self.thread_dir(thread_id) / "user-data" / "outputs"
|
||||
|
||||
def acp_workspace_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for the ACP workspace of a specific thread.
|
||||
Host: `{base_dir}/threads/{thread_id}/acp-workspace/`
|
||||
Sandbox: `/mnt/acp-workspace/`
|
||||
|
||||
Each thread gets its own isolated ACP workspace so that concurrent
|
||||
sessions cannot read each other's ACP agent outputs.
|
||||
"""
|
||||
return self.thread_dir(thread_id) / "acp-workspace"
|
||||
|
||||
def sandbox_user_data_dir(self, thread_id: str) -> Path:
|
||||
"""
|
||||
Host path for the user-data root.
|
||||
Host: `{base_dir}/threads/{thread_id}/user-data/`
|
||||
Sandbox: `/mnt/user-data/`
|
||||
"""
|
||||
return self.thread_dir(thread_id) / "user-data"
|
||||
|
||||
def host_thread_dir(self, thread_id: str) -> str:
|
||||
"""Host path for a thread directory, preserving Windows path syntax."""
|
||||
return _join_host_path(self._host_base_dir_str(), "threads", _validate_thread_id(thread_id))
|
||||
|
||||
def host_sandbox_user_data_dir(self, thread_id: str) -> str:
|
||||
"""Host path for a thread's user-data root."""
|
||||
return _join_host_path(self.host_thread_dir(thread_id), "user-data")
|
||||
|
||||
def host_sandbox_work_dir(self, thread_id: str) -> str:
|
||||
"""Host path for the workspace mount source."""
|
||||
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "workspace")
|
||||
|
||||
def host_sandbox_uploads_dir(self, thread_id: str) -> str:
|
||||
"""Host path for the uploads mount source."""
|
||||
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "uploads")
|
||||
|
||||
def host_sandbox_outputs_dir(self, thread_id: str) -> str:
|
||||
"""Host path for the outputs mount source."""
|
||||
return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "outputs")
|
||||
|
||||
def host_acp_workspace_dir(self, thread_id: str) -> str:
|
||||
"""Host path for the ACP workspace mount source."""
|
||||
return _join_host_path(self.host_thread_dir(thread_id), "acp-workspace")
|
||||
|
||||
def ensure_thread_dirs(self, thread_id: str) -> None:
|
||||
"""Create all standard sandbox directories for a thread.
|
||||
|
||||
Directories are created with mode 0o777 so that sandbox containers
|
||||
(which may run as a different UID than the host backend process) can
|
||||
write to the volume-mounted paths without "Permission denied" errors.
|
||||
The explicit chmod() call is necessary because Path.mkdir(mode=...) is
|
||||
subject to the process umask and may not yield the intended permissions.
|
||||
|
||||
Includes the ACP workspace directory so it can be volume-mounted into
|
||||
the sandbox container at ``/mnt/acp-workspace`` even before the first
|
||||
ACP agent invocation.
|
||||
"""
|
||||
for d in [
|
||||
self.sandbox_work_dir(thread_id),
|
||||
self.sandbox_uploads_dir(thread_id),
|
||||
self.sandbox_outputs_dir(thread_id),
|
||||
self.acp_workspace_dir(thread_id),
|
||||
]:
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
d.chmod(0o777)
|
||||
|
||||
def delete_thread_dir(self, thread_id: str) -> None:
|
||||
"""Delete all persisted data for a thread.
|
||||
|
||||
The operation is idempotent: missing thread directories are ignored.
|
||||
"""
|
||||
thread_dir = self.thread_dir(thread_id)
|
||||
if thread_dir.exists():
|
||||
shutil.rmtree(thread_dir)
|
||||
|
||||
def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
|
||||
"""Resolve a sandbox virtual path to the actual host filesystem path.
|
||||
|
||||
Args:
|
||||
thread_id: The thread ID.
|
||||
virtual_path: Virtual path as seen inside the sandbox, e.g.
|
||||
``/mnt/user-data/outputs/report.pdf``.
|
||||
Leading slashes are stripped before matching.
|
||||
|
||||
Returns:
|
||||
The resolved absolute host filesystem path.
|
||||
|
||||
Raises:
|
||||
ValueError: If the path does not start with the expected virtual
|
||||
prefix or a path-traversal attempt is detected.
|
||||
"""
|
||||
stripped = virtual_path.lstrip("/")
|
||||
prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
|
||||
|
||||
# Require an exact segment-boundary match to avoid prefix confusion
|
||||
# (e.g. reject paths like "mnt/user-dataX/...").
|
||||
if stripped != prefix and not stripped.startswith(prefix + "/"):
|
||||
raise ValueError(f"Path must start with /{prefix}")
|
||||
|
||||
relative = stripped[len(prefix) :].lstrip("/")
|
||||
base = self.sandbox_user_data_dir(thread_id).resolve()
|
||||
actual = (base / relative).resolve()
|
||||
|
||||
try:
|
||||
actual.relative_to(base)
|
||||
except ValueError:
|
||||
raise ValueError("Access denied: path traversal detected")
|
||||
|
||||
return actual
|
||||
|
||||
|
||||
# ── Singleton ────────────────────────────────────────────────────────────

# Created on first use by get_paths().
_paths: Paths | None = None


def get_paths() -> Paths:
    """Return the global Paths singleton, creating it on the first call.

    The instance is built with no explicit base directory, so its base dir
    follows the `Paths.base_dir` resolution rules.
    """
    global _paths
    if _paths is not None:
        return _paths
    _paths = Paths()
    return _paths
|
||||
|
||||
|
||||
def resolve_path(path: str) -> Path:
    """Turn *path* into an absolute, normalised ``Path``.

    An absolute path is only normalised. A relative path is first anchored
    at the application base directory (``get_paths().base_dir``) and then
    normalised.
    """
    candidate = Path(path)
    if candidate.is_absolute():
        return candidate.resolve()
    return (get_paths().base_dir / path).resolve()
|
||||
@@ -0,0 +1,83 @@
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class VolumeMountConfig(BaseModel):
    """Configuration for a volume mount.

    Pairs a host path with the container path it is mounted at.
    """

    # Both paths are required; only the read-only flag has a default.
    host_path: str = Field(..., description="Path on the host machine")
    container_path: str = Field(..., description="Path inside the container")
    read_only: bool = Field(default=False, description="Whether the mount is read-only")
|
||||
|
||||
|
||||
class SandboxConfig(BaseModel):
    """Config section for a sandbox.

    Common options:
        use: Class path of the sandbox provider (required)
        allow_host_bash: Enable host-side bash execution for LocalSandboxProvider.
            Dangerous and intended only for fully trusted local workflows.

    AioSandboxProvider specific options:
        image: Docker image to use (default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest)
        port: Base port for sandbox containers (default: 8080)
        replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
        container_prefix: Prefix for container names (default: deer-flow-sandbox)
        idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
        mounts: List of volume mounts to share directories with the container
        environment: Environment variables to inject into the container (values starting with $ are resolved from host env)

    The provider-specific options default to None in this model; the
    defaults quoted above are not set here (presumably the provider applies
    them - verify against the provider implementation). Keys not declared
    here are accepted as well (`extra="allow"`).
    """

    use: str = Field(
        ...,
        description="Class path of the sandbox provider (e.g. deerflow.sandbox.local:LocalSandboxProvider)",
    )
    # Off by default because it lets the bash tool run directly on the host.
    allow_host_bash: bool = Field(
        default=False,
        description="Allow the bash tool to execute directly on the host when using LocalSandboxProvider. Dangerous; intended only for fully trusted local environments.",
    )
    image: str | None = Field(
        default=None,
        description="Docker image to use for the sandbox container",
    )
    port: int | None = Field(
        default=None,
        description="Base port for sandbox containers",
    )
    replicas: int | None = Field(
        default=None,
        description="Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.",
    )
    container_prefix: str | None = Field(
        default=None,
        description="Prefix for container names",
    )
    idle_timeout: int | None = Field(
        default=None,
        description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
    )
    mounts: list[VolumeMountConfig] = Field(
        default_factory=list,
        description="List of volume mounts to share directories between host and container",
    )
    environment: dict[str, str] = Field(
        default_factory=dict,
        description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
    )

    # Output-size limits for tool results; negative values are rejected (ge=0)
    # and 0 is documented as "no truncation".
    bash_output_max_chars: int = Field(
        default=20000,
        ge=0,
        description="Maximum characters to keep from bash tool output. Output exceeding this limit is middle-truncated (head + tail), preserving the first and last half. Set to 0 to disable truncation.",
    )
    read_file_output_max_chars: int = Field(
        default=50000,
        ge=0,
        description="Maximum characters to keep from read_file tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
    )
    ls_output_max_chars: int = Field(
        default=20000,
        ge=0,
        description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
    )

    model_config = ConfigDict(extra="allow")
|
||||
@@ -0,0 +1,14 @@
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SkillEvolutionConfig(BaseModel):
    """Configuration for agent-managed skill evolution."""

    # Opt-in: the agent cannot create or modify skills unless this is set.
    enabled: bool = Field(
        default=False,
        description="Whether the agent can create and modify skills under skills/custom.",
    )
    moderation_model_name: str | None = Field(
        default=None,
        description="Optional model name for skill security moderation. Defaults to the primary chat model.",
    )
|
||||
@@ -0,0 +1,54 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
def _default_repo_root() -> Path:
    """Resolve the repo root without relying on the current working directory.

    The root is taken to be the directory five levels above this module's
    parent directory, based on the module's resolved file path.
    """
    return Path(__file__).resolve().parents[5]
|
||||
|
||||
|
||||
class SkillsConfig(BaseModel):
    """Configuration for the skills system.

    Describes where skills live on the host and where they are mounted
    inside the sandbox container.
    """

    path: str | None = Field(
        default=None,
        description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
    )
    container_path: str = Field(
        default="/mnt/skills",
        description="Path where skills are mounted in the sandbox container",
    )

    def get_skills_path(self) -> Path:
        """
        Return the resolved host path of the skills directory.

        When `path` is unset, the loader's default root is used. A configured
        relative path is anchored at the repo root so the result does not
        depend on the current working directory.

        Returns:
            Path to the skills directory
        """
        if not self.path:
            # Imported lazily, at the point of use.
            from deerflow.skills.loader import get_skills_root_path

            return get_skills_root_path()

        configured = Path(self.path)
        if not configured.is_absolute():
            configured = _default_repo_root() / configured
        return configured.resolve()

    def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
        """
        Build the container-side path of one skill.

        Args:
            skill_name: Name of the skill (directory name)
            category: Category of the skill (public or custom)

        Returns:
            `{container_path}/{category}/{skill_name}`
        """
        mount_root = self.container_path
        return f"{mount_root}/{category}/{skill_name}"
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Configuration for stream bridge."""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Names of the stream bridge backends accepted by StreamBridgeConfig.type.
StreamBridgeType = Literal["memory", "redis"]
|
||||
|
||||
|
||||
class StreamBridgeConfig(BaseModel):
    """Configuration for the stream bridge that connects agent workers to SSE endpoints."""

    # Backend selector; limited to the literals in StreamBridgeType.
    type: StreamBridgeType = Field(
        description="Stream bridge backend type. 'memory' uses in-process asyncio.Queue (single-process only). 'redis' uses Redis Streams (planned for Phase 2, not yet implemented).",
        default="memory",
    )

    # Connection string for the redis backend; unset by default.
    redis_url: str | None = Field(
        description="Redis URL for the redis stream bridge type. Example: 'redis://localhost:6379/0'.",
        default=None,
    )

    # Per-run buffer size used by the memory backend.
    queue_maxsize: int = Field(
        description="Maximum number of events buffered per run in the memory bridge.",
        default=256,
    )
|
||||
|
||||
|
||||
# Global configuration instance — None means no stream bridge is configured
|
||||
# (falls back to memory with defaults).
|
||||
# Module-level state: returned by get_stream_bridge_config() and rebound by
# set_stream_bridge_config() / load_stream_bridge_config_from_dict().
_stream_bridge_config: StreamBridgeConfig | None = None
|
||||
|
||||
|
||||
def get_stream_bridge_config() -> StreamBridgeConfig | None:
    """Return the module-level stream bridge configuration.

    Returns:
        The stored ``StreamBridgeConfig``, or ``None`` when none has been set.
    """
    current = _stream_bridge_config
    return current
|
||||
|
||||
|
||||
def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
    """Replace the module-level stream bridge configuration.

    Args:
        config: The new configuration, or ``None`` to mark the bridge as unconfigured.
    """
    global _stream_bridge_config
    _stream_bridge_config = config
|
||||
|
||||
|
||||
def load_stream_bridge_config_from_dict(config_dict: dict) -> None:
    """Build a ``StreamBridgeConfig`` from a mapping and store it module-wide.

    Args:
        config_dict: Keyword arguments forwarded to ``StreamBridgeConfig``.
    """
    global _stream_bridge_config
    parsed = StreamBridgeConfig(**config_dict)
    _stream_bridge_config = parsed
|
||||
@@ -0,0 +1,102 @@
|
||||
"""Configuration for the subagent system loaded from config.yaml."""
|
||||
|
||||
import logging
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubagentOverrideConfig(BaseModel):
    """Per-agent configuration overrides."""

    # Both fields are optional; None leaves the corresponding default in force.
    timeout_seconds: int | None = Field(
        description="Timeout in seconds for this subagent (None = use global default)",
        default=None,
        ge=1,
    )
    max_turns: int | None = Field(
        description="Maximum turns for this subagent (None = use global or builtin default)",
        default=None,
        ge=1,
    )
|
||||
|
||||
|
||||
class SubagentsAppConfig(BaseModel):
    """Configuration for the subagent system."""

    # Global timeout used when an agent has no timeout override.
    timeout_seconds: int = Field(
        description="Default timeout in seconds for all subagents (default: 900 = 15 minutes)",
        default=900,
        ge=1,
    )

    # Global max-turn override; None defers to the caller-supplied builtin default.
    max_turns: int | None = Field(
        description="Optional default max-turn override for all subagents (None = keep builtin defaults)",
        default=None,
        ge=1,
    )

    # Overrides looked up by agent name in the two resolver methods below.
    agents: dict[str, SubagentOverrideConfig] = Field(
        description="Per-agent configuration overrides keyed by agent name",
        default_factory=dict,
    )

    def get_timeout_for(self, agent_name: str) -> int:
        """Resolve the timeout that applies to one subagent.

        Args:
            agent_name: The name of the subagent.

        Returns:
            The agent's own ``timeout_seconds`` when an override sets one,
            otherwise the global ``timeout_seconds``.
        """
        per_agent = self.agents.get(agent_name)
        if per_agent is None or per_agent.timeout_seconds is None:
            return self.timeout_seconds
        return per_agent.timeout_seconds

    def get_max_turns_for(self, agent_name: str, builtin_default: int) -> int:
        """Resolve the max-turn limit that applies to one subagent.

        Args:
            agent_name: The name of the subagent.
            builtin_default: Value to fall back to when nothing is configured.

        Returns:
            The first value that is set, in this order: per-agent override,
            global ``max_turns``, ``builtin_default``.
        """
        per_agent = self.agents.get(agent_name)
        agent_level = None if per_agent is None else per_agent.max_turns
        for candidate in (agent_level, self.max_turns):
            if candidate is not None:
                return candidate
        return builtin_default
|
||||
|
||||
|
||||
# Module-level state, initialised with all-default settings; returned by
# get_subagents_app_config() and rebound by load_subagents_config_from_dict().
_subagents_config: SubagentsAppConfig = SubagentsAppConfig()
|
||||
|
||||
|
||||
def get_subagents_app_config() -> SubagentsAppConfig:
    """Return the module-level subagents configuration."""
    current = _subagents_config
    return current
|
||||
|
||||
|
||||
def load_subagents_config_from_dict(config_dict: dict) -> None:
    """Build a ``SubagentsAppConfig`` from a mapping, store it, and log a summary.

    Args:
        config_dict: Keyword arguments forwarded to ``SubagentsAppConfig``.
    """
    global _subagents_config
    loaded = SubagentsAppConfig(**config_dict)
    _subagents_config = loaded

    # Collect a short human-readable description for every agent that
    # overrides at least one setting; agents with empty overrides are omitted.
    described: dict = {}
    for agent_name, agent_override in loaded.agents.items():
        timeout = agent_override.timeout_seconds
        turns = agent_override.max_turns
        labelled = (
            (timeout, f"timeout={timeout}s"),
            (turns, f"max_turns={turns}"),
        )
        fragments = [text for value, text in labelled if value is not None]
        if fragments:
            described[agent_name] = ", ".join(fragments)

    if not described:
        logger.info(
            "Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
            loaded.timeout_seconds,
            loaded.max_turns,
        )
        return

    logger.info(
        "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s",
        loaded.timeout_seconds,
        loaded.max_turns,
        described,
    )
|
||||
@@ -0,0 +1,74 @@
|
||||
"""Configuration for conversation summarization."""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Units accepted by ContextSize.type.
ContextSizeType = Literal["fraction", "tokens", "messages"]
|
||||
|
||||
|
||||
class ContextSize(BaseModel):
    """Context size specification for trigger or keep parameters."""

    # Unit of the measurement, paired with the numeric amount below.
    type: ContextSizeType = Field(description="Type of context size specification")
    value: int | float = Field(description="Value for the context size specification")

    def to_tuple(self) -> tuple[ContextSizeType, int | float]:
        """Return this specification as a ``(type, value)`` pair.

        The pair form is the one handed to SummarizationMiddleware.
        """
        unit, amount = self.type, self.value
        return unit, amount
|
||||
|
||||
|
||||
class SummarizationConfig(BaseModel):
    """Configuration for automatic conversation summarization."""

    # Opt-in switch: summarization stays off unless enabled.
    enabled: bool = Field(
        description="Whether to enable automatic conversation summarization",
        default=False,
    )

    model_name: str | None = Field(
        description="Model name to use for summarization (None = use a lightweight model)",
        default=None,
    )

    # A single threshold, several thresholds, or nothing at all.
    trigger: ContextSize | list[ContextSize] | None = Field(
        description=(
            "One or more thresholds that trigger summarization. When any threshold is met, summarization runs. "
            "Examples: {'type': 'messages', 'value': 50} triggers at 50 messages, "
            "{'type': 'tokens', 'value': 4000} triggers at 4000 tokens, "
            "{'type': 'fraction', 'value': 0.8} triggers at 80% of model's max input tokens"
        ),
        default=None,
    )

    # A factory is used so every model instance gets its own ContextSize object.
    keep: ContextSize = Field(
        description=(
            "Context retention policy after summarization. Specifies how much history to preserve. "
            "Examples: {'type': 'messages', 'value': 20} keeps 20 messages, "
            "{'type': 'tokens', 'value': 3000} keeps 3000 tokens, "
            "{'type': 'fraction', 'value': 0.3} keeps 30% of model's max input tokens"
        ),
        default_factory=lambda: ContextSize(type="messages", value=20),
    )

    trim_tokens_to_summarize: int | None = Field(
        description="Maximum tokens to keep when preparing messages for summarization. Pass null to skip trimming.",
        default=4000,
    )

    summary_prompt: str | None = Field(
        description="Custom prompt template for generating summaries. If not provided, uses the default LangChain prompt.",
        default=None,
    )
|
||||
|
||||
|
||||
# Global configuration instance
|
||||
# Starts as an all-default SummarizationConfig; rebound by
# set_summarization_config() and load_summarization_config_from_dict().
_summarization_config: SummarizationConfig = SummarizationConfig()
|
||||
|
||||
|
||||
def get_summarization_config() -> SummarizationConfig:
    """Return the module-level summarization configuration."""
    current = _summarization_config
    return current
|
||||
|
||||
|
||||
def set_summarization_config(config: SummarizationConfig) -> None:
    """Replace the module-level summarization configuration.

    Args:
        config: The configuration object to store.
    """
    global _summarization_config
    _summarization_config = config
|
||||
|
||||
|
||||
def load_summarization_config_from_dict(config_dict: dict) -> None:
    """Build a ``SummarizationConfig`` from a mapping and store it module-wide.

    Args:
        config_dict: Keyword arguments forwarded to ``SummarizationConfig``.
    """
    global _summarization_config
    parsed = SummarizationConfig(**config_dict)
    _summarization_config = parsed
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Configuration for automatic thread title generation."""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TitleConfig(BaseModel):
    """Configuration for automatic thread title generation."""

    # Title generation is on by default.
    enabled: bool = Field(
        description="Whether to enable automatic title generation",
        default=True,
    )

    # Word budget, validated to the inclusive range 1..20.
    max_words: int = Field(
        description="Maximum number of words in the generated title",
        default=6,
        ge=1,
        le=20,
    )

    # Character budget, validated to the inclusive range 10..200.
    max_chars: int = Field(
        description="Maximum number of characters in the generated title",
        default=60,
        ge=10,
        le=200,
    )

    model_name: str | None = Field(
        description="Model name to use for title generation (None = use default model)",
        default=None,
    )

    # The default template carries {max_words}, {user_msg} and {assistant_msg} placeholders.
    prompt_template: str = Field(
        description="Prompt template for title generation",
        default=("Generate a concise title (max {max_words} words) for this conversation.\nUser: {user_msg}\nAssistant: {assistant_msg}\n\nReturn ONLY the title, no quotes, no explanation."),
    )
|
||||
|
||||
|
||||
# Global configuration instance
|
||||
# Starts as an all-default TitleConfig; rebound by set_title_config() and
# load_title_config_from_dict().
_title_config: TitleConfig = TitleConfig()
|
||||
|
||||
|
||||
def get_title_config() -> TitleConfig:
    """Return the module-level title configuration."""
    current = _title_config
    return current
|
||||
|
||||
|
||||
def set_title_config(config: TitleConfig) -> None:
    """Replace the module-level title configuration.

    Args:
        config: The configuration object to store.
    """
    global _title_config
    _title_config = config
|
||||
|
||||
|
||||
def load_title_config_from_dict(config_dict: dict) -> None:
    """Build a ``TitleConfig`` from a mapping and store it module-wide.

    Args:
        config_dict: Keyword arguments forwarded to ``TitleConfig``.
    """
    global _title_config
    parsed = TitleConfig(**config_dict)
    _title_config = parsed
|
||||
@@ -0,0 +1,7 @@
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TokenUsageConfig(BaseModel):
    """Configuration for token usage tracking."""

    # Off by default; must be switched on explicitly.
    enabled: bool = Field(
        description="Enable token usage tracking middleware",
        default=False,
    )
|
||||
@@ -0,0 +1,20 @@
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class ToolGroupConfig(BaseModel):
    """Config section for a tool group"""

    # extra="allow": keys beyond the declared fields are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Required: no default is provided.
    name: str = Field(
        ...,
        description="Unique name for the tool group",
    )
|
||||
|
||||
|
||||
class ToolConfig(BaseModel):
    """Config section for a tool"""

    # extra="allow": keys beyond the declared fields are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    # All three fields are required (no defaults).
    name: str = Field(
        ...,
        description="Unique name for the tool",
    )
    group: str = Field(
        ...,
        description="Group name for the tool",
    )
    use: str = Field(..., description="Variable name of the tool provider(e.g. deerflow.sandbox.tools:bash_tool)")
|
||||
@@ -0,0 +1,35 @@
|
||||
"""Configuration for deferred tool loading via tool_search."""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ToolSearchConfig(BaseModel):
    """Configuration for deferred tool loading via tool_search.

    When enabled, MCP tools are not loaded into the agent's context directly.
    Instead, they are listed by name in the system prompt and discoverable
    via the tool_search tool at runtime.
    """

    # Off by default; must be switched on explicitly.
    enabled: bool = Field(
        description="Defer tools and enable tool_search",
        default=False,
    )
|
||||
|
||||
|
||||
# None until get_tool_search_config() creates a default instance or
# load_tool_search_config_from_dict() stores a validated one.
_tool_search_config: ToolSearchConfig | None = None
|
||||
|
||||
|
||||
def get_tool_search_config() -> ToolSearchConfig:
    """Return the module-level tool search config, creating a default on first use.

    When nothing has been stored yet, an all-default ``ToolSearchConfig`` is
    created, stored, and returned; later calls return that same object.
    """
    global _tool_search_config
    cached = _tool_search_config
    if cached is None:
        cached = ToolSearchConfig()
        _tool_search_config = cached
    return cached
|
||||
|
||||
|
||||
def load_tool_search_config_from_dict(data: dict) -> ToolSearchConfig:
    """Validate ``data`` into a ``ToolSearchConfig``, store it, and return it.

    Args:
        data: Mapping passed to ``ToolSearchConfig.model_validate``.

    Returns:
        The newly stored configuration object.
    """
    global _tool_search_config
    validated = ToolSearchConfig.model_validate(data)
    _tool_search_config = validated
    return validated
|
||||
@@ -0,0 +1,149 @@
|
||||
import os
|
||||
import threading
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Held by get_tracing_config() while it builds the cached TracingConfig.
_config_lock = threading.Lock()
|
||||
|
||||
|
||||
class LangSmithTracingConfig(BaseModel):
    """Configuration for LangSmith tracing."""

    # Every field is required; none has a default.
    enabled: bool = Field(...)
    api_key: str | None = Field(...)
    project: str = Field(...)
    endpoint: str = Field(...)

    @property
    def is_configured(self) -> bool:
        """True only when tracing is enabled and a non-empty API key is present."""
        return bool(self.api_key) if self.enabled else self.enabled

    # NOTE(review): pydantic's BaseModel is understood to also expose a
    # (deprecated) ``validate`` classmethod; this instance method would shadow
    # it - confirm nothing relies on the pydantic one.
    def validate(self) -> None:
        """Raise if tracing is enabled without an API key.

        Raises:
            ValueError: When ``enabled`` is true and ``api_key`` is empty or None.
        """
        if not self.enabled or self.api_key:
            return
        raise ValueError("LangSmith tracing is enabled but LANGSMITH_API_KEY (or LANGCHAIN_API_KEY) is not set.")
|
||||
|
||||
|
||||
class LangfuseTracingConfig(BaseModel):
    """Configuration for Langfuse tracing."""

    # Every field is required; none has a default.
    enabled: bool = Field(...)
    public_key: str | None = Field(...)
    secret_key: str | None = Field(...)
    host: str = Field(...)

    @property
    def is_configured(self) -> bool:
        """True only when tracing is enabled and both keys are non-empty."""
        if not self.enabled:
            return self.enabled
        return all((self.public_key, self.secret_key))

    # NOTE(review): pydantic's BaseModel is understood to also expose a
    # (deprecated) ``validate`` classmethod; this instance method would shadow
    # it - confirm nothing relies on the pydantic one.
    def validate(self) -> None:
        """Raise if tracing is enabled while a required key is missing.

        Raises:
            ValueError: When ``enabled`` is true and ``public_key`` and/or
                ``secret_key`` is empty or None; the message lists the
                corresponding environment variable names.
        """
        if not self.enabled:
            return

        required = (
            ("LANGFUSE_PUBLIC_KEY", self.public_key),
            ("LANGFUSE_SECRET_KEY", self.secret_key),
        )
        missing: list[str] = [env_name for env_name, current in required if not current]
        if missing:
            raise ValueError(f"Langfuse tracing is enabled but required settings are missing: {', '.join(missing)}")
|
||||
|
||||
|
||||
class TracingConfig(BaseModel):
    """Tracing configuration for supported providers."""

    # One required sub-config per supported provider.
    langsmith: LangSmithTracingConfig = Field(...)
    langfuse: LangfuseTracingConfig = Field(...)

    @property
    def is_configured(self) -> bool:
        """True when at least one provider is enabled and fully configured."""
        return len(self.enabled_providers) > 0

    @property
    def explicitly_enabled_providers(self) -> list[str]:
        """Names of providers whose ``enabled`` flag is set, complete or not."""
        switches = (
            ("langsmith", self.langsmith.enabled),
            ("langfuse", self.langfuse.enabled),
        )
        return [provider for provider, switched_on in switches if switched_on]

    @property
    def enabled_providers(self) -> list[str]:
        """Names of providers that report ``is_configured``."""
        readiness = (
            ("langsmith", self.langsmith.is_configured),
            ("langfuse", self.langfuse.is_configured),
        )
        return [provider for provider, ready in readiness if ready]

    def validate_enabled(self) -> None:
        """Run each provider's ``validate()``; LangSmith is checked first.

        Raises:
            ValueError: Propagated from the first provider whose validation fails.
        """
        for provider_config in (self.langsmith, self.langfuse):
            provider_config.validate()
|
||||
|
||||
|
||||
# Cached TracingConfig; None until get_tracing_config() builds it on first call.
_tracing_config: TracingConfig | None = None
|
||||
|
||||
|
||||
# Lower-cased strings that _env_flag_preferred() reads as True; any other
# non-empty value reads as False.
_TRUTHY_VALUES = {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _env_flag_preferred(*names: str) -> bool:
    """Interpret the first set, non-blank environment variable as a boolean.

    Args:
        *names: Candidate environment variable names, in priority order.

    Returns:
        Whether the first non-blank value, stripped and lower-cased, is one of
        ``_TRUTHY_VALUES``. ``False`` when every candidate is unset or blank.
    """
    for env_name in names:
        raw = os.environ.get(env_name)
        if raw is None:
            continue
        trimmed = raw.strip()
        if trimmed:
            # The first usable variable decides; later names are never consulted.
            return trimmed.lower() in _TRUTHY_VALUES
    return False
|
||||
|
||||
|
||||
def _first_env_value(*names: str) -> str | None:
|
||||
"""Return the first non-empty environment value from candidate names."""
|
||||
for name in names:
|
||||
value = os.environ.get(name)
|
||||
if value and value.strip():
|
||||
return value.strip()
|
||||
return None
|
||||
|
||||
|
||||
def get_tracing_config() -> TracingConfig:
    """Return the cached tracing configuration, building it from the environment once.

    The first call reads the LangSmith/LangChain and Langfuse environment
    variables and caches the resulting ``TracingConfig``; later calls return
    the cached object without re-reading the environment.
    """
    global _tracing_config

    cached = _tracing_config
    if cached is not None:
        # Already built: return it without taking the lock.
        return cached

    with _config_lock:
        # Re-check under the lock in case another thread built it meanwhile.
        cached = _tracing_config
        if cached is None:
            langsmith_section = LangSmithTracingConfig(
                enabled=_env_flag_preferred("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2", "LANGCHAIN_TRACING"),
                api_key=_first_env_value("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"),
                project=_first_env_value("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT") or "deer-flow",
                endpoint=_first_env_value("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT") or "https://api.smith.langchain.com",
            )
            langfuse_section = LangfuseTracingConfig(
                enabled=_env_flag_preferred("LANGFUSE_TRACING"),
                public_key=_first_env_value("LANGFUSE_PUBLIC_KEY"),
                secret_key=_first_env_value("LANGFUSE_SECRET_KEY"),
                host=_first_env_value("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com",
            )
            cached = TracingConfig(langsmith=langsmith_section, langfuse=langfuse_section)
            _tracing_config = cached
        return cached
|
||||
|
||||
|
||||
def get_enabled_tracing_providers() -> list[str]:
    """Return the names of tracing providers that are enabled and fully configured."""
    tracing = get_tracing_config()
    return tracing.enabled_providers
|
||||
|
||||
|
||||
def get_explicitly_enabled_tracing_providers() -> list[str]:
    """Return the names of tracing providers whose enable flag is set, even if incomplete."""
    tracing = get_tracing_config()
    return tracing.explicitly_enabled_providers
|
||||
|
||||
|
||||
def validate_enabled_tracing_providers() -> None:
    """Check that every explicitly enabled provider has its required settings.

    Raises:
        ValueError: Propagated from ``TracingConfig.validate_enabled()``.
    """
    tracing = get_tracing_config()
    tracing.validate_enabled()
|
||||
|
||||
|
||||
def is_tracing_enabled() -> bool:
    """Report whether at least one tracing provider is enabled and fully configured."""
    tracing = get_tracing_config()
    return tracing.is_configured
|
||||
Reference in New Issue
Block a user