Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions
--- a/deer-flow/backend/packages/harness/deerflow/config/__init__.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/__init__.py
@@ -0,0 +1,30 @@
+from .app_config import get_app_config
+from .extensions_config import ExtensionsConfig, get_extensions_config
+from .memory_config import MemoryConfig, get_memory_config
+from .paths import Paths, get_paths
+from .skill_evolution_config import SkillEvolutionConfig
+from .skills_config import SkillsConfig
+from .tracing_config import (
+    get_enabled_tracing_providers,
+    get_explicitly_enabled_tracing_providers,
+    get_tracing_config,
+    is_tracing_enabled,
+    validate_enabled_tracing_providers,
+)
+
+__all__ = [
+    "get_app_config",
+    "SkillEvolutionConfig",
+    "Paths",
+    "get_paths",
+    "SkillsConfig",
+    "ExtensionsConfig",
+    "get_extensions_config",
+    "MemoryConfig",
+    "get_memory_config",
+    "get_tracing_config",
+    "get_explicitly_enabled_tracing_providers",
+    "get_enabled_tracing_providers",
+    "is_tracing_enabled",
+    "validate_enabled_tracing_providers",
+]
--- a/deer-flow/backend/packages/harness/deerflow/config/acp_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/acp_config.py
@@ -0,0 +1,51 @@
+"""ACP (Agent Client Protocol) agent configuration loaded from config.yaml."""
+
+import logging
+from collections.abc import Mapping
+
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+
+class ACPAgentConfig(BaseModel):
+    """Launch and permission settings for one ACP-compatible agent; `command` and `description` have no default and must be supplied."""
+
+    command: str = Field(description="Command to launch the ACP agent subprocess")
+    args: list[str] = Field(default_factory=list, description="Additional command arguments")
+    env: dict[str, str] = Field(default_factory=dict, description="Environment variables to inject into the agent subprocess. Values starting with $ are resolved from host environment variables.")
+    description: str = Field(description="Description of the agent's capabilities (shown in tool description)")
+    model: str | None = Field(default=None, description="Model hint passed to the agent (optional)")
+    auto_approve_permissions: bool = Field(
+        default=False,  # deny by default: automatic approval has to be opted into per agent
+        description=(
+            "When True, DeerFlow automatically approves all ACP permission requests from this agent "
+            "(allow_once preferred over allow_always). When False (default), all permission requests "
+            "are denied — the agent must be configured to operate without requesting permissions."
+        ),
+    )
+
+
+_acp_agents: dict[str, ACPAgentConfig] = {}
+
+
+def get_acp_agents() -> dict[str, ACPAgentConfig]:
+    """Get the currently configured ACP agents.
+
+    Returns:
+        The module-level registry itself (agent name -> ACPAgentConfig), not a copy. Empty dict if no ACP agents are configured.
+    """
+    return _acp_agents
+
+
+def load_acp_config_from_dict(config_dict: Mapping[str, Mapping[str, object]] | None) -> None:
+    """Replace the ACP agent registry with the agents described by `config_dict`.
+
+    Args:
+        config_dict: Agent name -> ACPAgentConfig field mapping; None is treated as "no agents".
+    """
+    global _acp_agents
+    raw_agents = {} if config_dict is None else config_dict
+    parsed = {agent_name: ACPAgentConfig(**fields) for agent_name, fields in raw_agents.items()}
+    _acp_agents = parsed  # rebound only after every entry validated
+    logger.info("ACP config loaded: %d agent(s): %s", len(_acp_agents), list(_acp_agents.keys()))
--- a/deer-flow/backend/packages/harness/deerflow/config/agents_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/agents_config.py
@@ -0,0 +1,125 @@
+"""Configuration and loaders for custom agents."""
+
+import logging
+import re
+from typing import Any
+
+import yaml
+from pydantic import BaseModel
+
+from deerflow.config.paths import get_paths
+
+logger = logging.getLogger(__name__)
+
+SOUL_FILENAME = "SOUL.md"
+AGENT_NAME_PATTERN = re.compile(r"^[A-Za-z0-9-]+$")
+
+
+class AgentConfig(BaseModel):
+    """Settings for one custom agent; only `name` is required, every other field has a default."""
+
+    name: str
+    description: str = ""
+    model: str | None = None
+    tool_groups: list[str] | None = None
+    # skills controls which skills are loaded into the agent's prompt:
+    # - None (or omitted): load all enabled skills (default fallback behavior)
+    # - [] (explicit empty list): disable all skills
+    # - ["skill1", "skill2"]: load only the specified skills
+    skills: list[str] | None = None
+
+
+def load_agent_config(name: str | None) -> AgentConfig | None:
+    """Load a custom agent's config.yaml from its agent directory.
+
+    Args:
+        name: The agent name, or None when no custom agent is selected.
+
+    Returns:
+        The parsed AgentConfig, or None when `name` is None.
+
+    Raises:
+        FileNotFoundError: If the agent directory or config.yaml does not exist.
+        ValueError: If `name` is malformed, or config.yaml is not valid YAML or not a mapping.
+    """
+
+    if name is None:
+        return None
+
+    if not AGENT_NAME_PATTERN.match(name):
+        raise ValueError(f"Invalid agent name '{name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")
+    agent_dir = get_paths().agent_dir(name)
+    config_file = agent_dir / "config.yaml"
+
+    if not agent_dir.exists():
+        raise FileNotFoundError(f"Agent directory not found: {agent_dir}")
+    if not config_file.exists():
+        raise FileNotFoundError(f"Agent config not found: {config_file}")
+
+    try:
+        with open(config_file, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
+    except yaml.YAMLError as e:
+        raise ValueError(f"Failed to parse agent config {config_file}: {e}") from e
+
+    # A top-level list or scalar is valid YAML but not a config; reject it with the documented
+    # ValueError instead of an AttributeError/TypeError from the dict operations below.
+    if not isinstance(data, dict):
+        raise ValueError(f"Agent config {config_file} must be a YAML mapping, got {type(data).__name__}")
+
+    # Fall back to the directory name when the file does not set `name`; keys that are not
+    # AgentConfig fields (e.g. legacy prompt_file) are dropped before validation.
+    data.setdefault("name", name)
+    known_fields = set(AgentConfig.model_fields.keys())
+    return AgentConfig(**{k: v for k, v in data.items() if k in known_fields})
+
+
+def load_agent_soul(agent_name: str | None) -> str | None:
+    """Return the stripped text of an agent's SOUL.md, or None when there is nothing to use.
+
+    SOUL.md holds the agent's personality, values, and behavioral guardrails. It is read from the
+    agent's own directory when `agent_name` is truthy, otherwise from the base directory.
+
+    Args:
+        agent_name: Name of the custom agent; None (or an empty string) selects the default agent.
+
+    Returns:
+        The file content with surrounding whitespace removed, or None if the file is missing or blank.
+    """
+    paths = get_paths()
+    soul_dir = paths.agent_dir(agent_name) if agent_name else paths.base_dir
+    soul_file = soul_dir / SOUL_FILENAME
+    if soul_file.exists():
+        return soul_file.read_text(encoding="utf-8").strip() or None
+    return None
+
+
+def list_custom_agents() -> list[AgentConfig]:
+    """Collect the config of every loadable agent under the agents directory.
+
+    Entries are visited in sorted order; anything that is not a directory, has no
+    config.yaml, or fails to load is skipped (the last case with a warning).
+
+    Returns:
+        One AgentConfig per agent that loaded successfully; empty if the directory is missing.
+    """
+    root = get_paths().agents_dir
+    if not root.exists():
+        return []
+
+    found: list[AgentConfig] = []
+    for entry in sorted(root.iterdir()):
+        if not entry.is_dir():
+            continue
+        if not (entry / "config.yaml").exists():
+            logger.debug(f"Skipping {entry.name}: no config.yaml")
+            continue
+        # Best effort: one broken agent must not hide the others.
+        try:
+            agent_cfg = load_agent_config(entry.name)
+        except Exception as e:
+            logger.warning(f"Skipping agent '{entry.name}': {e}")
+        else:
+            found.append(agent_cfg)
+
+    return found
--- a/deer-flow/backend/packages/harness/deerflow/config/app_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/app_config.py
@@ -0,0 +1,379 @@
+import logging
+import os
+from contextvars import ContextVar
+from pathlib import Path
+from typing import Any, Self
+
+import yaml
+from dotenv import load_dotenv
+from pydantic import BaseModel, ConfigDict, Field
+
+from deerflow.config.acp_config import load_acp_config_from_dict
+from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
+from deerflow.config.extensions_config import ExtensionsConfig
+from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
+from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
+from deerflow.config.model_config import ModelConfig
+from deerflow.config.sandbox_config import SandboxConfig
+from deerflow.config.skill_evolution_config import SkillEvolutionConfig
+from deerflow.config.skills_config import SkillsConfig
+from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
+from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
+from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
+from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
+from deerflow.config.token_usage_config import TokenUsageConfig
+from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
+from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+
+def _default_config_candidates() -> tuple[Path, ...]:
+    """Return the cwd-independent default config.yaml paths, in lookup order: backend dir, then its parent."""
+    # parents[4] climbs config/ -> deerflow/ -> harness/ -> packages/ -> backend/ from this file's location.
+    backend = Path(__file__).resolve().parents[4]
+    return tuple(directory / "config.yaml" for directory in (backend, backend.parent))
+
+
+class AppConfig(BaseModel):
+    """Top-level DeerFlow configuration model, normally built from config.yaml by `from_file()`."""
+
+    log_level: str = Field(default="info", description="Logging level for deerflow modules (debug/info/warning/error)")
+    token_usage: TokenUsageConfig = Field(default_factory=TokenUsageConfig, description="Token usage tracking configuration")
+    models: list[ModelConfig] = Field(default_factory=list, description="Available models")
+    sandbox: SandboxConfig = Field(description="Sandbox configuration")  # the only field without a default
+    tools: list[ToolConfig] = Field(default_factory=list, description="Available tools")
+    tool_groups: list[ToolGroupConfig] = Field(default_factory=list, description="Available tool groups")
+    skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
+    skill_evolution: SkillEvolutionConfig = Field(default_factory=SkillEvolutionConfig, description="Agent-managed skill evolution configuration")
+    extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")
+    tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig, description="Tool search / deferred loading configuration")
+    title: TitleConfig = Field(default_factory=TitleConfig, description="Automatic title generation configuration")
+    summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
+    memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
+    subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
+    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
+    model_config = ConfigDict(extra="allow", frozen=False)  # unknown top-level keys are accepted; instances stay mutable
+    checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
+    stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")
+
+    @classmethod
+    def resolve_config_path(cls, config_path: str | None = None) -> Path:
+        """Resolve the config file path, raising FileNotFoundError when the selected location does not exist.
+
+        Priority:
+        1. If provided `config_path` argument, use it.
+        2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
+        3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.
+        """
+        if config_path:
+            path = Path(config_path)
+            if not Path.exists(path):
+                raise FileNotFoundError(f"Config file specified by param `config_path` not found at {path}")
+            return path
+        elif os.getenv("DEER_FLOW_CONFIG_PATH"):
+            path = Path(os.getenv("DEER_FLOW_CONFIG_PATH"))
+            if not Path.exists(path):
+                raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
+            return path
+        else:
+            for path in _default_config_candidates():
+                if path.exists():
+                    return path
+            raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")
+
+    @classmethod
+    def from_file(cls, config_path: str | None = None) -> Self:
+        """Load config from YAML file.
+
+        Besides building the model, this pushes several sections into module-level config singletons (the loader calls below) and merges in the separately stored extensions config.
+
+        Args:
+            config_path: Path to the config file; see `resolve_config_path` for how a missing value is resolved.
+
+        Returns:
+            AppConfig: The loaded config.
+        """
+        resolved_path = cls.resolve_config_path(config_path)
+        with open(resolved_path, encoding="utf-8") as f:
+            config_data = yaml.safe_load(f) or {}
+
+        # Warn (only) when config_version is behind config.example.yaml
+        cls._check_config_version(config_data, resolved_path)
+
+        config_data = cls.resolve_env_variables(config_data)  # "$NAME" strings become os.getenv("NAME")
+
+        # Load title config if present. NOTE(review): each optional section below is only pushed to its loader when its key is present, so a section removed from config.yaml appears to keep its previously loaded value on reload — confirm this is intended.
+        if "title" in config_data:
+            load_title_config_from_dict(config_data["title"])
+
+        # Load summarization config if present
+        if "summarization" in config_data:
+            load_summarization_config_from_dict(config_data["summarization"])
+
+        # Load memory config if present
+        if "memory" in config_data:
+            load_memory_config_from_dict(config_data["memory"])
+
+        # Load subagents config if present
+        if "subagents" in config_data:
+            load_subagents_config_from_dict(config_data["subagents"])
+
+        # Load tool_search config if present
+        if "tool_search" in config_data:
+            load_tool_search_config_from_dict(config_data["tool_search"])
+
+        # Load guardrails config if present
+        if "guardrails" in config_data:
+            load_guardrails_config_from_dict(config_data["guardrails"])
+
+        # Load checkpointer config if present
+        if "checkpointer" in config_data:
+            load_checkpointer_config_from_dict(config_data["checkpointer"])
+
+        # Load stream bridge config if present
+        if "stream_bridge" in config_data:
+            load_stream_bridge_config_from_dict(config_data["stream_bridge"])
+
+        # Always refresh ACP agent config so removed entries do not linger across reloads.
+        load_acp_config_from_dict(config_data.get("acp_agents", {}))
+
+        # Load extensions config separately (it's in a different file); it overwrites any `extensions` key from config.yaml
+        extensions_config = ExtensionsConfig.from_file()
+        config_data["extensions"] = extensions_config.model_dump()
+
+        result = cls.model_validate(config_data)
+        return result
+
+    @classmethod
+    def _check_config_version(cls, config_data: dict, config_path: Path) -> None:
+        """Check if the user's config.yaml is outdated compared to config.example.yaml.
+
+        Emits a warning if the user's config_version is lower than the example's; a missing or unreadable example file is silently ignored.
+        Missing or non-integer config_version is treated as version 0 (pre-versioning).
+        """
+        try:
+            user_version = int(config_data.get("config_version", 0))
+        except (TypeError, ValueError):
+            user_version = 0
+
+        # Find config.example.yaml by searching config.yaml's directory and its parents
+        example_path = None
+        search_dir = config_path.parent
+        for _ in range(5):  # search up to 5 levels
+            candidate = search_dir / "config.example.yaml"
+            if candidate.exists():
+                example_path = candidate
+                break
+            parent = search_dir.parent
+            if parent == search_dir:  # reached the filesystem root
+                break
+            search_dir = parent
+        if example_path is None:
+            return
+
+        try:
+            with open(example_path, encoding="utf-8") as f:
+                example_data = yaml.safe_load(f)
+            raw = example_data.get("config_version", 0) if example_data else 0
+            try:
+                example_version = int(raw)
+            except (TypeError, ValueError):
+                example_version = 0
+        except Exception:  # best effort: a broken example file must not block loading the real config
+            return
+
+        if user_version < example_version:
+            logger.warning(
+                "Your config.yaml (version %d) is outdated — the latest version is %d. Run `make config-upgrade` to merge new fields into your config.",
+                user_version,
+                example_version,
+            )
+
+    @classmethod
+    def resolve_env_variables(cls, config: Any) -> Any:
+        """Recursively resolve environment variables in the config.
+
+        Any string that starts with `$` is replaced by `os.getenv` of the rest (example: $OPENAI_API_KEY); a missing variable raises ValueError.
+
+        Args:
+            config: The config to resolve environment variables in (str, dict, list, or any other value).
+
+        Returns:
+            The resolved config: dicts and lists are rebuilt as new objects, other values are returned as-is.
+        """
+        if isinstance(config, str):
+            if config.startswith("$"):
+                env_value = os.getenv(config[1:])
+                if env_value is None:
+                    raise ValueError(f"Environment variable {config[1:]} not found for config value {config}")
+                return env_value
+            return config
+        elif isinstance(config, dict):
+            return {k: cls.resolve_env_variables(v) for k, v in config.items()}
+        elif isinstance(config, list):
+            return [cls.resolve_env_variables(item) for item in config]
+        return config
+
+    def get_model_config(self, name: str) -> ModelConfig | None:
+        """Get the model config by name.
+
+        Args:
+            name: The name of the model to get the config for.
+
+        Returns:
+            The first model config in `self.models` with that name, otherwise None.
+        """
+        return next((model for model in self.models if model.name == name), None)
+
+    def get_tool_config(self, name: str) -> ToolConfig | None:
+        """Get the tool config by name.
+
+        Args:
+            name: The name of the tool to get the config for.
+
+        Returns:
+            The first tool config in `self.tools` with that name, otherwise None.
+        """
+        return next((tool for tool in self.tools if tool.name == name), None)
+
+    def get_tool_group_config(self, name: str) -> ToolGroupConfig | None:
+        """Get the tool group config by name.
+
+        Args:
+            name: The name of the tool group to get the config for.
+
+        Returns:
+            The first tool group config in `self.tool_groups` with that name, otherwise None.
+        """
+        return next((group for group in self.tool_groups if group.name == name), None)
+
+
+_app_config: AppConfig | None = None
+_app_config_path: Path | None = None
+_app_config_mtime: float | None = None
+_app_config_is_custom = False
+_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
+_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
+
+
+def _get_config_mtime(config_path: Path) -> float | None:
+    """Return `config_path`'s st_mtime, or None when stat() fails with OSError (e.g. the file does not exist)."""
+    try:
+        return config_path.stat().st_mtime
+    except OSError:
+        return None
+
+
+def _load_and_cache_app_config(config_path: str | None = None) -> AppConfig:
+    """Parse the resolved config file and store it, its path, and its mtime as the file-backed cache."""
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+
+    source = AppConfig.resolve_config_path(config_path)
+    loaded = AppConfig.from_file(str(source))
+    # Nothing above touched the cache, so a failed resolve/parse leaves the previous state intact.
+    _app_config, _app_config_path = loaded, source
+    _app_config_mtime, _app_config_is_custom = _get_config_mtime(source), False
+    return loaded
+
+
+def get_app_config() -> AppConfig:
+    """Return the AppConfig that applies to the current execution context.
+
+    Lookup order: the context-scoped override set by `push_current_app_config()`, then an
+    instance injected with `set_app_config()`, then the file-backed cache. The file-backed cache
+    is reloaded whenever the resolved config path or its modification time differs from what was
+    cached. Use `reload_app_config()` to force a reload, or `reset_app_config()` to clear the cache.
+    """
+    override = _current_app_config.get()
+    if override is not None:
+        return override
+
+    # An injected (custom) config is not tied to a file, so it is returned without consulting the disk.
+    if _app_config_is_custom and _app_config is not None:
+        return _app_config
+
+    resolved_path = AppConfig.resolve_config_path()
+    current_mtime = _get_config_mtime(resolved_path)
+    same_path = _app_config_path == resolved_path
+    if _app_config is not None and same_path and _app_config_mtime == current_mtime:
+        return _app_config
+
+    # Only announce a reload when the same file visibly changed; first loads and path switches stay quiet.
+    if same_path and _app_config_mtime is not None and current_mtime is not None and _app_config_mtime != current_mtime:
+        logger.info(
+            "Config file has been modified (mtime: %s -> %s), reloading AppConfig",
+            _app_config_mtime,
+            current_mtime,
+        )
+    return _load_and_cache_app_config(str(resolved_path))
+
+
+def reload_app_config(config_path: str | None = None) -> AppConfig:
+    """Reload the config from file and update the cached instance.
+
+    Unlike `get_app_config()`, this always re-reads the file, and it replaces an instance
+    previously injected with `set_app_config()` (the cache is marked file-backed again).
+
+    Args:
+        config_path: Optional path to config file. If not provided,
+                     uses the default resolution strategy.
+
+    Returns:
+        The newly loaded AppConfig instance.
+    """
+    return _load_and_cache_app_config(config_path)
+
+
+def reset_app_config() -> None:
+    """Forget the cached config and all of its bookkeeping.
+
+    The next `get_app_config()` call without a context-scoped override loads
+    the config from file again. Useful for testing or when switching between
+    different configurations.
+    """
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+    # Drop the instance together with the path/mtime it was loaded from.
+    _app_config = _app_config_path = _app_config_mtime = None
+    # The cache no longer holds an injected (custom) instance.
+    _app_config_is_custom = False
+
+
+def set_app_config(config: AppConfig) -> None:
+    """Install `config` as the cached AppConfig instead of one loaded from a file.
+
+    This allows injecting a custom or mock config for testing purposes.
+
+    Args:
+        config: The AppConfig instance to use.
+    """
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+    # The custom flag is what makes get_app_config() return this instance without consulting the disk.
+    _app_config, _app_config_is_custom = config, True
+    # An injected config has no backing file, so there is no path or mtime to track.
+    _app_config_path = _app_config_mtime = None
+
+
+def peek_current_app_config() -> AppConfig | None:
+    """Return the runtime-scoped AppConfig override for the current context, or None if none is active; never loads from disk."""
+    return _current_app_config.get()
+
+
+def push_current_app_config(config: AppConfig) -> None:
+    """Make `config` the context-scoped override, remembering the one it replaces so `pop_current_app_config()` can restore it."""
+    replaced = _current_app_config.get()
+    _current_app_config_stack.set((*_current_app_config_stack.get(), replaced))
+    _current_app_config.set(config)
+
+
+def pop_current_app_config() -> None:
+    """Restore the override that was active before the latest push; with nothing pushed, just clear the override."""
+    saved = _current_app_config_stack.get()
+    if saved:
+        *remaining, restored = saved
+        _current_app_config_stack.set(tuple(remaining))
+        _current_app_config.set(restored)
+    else:
+        _current_app_config.set(None)
--- a/deer-flow/backend/packages/harness/deerflow/config/checkpointer_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/checkpointer_config.py
@@ -0,0 +1,46 @@
+"""Configuration for LangGraph checkpointer."""
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+CheckpointerType = Literal["memory", "sqlite", "postgres"]
+
+
+class CheckpointerConfig(BaseModel):
+    """Configuration for LangGraph state persistence checkpointer. NOTE(review): nothing in this class enforces that `connection_string` is set for sqlite/postgres — presumably checked where the checkpointer is built; confirm."""
+
+    type: CheckpointerType = Field(
+        description="Checkpointer backend type. "
+        "'memory' is in-process only (lost on restart). "
+        "'sqlite' persists to a local file (requires langgraph-checkpoint-sqlite). "
+        "'postgres' persists to PostgreSQL (requires langgraph-checkpoint-postgres)."
+    )
+    connection_string: str | None = Field(
+        default=None,
+        description="Connection string for sqlite (file path) or postgres (DSN). "
+        "Required for sqlite and postgres types. "
+        "For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
+        "For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'.",
+    )
+
+
+# Global configuration instance — None means no checkpointer is configured.
+_checkpointer_config: CheckpointerConfig | None = None
+
+
+def get_checkpointer_config() -> CheckpointerConfig | None:
+    """Return the module-level checkpointer configuration, or None if none has been set or loaded."""
+    return _checkpointer_config
+
+
+def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
+    """Replace the module-level checkpointer configuration; passing None marks it as not configured."""
+    global _checkpointer_config
+    _checkpointer_config = config
+
+
+def load_checkpointer_config_from_dict(config_dict: dict) -> None:
+    """Validate `config_dict` as a CheckpointerConfig and install it as the module-level configuration."""
+    # Construction runs first, so a validation error leaves the previous configuration untouched.
+    set_checkpointer_config(CheckpointerConfig(**config_dict))
--- a/deer-flow/backend/packages/harness/deerflow/config/extensions_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/extensions_config.py
@@ -0,0 +1,256 @@
+"""Unified extensions configuration for MCP servers and skills."""
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class McpOAuthConfig(BaseModel):
+    """OAuth settings for an MCP server reached over HTTP/SSE transports.
+
+    Only `token_url` is required. Keys that are not declared below are
+    accepted and kept (`extra="allow"`).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    enabled: bool = Field(
+        default=True,
+        description="Whether OAuth token injection is enabled",
+    )
+    token_url: str = Field(
+        description="OAuth token endpoint URL",
+    )
+    grant_type: Literal["client_credentials", "refresh_token"] = Field(default="client_credentials", description="OAuth grant type")
+    client_id: str | None = Field(
+        default=None,
+        description="OAuth client ID",
+    )
+    client_secret: str | None = Field(
+        default=None,
+        description="OAuth client secret",
+    )
+    refresh_token: str | None = Field(
+        default=None,
+        description="OAuth refresh token (for refresh_token grant)",
+    )
+    scope: str | None = Field(
+        default=None,
+        description="OAuth scope",
+    )
+    audience: str | None = Field(
+        default=None,
+        description="OAuth audience (provider-specific)",
+    )
+    token_field: str = Field(
+        default="access_token",
+        description="Field name containing access token in token response",
+    )
+    token_type_field: str = Field(
+        default="token_type",
+        description="Field name containing token type in token response",
+    )
+    expires_in_field: str = Field(
+        default="expires_in",
+        description="Field name containing expiry (seconds) in token response",
+    )
+    default_token_type: str = Field(
+        default="Bearer",
+        description="Default token type when missing in token response",
+    )
+    refresh_skew_seconds: int = Field(
+        default=60,
+        description="Refresh token this many seconds before expiry",
+    )
+    extra_token_params: dict[str, str] = Field(
+        default_factory=dict,
+        description="Additional form params sent to token endpoint",
+    )
+
+
+class McpServerConfig(BaseModel):
+    """Settings describing how to reach one MCP server.
+
+    Every field has a default. Keys that are not declared below are
+    accepted and kept (`extra="allow"`).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    enabled: bool = Field(
+        default=True,
+        description="Whether this MCP server is enabled",
+    )
+    type: str = Field(
+        default="stdio",
+        description="Transport type: 'stdio', 'sse', or 'http'",
+    )
+    command: str | None = Field(
+        default=None,
+        description="Command to execute to start the MCP server (for stdio type)",
+    )
+    args: list[str] = Field(
+        default_factory=list,
+        description="Arguments to pass to the command (for stdio type)",
+    )
+    env: dict[str, str] = Field(
+        default_factory=dict,
+        description="Environment variables for the MCP server",
+    )
+    url: str | None = Field(
+        default=None,
+        description="URL of the MCP server (for sse or http type)",
+    )
+    headers: dict[str, str] = Field(
+        default_factory=dict,
+        description="HTTP headers to send (for sse or http type)",
+    )
+    oauth: McpOAuthConfig | None = Field(
+        default=None,
+        description="OAuth configuration (for sse or http type)",
+    )
+    description: str = Field(
+        default="",
+        description="Human-readable description of what this MCP server provides",
+    )
+
+
+class SkillStateConfig(BaseModel):
+    """Per-skill state entry; currently a single on/off switch."""
+
+    enabled: bool = Field(
+        default=True,
+        description="Whether this skill is enabled",
+    )
+
+
+class ExtensionsConfig(BaseModel):
+    """Unified configuration for MCP servers and skills.
+
+    Unknown top-level keys are accepted and kept (`extra="allow"`), and
+    `mcp_servers` may be populated either by its field name or by the
+    `mcpServers` alias (`populate_by_name=True`).
+    """
+
+    mcp_servers: dict[str, McpServerConfig] = Field(
+        default_factory=dict,
+        description="Map of MCP server name to configuration",
+        alias="mcpServers",
+    )
+    skills: dict[str, SkillStateConfig] = Field(
+        default_factory=dict,
+        description="Map of skill name to state configuration",
+    )
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    @classmethod
+    def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
+        """Resolve the extensions config file path.
+
+        Resolution order:
+            1. The `config_path` argument, when given (the file must exist).
+            2. The `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable,
+               when set to a non-empty value (the file must exist).
+            3. The first existing file among, in this order:
+               `{backend_dir}/extensions_config.json`,
+               `{repo_root}/extensions_config.json`,
+               `{backend_dir}/mcp_config.json` (legacy name),
+               `{repo_root}/mcp_config.json` (legacy name).
+               `backend_dir` is `parents[4]` of this module's resolved path
+               and `repo_root` is its parent, so the search does not depend
+               on the current working directory.
+            4. Otherwise None, because extensions are optional.
+
+        Args:
+            config_path: Optional path to extensions config file.
+
+        Returns:
+            Path to the extensions config file if found, otherwise None.
+
+        Raises:
+            FileNotFoundError: If a path given explicitly (argument or
+                environment variable) does not exist.
+        """
+        if config_path:
+            path = Path(config_path)
+            if not path.exists():
+                raise FileNotFoundError(f"Extensions config file specified by param `config_path` not found at {path}")
+            return path
+
+        # Read the variable once so the truthiness test and the value used
+        # can never disagree.
+        if env_path := os.getenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH"):
+            path = Path(env_path)
+            if not path.exists():
+                raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
+            return path
+
+        backend_dir = Path(__file__).resolve().parents[4]
+        repo_root = backend_dir.parent
+        # Current file name first, then the legacy one; backend before repo root.
+        candidates = (
+            backend_dir / "extensions_config.json",
+            repo_root / "extensions_config.json",
+            backend_dir / "mcp_config.json",
+            repo_root / "mcp_config.json",
+        )
+        for path in candidates:
+            if path.exists():
+                return path
+
+        # Extensions are optional, so return None if not found
+        return None
+
+    @classmethod
+    def from_file(cls, config_path: str | None = None) -> "ExtensionsConfig":
+        """Load extensions config from JSON file.
+
+        See `resolve_config_path` for how the file is located.
+
+        Args:
+            config_path: Path to the extensions config file.
+
+        Returns:
+            ExtensionsConfig: The loaded config, or an empty config when no
+            file could be located.
+
+        Raises:
+            FileNotFoundError: Propagated from `resolve_config_path` when an
+                explicitly requested file does not exist.
+            ValueError: If the file is not valid JSON.
+            RuntimeError: For any other failure while reading, expanding or
+                validating the file (the original error is chained).
+        """
+        resolved_path = cls.resolve_config_path(config_path)
+        if resolved_path is None:
+            # Return empty config if extensions config file is not found
+            return cls(mcp_servers={}, skills={})
+
+        try:
+            with open(resolved_path, encoding="utf-8") as f:
+                config_data = json.load(f)
+            # Expands "$VAR" placeholders in place before validation.
+            cls.resolve_env_variables(config_data)
+            return cls.model_validate(config_data)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Extensions config file at {resolved_path} is not valid JSON: {e}") from e
+        except Exception as e:
+            raise RuntimeError(f"Failed to load extensions config from {resolved_path}: {e}") from e
+
+    @classmethod
+    def resolve_env_variables(cls, config: dict[str, Any]) -> dict[str, Any]:
+        """Recursively resolve environment variables in the config.
+
+        A string value that starts with `$` is replaced by the value of the
+        environment variable named by the rest of the string
+        (example: `$OPENAI_API_KEY`). Unset variables resolve to `""`.
+
+        The dict is modified in place. Recursion follows nested dicts and
+        dicts that are direct items of a list; strings that are direct items
+        of a list are left untouched.
+
+        Args:
+            config: The config to resolve environment variables in.
+
+        Returns:
+            The same `config` object, with environment variables resolved.
+        """
+        for key, value in config.items():
+            if isinstance(value, str):
+                if value.startswith("$"):
+                    # Unresolved placeholder -> empty string, so downstream
+                    # consumers (e.g. MCP servers) don't receive the literal
+                    # "$VAR" token as an actual environment value.
+                    config[key] = os.getenv(value[1:], "")
+            elif isinstance(value, dict):
+                config[key] = cls.resolve_env_variables(value)
+            elif isinstance(value, list):
+                config[key] = [cls.resolve_env_variables(item) if isinstance(item, dict) else item for item in value]
+        return config
+
+    def get_enabled_mcp_servers(self) -> dict[str, McpServerConfig]:
+        """Get only the enabled MCP servers.
+
+        Returns:
+            Dictionary of enabled MCP servers, keyed by server name.
+        """
+        return {name: config for name, config in self.mcp_servers.items() if config.enabled}
+
+    def is_skill_enabled(self, skill_name: str, skill_category: str) -> bool:
+        """Check if a skill is enabled.
+
+        Args:
+            skill_name: Name of the skill
+            skill_category: Category of the skill
+
+        Returns:
+            The configured `enabled` flag when the skill has an entry;
+            otherwise True only for the "public" and "custom" categories.
+        """
+        skill_config = self.skills.get(skill_name)
+        if skill_config is None:
+            # Default to enable for public & custom skill
+            return skill_category in ("public", "custom")
+        return skill_config.enabled
+
+
+# Cached ExtensionsConfig singleton; None until first loaded or after a reset.
+_extensions_config: ExtensionsConfig | None = None
+
+
+def get_extensions_config() -> ExtensionsConfig:
+    """Return the cached extensions config, loading it on first use.
+
+    The first call loads the config via `ExtensionsConfig.from_file()` and
+    caches it; later calls return the cached object. Use
+    `reload_extensions_config()` to reload from file, or
+    `reset_extensions_config()` to clear the cache.
+
+    Returns:
+        The cached ExtensionsConfig instance.
+    """
+    global _extensions_config
+    if _extensions_config is not None:
+        return _extensions_config
+    _extensions_config = ExtensionsConfig.from_file()
+    return _extensions_config
+
+
+def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
+    """Re-read the extensions config from disk and replace the cached instance.
+
+    Use this after the config file has been modified, to pick up the
+    changes without restarting the application.
+
+    Args:
+        config_path: Optional path to extensions config file. If not provided,
+                     uses the default resolution strategy.
+
+    Returns:
+        The newly loaded ExtensionsConfig instance.
+    """
+    global _extensions_config
+    # Load first; the cache is only replaced when loading succeeds.
+    fresh = ExtensionsConfig.from_file(config_path)
+    _extensions_config = fresh
+    return fresh
+
+
+def reset_extensions_config() -> None:
+    """Drop the cached extensions config.
+
+    After this call the next `get_extensions_config()` reloads from file.
+    Useful for testing or when switching between different configurations.
+    """
+    global _extensions_config
+    _extensions_config = None
+
+
+def set_extensions_config(config: ExtensionsConfig) -> None:
+    """Install a caller-supplied extensions config as the cached instance.
+
+    Intended for injecting a custom or mock config, e.g. in tests.
+
+    Args:
+        config: The ExtensionsConfig instance to use.
+    """
+    global _extensions_config
+    _extensions_config = config
--- a/deer-flow/backend/packages/harness/deerflow/config/guardrails_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/guardrails_config.py
@@ -0,0 +1,48 @@
+"""Configuration for pre-tool-call authorization."""
+
+from pydantic import BaseModel, Field
+
+
+class GuardrailProviderConfig(BaseModel):
+    """Identifies a guardrail provider class and the settings to give it."""
+
+    use: str = Field(
+        description="Class path (e.g. 'deerflow.guardrails.builtin:AllowlistProvider')",
+    )
+    config: dict = Field(
+        default_factory=dict,
+        description="Provider-specific settings passed as kwargs",
+    )
+
+
+class GuardrailsConfig(BaseModel):
+    """Configuration for pre-tool-call authorization.
+
+    When enabled, every tool call passes through the configured provider
+    before execution. The provider receives tool name, arguments, and the
+    agent's passport reference, and returns an allow/deny decision.
+
+    All fields have defaults, so `GuardrailsConfig()` yields a disabled,
+    fail-closed configuration with no passport and no provider.
+    """
+
+    enabled: bool = Field(
+        default=False,
+        description="Enable guardrail middleware",
+    )
+    fail_closed: bool = Field(
+        default=True,
+        description="Block tool calls if provider errors",
+    )
+    passport: str | None = Field(
+        default=None,
+        description="OAP passport path or hosted agent ID",
+    )
+    provider: GuardrailProviderConfig | None = Field(
+        default=None,
+        description="Guardrail provider configuration",
+    )
+
+
+# Cached GuardrailsConfig singleton; None until first requested or loaded.
+_guardrails_config: GuardrailsConfig | None = None
+
+
+def get_guardrails_config() -> GuardrailsConfig:
+    """Return the cached guardrails config, creating a default one if needed.
+
+    Returns:
+        The cached `GuardrailsConfig`; when nothing has been loaded yet a
+        default-constructed instance is created, cached and returned.
+    """
+    global _guardrails_config
+    if _guardrails_config is not None:
+        return _guardrails_config
+    _guardrails_config = GuardrailsConfig()
+    return _guardrails_config
+
+
+def load_guardrails_config_from_dict(data: dict) -> GuardrailsConfig:
+    """Validate `data` as a guardrails config and cache the result.
+
+    Called during AppConfig loading.
+
+    Args:
+        data: Raw mapping to validate with `GuardrailsConfig.model_validate`.
+
+    Returns:
+        The validated config, which is now also the cached instance.
+    """
+    global _guardrails_config
+    # Validate first; the cache is only replaced when validation succeeds.
+    loaded = GuardrailsConfig.model_validate(data)
+    _guardrails_config = loaded
+    return loaded
+
+
+def reset_guardrails_config() -> None:
+    """Clear the cached guardrails config.
+
+    Used in tests to prevent singleton leaks between cases.
+    """
+    global _guardrails_config
+    _guardrails_config = None
--- a/deer-flow/backend/packages/harness/deerflow/config/memory_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/memory_config.py
@@ -0,0 +1,82 @@
+"""Configuration for memory mechanism."""
+
+from pydantic import BaseModel, Field
+
+
+class MemoryConfig(BaseModel):
+    """Settings for the global memory mechanism.
+
+    Every field has a default, so `MemoryConfig()` is a valid configuration.
+    Numeric fields are range-checked by their `ge`/`le` bounds.
+    """
+
+    enabled: bool = Field(default=True, description="Whether to enable memory mechanism")
+    storage_path: str = Field(
+        default="",
+        description=(
+            "Path to store memory data. "
+            "If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). "
+            "Absolute paths are used as-is. "
+            "Relative paths are resolved against `Paths.base_dir` "
+            "(not the backend working directory). "
+            "Note: if you previously set this to `.deer-flow/memory.json`, "
+            "the file will now be resolved as `{base_dir}/.deer-flow/memory.json`; "
+            "migrate existing data or use an absolute path to preserve the old location."
+        ),
+    )
+    storage_class: str = Field(default="deerflow.agents.memory.storage.FileMemoryStorage", description="The class path for memory storage provider")
+    debounce_seconds: int = Field(default=30, ge=1, le=300, description="Seconds to wait before processing queued updates (debounce)")
+    model_name: str | None = Field(default=None, description="Model name to use for memory updates (None = use default model)")
+    max_facts: int = Field(default=100, ge=10, le=500, description="Maximum number of facts to store")
+    fact_confidence_threshold: float = Field(default=0.7, ge=0.0, le=1.0, description="Minimum confidence threshold for storing facts")
+    injection_enabled: bool = Field(default=True, description="Whether to inject memory into system prompt")
+    max_injection_tokens: int = Field(default=2000, ge=100, le=8000, description="Maximum tokens to use for memory injection")
+
+
+# Global configuration instance, created eagerly with default settings at import time.
+_memory_config: MemoryConfig = MemoryConfig()
+
+
+def get_memory_config() -> MemoryConfig:
+    """Return the module-level memory settings.
+
+    Returns:
+        The `MemoryConfig` currently stored at module level.
+    """
+    return _memory_config
+
+
+def set_memory_config(config: MemoryConfig) -> None:
+    """Replace the module-level memory settings.
+
+    Args:
+        config: The `MemoryConfig` to use from now on.
+    """
+    global _memory_config
+    _memory_config = config
+
+
+def load_memory_config_from_dict(config_dict: dict) -> None:
+    """Build a `MemoryConfig` from a mapping and install it globally.
+
+    Args:
+        config_dict: Keyword arguments for `MemoryConfig`.
+    """
+    global _memory_config
+    # Construct first so the stored value is untouched if validation raises.
+    parsed = MemoryConfig(**config_dict)
+    _memory_config = parsed
--- a/deer-flow/backend/packages/harness/deerflow/config/model_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/model_config.py
@@ -0,0 +1,41 @@
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class ModelConfig(BaseModel):
+    """Config section for a model.
+
+    Only `name`, `use` and `model` are required; every other declared field
+    has a default. Keys that are not declared below are accepted and kept
+    (`extra="allow"`).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    name: str = Field(..., description="Unique name for the model")
+    # Optional: the previous `Field(..., default_factory=...)` spelling mixed the
+    # "required" marker with a default; a plain default states the intent.
+    display_name: str | None = Field(default=None, description="Display name for the model")
+    description: str | None = Field(default=None, description="Description for the model")
+    use: str = Field(
+        ...,
+        description="Class path of the model provider(e.g. langchain_openai.ChatOpenAI)",
+    )
+    model: str = Field(..., description="Model name")
+    use_responses_api: bool | None = Field(
+        default=None,
+        description="Whether to route OpenAI ChatOpenAI calls through the /v1/responses API",
+    )
+    output_version: str | None = Field(
+        default=None,
+        description="Structured output version for OpenAI responses content, e.g. responses/v1",
+    )
+    supports_thinking: bool = Field(default=False, description="Whether the model supports thinking")
+    supports_reasoning_effort: bool = Field(default=False, description="Whether the model supports reasoning effort")
+    when_thinking_enabled: dict | None = Field(
+        default=None,
+        description="Extra settings to be passed to the model when thinking is enabled",
+    )
+    when_thinking_disabled: dict | None = Field(
+        default=None,
+        description="Extra settings to be passed to the model when thinking is disabled",
+    )
+    supports_vision: bool = Field(default=False, description="Whether the model supports vision/image inputs")
+    thinking: dict | None = Field(
+        default=None,
+        description=(
+            "Thinking settings for the model. If provided, these settings will be passed to the model when thinking is enabled. "
+            "This is a shortcut for `when_thinking_enabled` and will be merged with `when_thinking_enabled` if both are provided."
+        ),
+    )
--- a/deer-flow/backend/packages/harness/deerflow/config/paths.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/paths.py
@@ -0,0 +1,306 @@
+import os
+import re
+import shutil
+from pathlib import Path, PureWindowsPath
+
+# Virtual path prefix seen by agents inside the sandbox
+VIRTUAL_PATH_PREFIX = "/mnt/user-data"
+
+# Allowed thread-ID alphabet: one or more ASCII letters, digits, "_" or "-".
+# NOTE: `$` also matches just before a trailing newline, so a strict
+# whole-string check needs fullmatch() rather than match().
+_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
+
+
+def _default_local_base_dir() -> Path:
+    """Return the repo-local DeerFlow state directory without relying on cwd.
+
+    The directory is `.deer-flow` under `parents[4]` of this module's
+    resolved file path.
+    """
+    module_file = Path(__file__).resolve()
+    return module_file.parents[4] / ".deer-flow"
+
+
+def _validate_thread_id(thread_id: str) -> str:
+    """Validate a thread ID before using it in filesystem paths.
+
+    Args:
+        thread_id: Candidate identifier.
+
+    Returns:
+        The same `thread_id`, unchanged, when it is safe.
+
+    Raises:
+        ValueError: If `thread_id` is empty or contains anything other than
+            ASCII letters, digits, hyphens and underscores.
+    """
+    # fullmatch() is required here: with match(), the pattern's trailing `$`
+    # also matches just before a final newline, so "abc\n" would be accepted.
+    if not _SAFE_THREAD_ID_RE.fullmatch(thread_id):
+        raise ValueError(f"Invalid thread_id {thread_id!r}: only alphanumeric characters, hyphens, and underscores are allowed.")
+    return thread_id
+
+
+def _join_host_path(base: str, *parts: str) -> str:
+    """Append `parts` to `base`, keeping the path flavour `base` is written in.
+
+    Docker Desktop on Windows expects bind mount sources to stay in Windows
+    path form (for example ``C:\\repo\\backend\\.deer-flow``).  Joining such
+    a base with the platform's ``Path`` on a POSIX host can produce mixed
+    separators, so Windows-looking bases are joined with ``PureWindowsPath``.
+
+    A base is treated as Windows-style when it starts with a drive letter
+    followed by a slash or backslash, starts with two backslashes, or
+    contains a backslash anywhere. With no `parts`, `base` is returned as-is.
+    """
+    if not parts:
+        return base
+
+    has_drive = re.match(r"^[A-Za-z]:[\\/]", base)
+    looks_windows = bool(has_drive) or base.startswith("\\\\") or "\\" in base
+    joined = PureWindowsPath(base).joinpath(*parts) if looks_windows else Path(base).joinpath(*parts)
+    return str(joined)
+
+
+def join_host_path(base: str, *parts: str) -> str:
+    """Public wrapper around `_join_host_path`.
+
+    Joins host filesystem path segments while preserving the native style
+    of `base`.
+    """
+    return _join_host_path(base, *parts)
+
+
+class Paths:
+    """
+    Centralized path configuration for DeerFlow application data.
+
+    Directory layout (host side):
+        {base_dir}/
+        ├── memory.json
+        ├── USER.md          <-- global user profile (injected into all agents)
+        ├── agents/
+        │   └── {agent_name}/
+        │       ├── config.yaml
+        │       ├── SOUL.md  <-- agent personality/identity (injected alongside lead prompt)
+        │       └── memory.json
+        └── threads/
+            └── {thread_id}/
+                ├── acp-workspace/     <-- /mnt/acp-workspace/
+                └── user-data/         <-- mounted as /mnt/user-data/ inside sandbox
+                    ├── workspace/     <-- /mnt/user-data/workspace/
+                    ├── uploads/       <-- /mnt/user-data/uploads/
+                    └── outputs/       <-- /mnt/user-data/outputs/
+
+    BaseDir resolution (in priority order):
+        1. Constructor argument `base_dir`
+        2. DEER_FLOW_HOME environment variable
+        3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
+    """
+
+    def __init__(self, base_dir: str | Path | None = None) -> None:
+        """Store an explicit base directory (resolved to absolute), or None to defer resolution."""
+        self._base_dir = Path(base_dir).resolve() if base_dir is not None else None
+
+    @property
+    def host_base_dir(self) -> Path:
+        """Host-visible base dir for Docker volume mount sources.
+
+        When running inside Docker with a mounted Docker socket (DooD), the Docker
+        daemon runs on the host and resolves mount paths against the host filesystem.
+        Set DEER_FLOW_HOST_BASE_DIR to the host-side path that corresponds to this
+        container's base_dir so that sandbox container volume mounts work correctly.
+
+        Falls back to base_dir when the env var is not set (native/local execution).
+        """
+        # Delegate to the string helper so the env-var lookup lives in one place.
+        return Path(self._host_base_dir_str())
+
+    def _host_base_dir_str(self) -> str:
+        """Return the host base dir as a raw string for bind mounts.
+
+        The DEER_FLOW_HOST_BASE_DIR value is returned verbatim (no `Path`
+        normalisation); otherwise `str(self.base_dir)` is used.
+        """
+        if env := os.getenv("DEER_FLOW_HOST_BASE_DIR"):
+            return env
+        return str(self.base_dir)
+
+    @property
+    def base_dir(self) -> Path:
+        """Root directory for all application data.
+
+        The environment variable is read on every access, so the result is
+        not cached unless a constructor argument was given.
+        """
+        if self._base_dir is not None:
+            return self._base_dir
+
+        if env_home := os.getenv("DEER_FLOW_HOME"):
+            return Path(env_home).resolve()
+
+        return _default_local_base_dir()
+
+    @property
+    def memory_file(self) -> Path:
+        """Path to the persisted memory file: `{base_dir}/memory.json`."""
+        return self.base_dir / "memory.json"
+
+    @property
+    def user_md_file(self) -> Path:
+        """Path to the global user profile file: `{base_dir}/USER.md`."""
+        return self.base_dir / "USER.md"
+
+    @property
+    def agents_dir(self) -> Path:
+        """Root directory for all custom agents: `{base_dir}/agents/`."""
+        return self.base_dir / "agents"
+
+    def agent_dir(self, name: str) -> Path:
+        """Directory for a specific agent: `{base_dir}/agents/{name}/`.
+
+        `name` is lower-cased before joining; it is not otherwise validated here.
+        """
+        return self.agents_dir / name.lower()
+
+    def agent_memory_file(self, name: str) -> Path:
+        """Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
+        return self.agent_dir(name) / "memory.json"
+
+    def thread_dir(self, thread_id: str) -> Path:
+        """
+        Host path for a thread's data: `{base_dir}/threads/{thread_id}/`
+
+        This directory contains a `user-data/` subdirectory that is mounted
+        as `/mnt/user-data/` inside the sandbox.
+
+        Raises:
+            ValueError: If `thread_id` contains unsafe characters (path separators
+                        or `..`) that could cause directory traversal.
+        """
+        return self.base_dir / "threads" / _validate_thread_id(thread_id)
+
+    def sandbox_work_dir(self, thread_id: str) -> Path:
+        """
+        Host path for the agent's workspace directory.
+        Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
+        Sandbox: `/mnt/user-data/workspace/`
+        """
+        return self.thread_dir(thread_id) / "user-data" / "workspace"
+
+    def sandbox_uploads_dir(self, thread_id: str) -> Path:
+        """
+        Host path for user-uploaded files.
+        Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
+        Sandbox: `/mnt/user-data/uploads/`
+        """
+        return self.thread_dir(thread_id) / "user-data" / "uploads"
+
+    def sandbox_outputs_dir(self, thread_id: str) -> Path:
+        """
+        Host path for agent-generated artifacts.
+        Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
+        Sandbox: `/mnt/user-data/outputs/`
+        """
+        return self.thread_dir(thread_id) / "user-data" / "outputs"
+
+    def acp_workspace_dir(self, thread_id: str) -> Path:
+        """
+        Host path for the ACP workspace of a specific thread.
+        Host: `{base_dir}/threads/{thread_id}/acp-workspace/`
+        Sandbox: `/mnt/acp-workspace/`
+
+        Each thread gets its own isolated ACP workspace so that concurrent
+        sessions cannot read each other's ACP agent outputs.
+        """
+        return self.thread_dir(thread_id) / "acp-workspace"
+
+    def sandbox_user_data_dir(self, thread_id: str) -> Path:
+        """
+        Host path for the user-data root.
+        Host: `{base_dir}/threads/{thread_id}/user-data/`
+        Sandbox: `/mnt/user-data/`
+        """
+        return self.thread_dir(thread_id) / "user-data"
+
+    def host_thread_dir(self, thread_id: str) -> str:
+        """Host path for a thread directory, preserving Windows path syntax.
+
+        Raises:
+            ValueError: If `thread_id` fails validation.
+        """
+        return _join_host_path(self._host_base_dir_str(), "threads", _validate_thread_id(thread_id))
+
+    def host_sandbox_user_data_dir(self, thread_id: str) -> str:
+        """Host path for a thread's user-data root."""
+        return _join_host_path(self.host_thread_dir(thread_id), "user-data")
+
+    def host_sandbox_work_dir(self, thread_id: str) -> str:
+        """Host path for the workspace mount source."""
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "workspace")
+
+    def host_sandbox_uploads_dir(self, thread_id: str) -> str:
+        """Host path for the uploads mount source."""
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "uploads")
+
+    def host_sandbox_outputs_dir(self, thread_id: str) -> str:
+        """Host path for the outputs mount source."""
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "outputs")
+
+    def host_acp_workspace_dir(self, thread_id: str) -> str:
+        """Host path for the ACP workspace mount source."""
+        return _join_host_path(self.host_thread_dir(thread_id), "acp-workspace")
+
+    def ensure_thread_dirs(self, thread_id: str) -> None:
+        """Create all standard sandbox directories for a thread.
+
+        The four leaf directories (workspace, uploads, outputs, acp-workspace)
+        are set to mode 0o777 so that sandbox containers (which may run as a
+        different UID than the host backend process) can write to the
+        volume-mounted paths without "Permission denied" errors.
+        The explicit chmod() call is necessary because Path.mkdir(mode=...) is
+        subject to the process umask and may not yield the intended permissions.
+        Intermediate directories created via `parents=True` are not chmod'ed
+        and keep the default, umask-dependent permissions.
+
+        Includes the ACP workspace directory so it can be volume-mounted into
+        the sandbox container at ``/mnt/acp-workspace`` even before the first
+        ACP agent invocation.
+        """
+        for d in [
+            self.sandbox_work_dir(thread_id),
+            self.sandbox_uploads_dir(thread_id),
+            self.sandbox_outputs_dir(thread_id),
+            self.acp_workspace_dir(thread_id),
+        ]:
+            d.mkdir(parents=True, exist_ok=True)
+            d.chmod(0o777)
+
+    def delete_thread_dir(self, thread_id: str) -> None:
+        """Delete all persisted data for a thread.
+
+        The operation is idempotent: missing thread directories are ignored.
+        """
+        thread_dir = self.thread_dir(thread_id)
+        if thread_dir.exists():
+            shutil.rmtree(thread_dir)
+
+    def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
+        """Resolve a sandbox virtual path to the actual host filesystem path.
+
+        Args:
+            thread_id: The thread ID.
+            virtual_path: Virtual path as seen inside the sandbox, e.g.
+                          ``/mnt/user-data/outputs/report.pdf``.
+                          Leading slashes are stripped before matching.
+
+        Returns:
+            The resolved absolute host filesystem path.
+
+        Raises:
+            ValueError: If the path does not start with the expected virtual
+                        prefix or a path-traversal attempt is detected.
+        """
+        stripped = virtual_path.lstrip("/")
+        prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
+
+        # Require an exact segment-boundary match to avoid prefix confusion
+        # (e.g. reject paths like "mnt/user-dataX/...").
+        if stripped != prefix and not stripped.startswith(prefix + "/"):
+            raise ValueError(f"Path must start with /{prefix}")
+
+        relative = stripped[len(prefix) :].lstrip("/")
+        base = self.sandbox_user_data_dir(thread_id).resolve()
+        # resolve() collapses ".." and follows symlinks, so the containment
+        # check below is made against the real target.
+        actual = (base / relative).resolve()
+
+        try:
+            actual.relative_to(base)
+        except ValueError:
+            # `from None`: the relative_to() error adds nothing for callers,
+            # so drop it from the traceback instead of chaining implicitly.
+            raise ValueError("Access denied: path traversal detected") from None
+
+        return actual
+
+
+# ── Singleton ────────────────────────────────────────────────────────────
+
+# Process-wide Paths instance; None until first requested.
+_paths: Paths | None = None
+
+
+def get_paths() -> Paths:
+    """Return the global Paths singleton (lazy-initialized).
+
+    The instance is built with no explicit base directory on first use and
+    reused afterwards.
+    """
+    global _paths
+    if _paths is not None:
+        return _paths
+    _paths = Paths()
+    return _paths
+
+
+def resolve_path(path: str) -> Path:
+    """Turn *path* into an absolute, normalised ``Path``.
+
+    Absolute inputs are only normalised. Relative inputs are anchored at the
+    application base directory (``get_paths().base_dir``) before being
+    normalised.
+    """
+    candidate = Path(path)
+    if candidate.is_absolute():
+        return candidate.resolve()
+    anchored = get_paths().base_dir / path
+    return anchored.resolve()
--- a/deer-flow/backend/packages/harness/deerflow/config/sandbox_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/sandbox_config.py
@@ -0,0 +1,83 @@
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class VolumeMountConfig(BaseModel):
+    """Configuration for one volume mount: a host path exposed at a container path."""
+
+    # Both paths are required (``...`` default); read_only is optional and defaults to False.
+    host_path: str = Field(..., description="Path on the host machine")
+    container_path: str = Field(..., description="Path inside the container")
+    read_only: bool = Field(default=False, description="Whether the mount is read-only")
+
+
+class SandboxConfig(BaseModel):
+    """Config section for a sandbox.
+
+    Common options:
+        use: Class path of the sandbox provider (required)
+        allow_host_bash: Enable host-side bash execution for LocalSandboxProvider.
+            Dangerous and intended only for fully trusted local workflows.
+
+    AioSandboxProvider specific options:
+        image: Docker image to use (default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest)
+        port: Base port for sandbox containers (default: 8080)
+        replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
+        container_prefix: Prefix for container names (default: deer-flow-sandbox)
+        idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
+        mounts: List of volume mounts to share directories with the container
+        environment: Environment variables to inject into the container (values starting with $ are resolved from host env)
+
+    Tool output limits (non-negative; per the field descriptions, 0 disables truncation):
+        bash_output_max_chars: Character cap for bash tool output (default: 20000)
+        read_file_output_max_chars: Character cap for read_file tool output (default: 50000)
+        ls_output_max_chars: Character cap for ls tool output (default: 20000)
+    """
+
+    use: str = Field(
+        ...,
+        description="Class path of the sandbox provider (e.g. deerflow.sandbox.local:LocalSandboxProvider)",
+    )
+    allow_host_bash: bool = Field(
+        default=False,
+        description="Allow the bash tool to execute directly on the host when using LocalSandboxProvider. Dangerous; intended only for fully trusted local environments.",
+    )
+    # The provider-specific options below default to None in this model. The
+    # defaults quoted in the docstring are not applied here — presumably the
+    # provider substitutes them when a value is None (TODO confirm).
+    image: str | None = Field(
+        default=None,
+        description="Docker image to use for the sandbox container",
+    )
+    port: int | None = Field(
+        default=None,
+        description="Base port for sandbox containers",
+    )
+    replicas: int | None = Field(
+        default=None,
+        description="Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.",
+    )
+    container_prefix: str | None = Field(
+        default=None,
+        description="Prefix for container names",
+    )
+    idle_timeout: int | None = Field(
+        default=None,
+        description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
+    )
+    mounts: list[VolumeMountConfig] = Field(
+        default_factory=list,
+        description="List of volume mounts to share directories between host and container",
+    )
+    environment: dict[str, str] = Field(
+        default_factory=dict,
+        description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
+    )
+
+    # Output caps are validated as >= 0 (``ge=0``). The truncation itself is
+    # not implemented in this model; only the limits are declared here.
+    bash_output_max_chars: int = Field(
+        default=20000,
+        ge=0,
+        description="Maximum characters to keep from bash tool output. Output exceeding this limit is middle-truncated (head + tail), preserving the first and last half. Set to 0 to disable truncation.",
+    )
+    read_file_output_max_chars: int = Field(
+        default=50000,
+        ge=0,
+        description="Maximum characters to keep from read_file tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
+    )
+    ls_output_max_chars: int = Field(
+        default=20000,
+        ge=0,
+        description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
+    )
+
+    # extra="allow": keys not declared above are accepted rather than rejected.
+    model_config = ConfigDict(extra="allow")
--- a/deer-flow/backend/packages/harness/deerflow/config/skill_evolution_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/skill_evolution_config.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel, Field
+
+
+class SkillEvolutionConfig(BaseModel):
+    """Configuration for agent-managed skill evolution."""
+
+    # Off by default; must be enabled explicitly.
+    enabled: bool = Field(
+        default=False,
+        description="Whether the agent can create and modify skills under skills/custom.",
+    )
+    # None leaves the choice of moderation model to the consumer of this config
+    # (per the description, the primary chat model).
+    moderation_model_name: str | None = Field(
+        default=None,
+        description="Optional model name for skill security moderation. Defaults to the primary chat model.",
+    )
--- a/deer-flow/backend/packages/harness/deerflow/config/skills_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/skills_config.py
@@ -0,0 +1,54 @@
+from pathlib import Path
+
+from pydantic import BaseModel, Field
+
+
+def _default_repo_root() -> Path:
+    """Return the repo root derived from this file's location, independent of the CWD."""
+    # This module sits at <root>/backend/packages/harness/deerflow/config/,
+    # so the sixth ancestor of the resolved file path (parents[5]) is <root>.
+    this_file = Path(__file__).resolve()
+    return this_file.parents[5]
+
+
+class SkillsConfig(BaseModel):
+    """Configuration for the skills system.
+
+    Holds the host-side location of the skills directory and the path at
+    which skills are mounted inside the sandbox container.
+    """
+
+    path: str | None = Field(
+        default=None,
+        description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
+    )
+    container_path: str = Field(
+        default="/mnt/skills",
+        description="Path where skills are mounted in the sandbox container",
+    )
+
+    def get_skills_path(self) -> Path:
+        """
+        Get the resolved skills directory path.
+
+        A configured absolute ``path`` is resolved as-is; a configured relative
+        ``path`` is joined onto the repo root first. When ``path`` is unset or
+        empty, the location is delegated to
+        ``deerflow.skills.loader.get_skills_root_path``.
+
+        Returns:
+            Path to the skills directory
+        """
+        if self.path:
+            # Use configured path (can be absolute or relative)
+            path = Path(self.path)
+            if not path.is_absolute():
+                # If relative, resolve from the repo root for deterministic behavior.
+                path = _default_repo_root() / path
+            return path.resolve()
+        else:
+            # Default location is decided by the skills loader. Imported here
+            # rather than at module level — presumably to avoid an import
+            # cycle between config and skills (TODO confirm).
+            from deerflow.skills.loader import get_skills_root_path
+
+            return get_skills_root_path()
+
+    def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
+        """
+        Get the full container path for a specific skill.
+
+        Args:
+            skill_name: Name of the skill (directory name)
+            category: Category of the skill (public or custom)
+
+        Returns:
+            Full path to the skill in the container, in the form
+            ``<container_path>/<category>/<skill_name>``
+        """
+        # Strip trailing slashes so a configured "/mnt/skills/" does not
+        # produce "/mnt/skills//public/<skill>".
+        mount_root = self.container_path.rstrip("/")
+        return f"{mount_root}/{category}/{skill_name}"
--- a/deer-flow/backend/packages/harness/deerflow/config/stream_bridge_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/stream_bridge_config.py
@@ -0,0 +1,46 @@
+"""Configuration for stream bridge."""
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+# Backend identifiers accepted for the stream bridge ``type`` field.
+StreamBridgeType = Literal["memory", "redis"]
+
+
+class StreamBridgeConfig(BaseModel):
+    """Configuration for the stream bridge that connects agent workers to SSE endpoints."""
+
+    # Defaults to the in-process backend.
+    type: StreamBridgeType = Field(
+        default="memory",
+        description="Stream bridge backend type. 'memory' uses in-process asyncio.Queue (single-process only). 'redis' uses Redis Streams (planned for Phase 2, not yet implemented).",
+    )
+    # Optional; per the description it only matters for the 'redis' type.
+    redis_url: str | None = Field(
+        default=None,
+        description="Redis URL for the redis stream bridge type. Example: 'redis://localhost:6379/0'.",
+    )
+    # No lower bound is declared, so zero or negative values pass validation.
+    queue_maxsize: int = Field(
+        default=256,
+        description="Maximum number of events buffered per run in the memory bridge.",
+    )
+
+
+# Global configuration instance. None means no stream bridge has been
+# configured; per the original note, consumers then fall back to the memory
+# bridge with defaults (that fallback is not implemented in this module).
+_stream_bridge_config: StreamBridgeConfig | None = None
+
+
+def get_stream_bridge_config() -> StreamBridgeConfig | None:
+    """Return the module-level stream bridge configuration, or None if none has been set."""
+    return _stream_bridge_config
+
+
+def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
+    """Replace the module-level stream bridge configuration; passing None clears it."""
+    global _stream_bridge_config
+    _stream_bridge_config = config
+
+
+def load_stream_bridge_config_from_dict(config_dict: dict) -> None:
+    """Build a ``StreamBridgeConfig`` from *config_dict* and install it as the module-level config.
+
+    The dictionary's items are passed as keyword arguments to the model, and
+    any error raised while constructing it propagates to the caller.
+    """
+    global _stream_bridge_config
+    _stream_bridge_config = StreamBridgeConfig(**config_dict)
--- a/deer-flow/backend/packages/harness/deerflow/config/subagents_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/subagents_config.py
@@ -0,0 +1,102 @@
+"""Configuration for the subagent system loaded from config.yaml."""
+
+import logging
+
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+
+class SubagentOverrideConfig(BaseModel):
+    """Per-agent configuration overrides.
+
+    Both fields are optional; None means "no override for this agent".
+    When set, each value must be at least 1 (``ge=1``).
+    """
+
+    timeout_seconds: int | None = Field(
+        default=None,
+        ge=1,
+        description="Timeout in seconds for this subagent (None = use global default)",
+    )
+    max_turns: int | None = Field(
+        default=None,
+        ge=1,
+        description="Maximum turns for this subagent (None = use global or builtin default)",
+    )
+
+
+class SubagentsAppConfig(BaseModel):
+    """Global subagent settings plus optional per-agent overrides."""
+
+    timeout_seconds: int = Field(
+        default=900,
+        ge=1,
+        description="Default timeout in seconds for all subagents (default: 900 = 15 minutes)",
+    )
+    max_turns: int | None = Field(
+        default=None,
+        ge=1,
+        description="Optional default max-turn override for all subagents (None = keep builtin defaults)",
+    )
+    agents: dict[str, SubagentOverrideConfig] = Field(
+        default_factory=dict,
+        description="Per-agent configuration overrides keyed by agent name",
+    )
+
+    def get_timeout_for(self, agent_name: str) -> int:
+        """Return the effective timeout in seconds for *agent_name*.
+
+        Args:
+            agent_name: The name of the subagent.
+
+        Returns:
+            The per-agent ``timeout_seconds`` override when one is set,
+            otherwise the global ``timeout_seconds``.
+        """
+        per_agent = self.agents.get(agent_name)
+        if per_agent is None or per_agent.timeout_seconds is None:
+            return self.timeout_seconds
+        return per_agent.timeout_seconds
+
+    def get_max_turns_for(self, agent_name: str, builtin_default: int) -> int:
+        """Return the effective max_turns for *agent_name*.
+
+        Precedence: per-agent override, then the global ``max_turns``,
+        then *builtin_default*.
+        """
+        per_agent = self.agents.get(agent_name)
+        if per_agent is not None and per_agent.max_turns is not None:
+            return per_agent.max_turns
+        return builtin_default if self.max_turns is None else self.max_turns
+
+
+# Module-level configuration instance; starts with the model's defaults until
+# load_subagents_config_from_dict() replaces it.
+_subagents_config: SubagentsAppConfig = SubagentsAppConfig()
+
+
+def get_subagents_app_config() -> SubagentsAppConfig:
+    """Return the module-level subagents configuration."""
+    return _subagents_config
+
+
+def load_subagents_config_from_dict(config_dict: dict) -> None:
+    """Install a ``SubagentsAppConfig`` built from *config_dict* and log a summary.
+
+    The summary lists only those per-agent overrides that actually set a
+    timeout or a max-turn value.
+    """
+    global _subagents_config
+    _subagents_config = SubagentsAppConfig(**config_dict)
+    loaded = _subagents_config
+
+    overrides_summary = {}
+    for name, override in loaded.agents.items():
+        parts = [
+            text
+            for value, text in (
+                (override.timeout_seconds, f"timeout={override.timeout_seconds}s"),
+                (override.max_turns, f"max_turns={override.max_turns}"),
+            )
+            if value is not None
+        ]
+        if parts:
+            overrides_summary[name] = ", ".join(parts)
+
+    if not overrides_summary:
+        logger.info(
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
+            loaded.timeout_seconds,
+            loaded.max_turns,
+        )
+        return
+
+    logger.info(
+        "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s",
+        loaded.timeout_seconds,
+        loaded.max_turns,
+        overrides_summary,
+    )
--- a/deer-flow/backend/packages/harness/deerflow/config/summarization_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/summarization_config.py
@@ -0,0 +1,74 @@
+"""Configuration for conversation summarization."""
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+# Units in which a context size can be expressed.
+ContextSizeType = Literal["fraction", "tokens", "messages"]
+
+
+class ContextSize(BaseModel):
+    """Context size specification for trigger or keep parameters.
+
+    Pairs a unit (``type``) with a numeric ``value``. Both fields are
+    required, and no range check is applied to ``value`` in this model.
+    """
+
+    type: ContextSizeType = Field(description="Type of context size specification")
+    value: int | float = Field(description="Value for the context size specification")
+
+    def to_tuple(self) -> tuple[ContextSizeType, int | float]:
+        """Convert to tuple format expected by SummarizationMiddleware.
+
+        Returns:
+            The pair ``(type, value)``.
+        """
+        return (self.type, self.value)
+
+
+class SummarizationConfig(BaseModel):
+    """Configuration for automatic conversation summarization.
+
+    Every field has a default, so the model can be built with no arguments;
+    summarization is disabled unless ``enabled`` is set.
+    """
+
+    enabled: bool = Field(
+        default=False,
+        description="Whether to enable automatic conversation summarization",
+    )
+    model_name: str | None = Field(
+        default=None,
+        description="Model name to use for summarization (None = use a lightweight model)",
+    )
+    # Accepts a single threshold, a list of thresholds, or None (the default).
+    trigger: ContextSize | list[ContextSize] | None = Field(
+        default=None,
+        description="One or more thresholds that trigger summarization. When any threshold is met, summarization runs. "
+        "Examples: {'type': 'messages', 'value': 50} triggers at 50 messages, "
+        "{'type': 'tokens', 'value': 4000} triggers at 4000 tokens, "
+        "{'type': 'fraction', 'value': 0.8} triggers at 80% of model's max input tokens",
+    )
+    # default_factory builds a fresh ContextSize (20 messages) per instance.
+    keep: ContextSize = Field(
+        default_factory=lambda: ContextSize(type="messages", value=20),
+        description="Context retention policy after summarization. Specifies how much history to preserve. "
+        "Examples: {'type': 'messages', 'value': 20} keeps 20 messages, "
+        "{'type': 'tokens', 'value': 3000} keeps 3000 tokens, "
+        "{'type': 'fraction', 'value': 0.3} keeps 30% of model's max input tokens",
+    )
+    trim_tokens_to_summarize: int | None = Field(
+        default=4000,
+        description="Maximum tokens to keep when preparing messages for summarization. Pass null to skip trimming.",
+    )
+    summary_prompt: str | None = Field(
+        default=None,
+        description="Custom prompt template for generating summaries. If not provided, uses the default LangChain prompt.",
+    )
+
+
+# Global configuration instance; starts with the model's defaults
+# (summarization disabled) until it is set or loaded.
+_summarization_config: SummarizationConfig = SummarizationConfig()
+
+
+def get_summarization_config() -> SummarizationConfig:
+    """Return the module-level summarization configuration."""
+    return _summarization_config
+
+
+def set_summarization_config(config: SummarizationConfig) -> None:
+    """Replace the module-level summarization configuration with *config*."""
+    global _summarization_config
+    _summarization_config = config
+
+
+def load_summarization_config_from_dict(config_dict: dict) -> None:
+    """Build a ``SummarizationConfig`` from *config_dict* and install it as the module-level config.
+
+    The dictionary's items are passed as keyword arguments to the model, and
+    any error raised while constructing it propagates to the caller.
+    """
+    global _summarization_config
+    _summarization_config = SummarizationConfig(**config_dict)
--- a/deer-flow/backend/packages/harness/deerflow/config/title_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/title_config.py
@@ -0,0 +1,53 @@
+"""Configuration for automatic thread title generation."""
+
+from pydantic import BaseModel, Field
+
+
+class TitleConfig(BaseModel):
+    """Configuration for automatic thread title generation.
+
+    Title generation is enabled by default. ``max_words`` is limited to
+    1-20 and ``max_chars`` to 10-200 by the field constraints.
+    """
+
+    enabled: bool = Field(
+        default=True,
+        description="Whether to enable automatic title generation",
+    )
+    max_words: int = Field(
+        default=6,
+        ge=1,
+        le=20,
+        description="Maximum number of words in the generated title",
+    )
+    max_chars: int = Field(
+        default=60,
+        ge=10,
+        le=200,
+        description="Maximum number of characters in the generated title",
+    )
+    model_name: str | None = Field(
+        default=None,
+        description="Model name to use for title generation (None = use default model)",
+    )
+    # The default template contains the placeholders {max_words}, {user_msg}
+    # and {assistant_msg}; presumably the title generator fills them with
+    # str.format — confirm before adding other literal braces to a template.
+    prompt_template: str = Field(
+        default=("Generate a concise title (max {max_words} words) for this conversation.\nUser: {user_msg}\nAssistant: {assistant_msg}\n\nReturn ONLY the title, no quotes, no explanation."),
+        description="Prompt template for title generation",
+    )
+
+
+# Global configuration instance; starts with the model's defaults
+# (title generation enabled) until it is set or loaded.
+_title_config: TitleConfig = TitleConfig()
+
+
+def get_title_config() -> TitleConfig:
+    """Return the module-level title configuration."""
+    return _title_config
+
+
+def set_title_config(config: TitleConfig) -> None:
+    """Replace the module-level title configuration with *config*."""
+    global _title_config
+    _title_config = config
+
+
+def load_title_config_from_dict(config_dict: dict) -> None:
+    """Build a ``TitleConfig`` from *config_dict* and install it as the module-level config.
+
+    The dictionary's items are passed as keyword arguments to the model, and
+    any error raised while constructing it propagates to the caller.
+    """
+    global _title_config
+    _title_config = TitleConfig(**config_dict)
--- a/deer-flow/backend/packages/harness/deerflow/config/token_usage_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/token_usage_config.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel, Field
+
+
+class TokenUsageConfig(BaseModel):
+    """Configuration for token usage tracking."""
+
+    # Off by default; this model only carries the switch.
+    enabled: bool = Field(default=False, description="Enable token usage tracking middleware")
--- a/deer-flow/backend/packages/harness/deerflow/config/tool_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/tool_config.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class ToolGroupConfig(BaseModel):
+    """Config section for a tool group."""
+
+    name: str = Field(..., description="Unique name for the tool group")
+    # extra="allow": keys other than ``name`` are accepted rather than rejected.
+    model_config = ConfigDict(extra="allow")
+
+
+class ToolConfig(BaseModel):
+    """Config section for a tool.
+
+    ``name``, ``group`` and ``use`` are all required.
+    """
+
+    name: str = Field(..., description="Unique name for the tool")
+    group: str = Field(..., description="Group name for the tool")
+    # Per the example in the description, the value has the form "<module path>:<variable name>".
+    use: str = Field(
+        ...,
+        description="Variable name of the tool provider(e.g. deerflow.sandbox.tools:bash_tool)",
+    )
+    # extra="allow": undeclared keys are accepted rather than rejected.
+    model_config = ConfigDict(extra="allow")
--- a/deer-flow/backend/packages/harness/deerflow/config/tool_search_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/tool_search_config.py
@@ -0,0 +1,35 @@
+"""Configuration for deferred tool loading via tool_search."""
+
+from pydantic import BaseModel, Field
+
+
+class ToolSearchConfig(BaseModel):
+    """Configuration for deferred tool loading via tool_search.
+
+    When enabled, MCP tools are not loaded into the agent's context directly.
+    Instead, they are listed by name in the system prompt and discoverable
+    via the tool_search tool at runtime.
+    """
+
+    # Off by default; this model only carries the switch.
+    enabled: bool = Field(
+        default=False,
+        description="Defer tools and enable tool_search",
+    )
+
+
+# Cached module-level instance; None until first requested or loaded.
+_tool_search_config: ToolSearchConfig | None = None
+
+
+def get_tool_search_config() -> ToolSearchConfig:
+    """Return the module-level tool search config, creating a default one on first use.
+
+    Nothing is read from AppConfig here: if no config has been loaded yet, a
+    ``ToolSearchConfig()`` with default values (tool search disabled) is
+    created and cached. ``load_tool_search_config_from_dict`` replaces the
+    cached instance.
+    """
+    global _tool_search_config
+    if _tool_search_config is None:
+        _tool_search_config = ToolSearchConfig()
+    return _tool_search_config
+
+
+def load_tool_search_config_from_dict(data: dict) -> ToolSearchConfig:
+    """Load tool search config from a dict (called during AppConfig loading).
+
+    Validates *data* with ``ToolSearchConfig.model_validate``, stores the
+    result as the module-level config and returns it.
+    """
+    global _tool_search_config
+    _tool_search_config = ToolSearchConfig.model_validate(data)
+    return _tool_search_config
--- a/deer-flow/backend/packages/harness/deerflow/config/tracing_config.py
+++ b/deer-flow/backend/packages/harness/deerflow/config/tracing_config.py
@@ -0,0 +1,149 @@
+import os
+import threading
+
+from pydantic import BaseModel, Field
+
+_config_lock = threading.Lock()
+
+
+class LangSmithTracingConfig(BaseModel):
+    """LangSmith tracing settings. All four fields are required."""
+
+    enabled: bool = Field(...)
+    api_key: str | None = Field(...)
+    project: str = Field(...)
+    endpoint: str = Field(...)
+
+    @property
+    def is_configured(self) -> bool:
+        """True when tracing is enabled and a non-empty API key is present."""
+        if not self.enabled:
+            return False
+        return bool(self.api_key)
+
+    # NOTE(review): this instance method shadows the ``validate`` name that
+    # pydantic's BaseModel also defines — confirm nothing relies on the
+    # inherited one.
+    def validate(self) -> None:
+        """Raise ``ValueError`` if tracing is enabled without an API key."""
+        if not self.enabled or self.api_key:
+            return
+        raise ValueError("LangSmith tracing is enabled but LANGSMITH_API_KEY (or LANGCHAIN_API_KEY) is not set.")
+
+
+class LangfuseTracingConfig(BaseModel):
+    """Langfuse tracing settings. All four fields are required."""
+
+    enabled: bool = Field(...)
+    public_key: str | None = Field(...)
+    secret_key: str | None = Field(...)
+    host: str = Field(...)
+
+    @property
+    def is_configured(self) -> bool:
+        """True when tracing is enabled and both keys are non-empty."""
+        return all((self.enabled, self.public_key, self.secret_key))
+
+    # NOTE(review): this instance method shadows the ``validate`` name that
+    # pydantic's BaseModel also defines — confirm nothing relies on the
+    # inherited one.
+    def validate(self) -> None:
+        """Raise ``ValueError`` naming every missing key when tracing is enabled."""
+        if not self.enabled:
+            return
+        required = (
+            ("LANGFUSE_PUBLIC_KEY", self.public_key),
+            ("LANGFUSE_SECRET_KEY", self.secret_key),
+        )
+        missing: list[str] = [env_name for env_name, value in required if not value]
+        if missing:
+            raise ValueError(f"Langfuse tracing is enabled but required settings are missing: {', '.join(missing)}")
+
+
+class TracingConfig(BaseModel):
+    """Aggregate tracing settings for the supported providers (LangSmith and Langfuse)."""
+
+    langsmith: LangSmithTracingConfig = Field(...)
+    langfuse: LangfuseTracingConfig = Field(...)
+
+    @property
+    def is_configured(self) -> bool:
+        """True when at least one provider is enabled and fully configured."""
+        return len(self.enabled_providers) > 0
+
+    @property
+    def explicitly_enabled_providers(self) -> list[str]:
+        """Names of providers whose ``enabled`` flag is set, even if their keys are missing."""
+        switches = (
+            ("langsmith", self.langsmith.enabled),
+            ("langfuse", self.langfuse.enabled),
+        )
+        return [provider for provider, switched_on in switches if switched_on]
+
+    @property
+    def enabled_providers(self) -> list[str]:
+        """Names of providers that report ``is_configured`` (enabled and complete)."""
+        readiness = (
+            ("langsmith", self.langsmith.is_configured),
+            ("langfuse", self.langfuse.is_configured),
+        )
+        return [provider for provider, ready in readiness if ready]
+
+    def validate_enabled(self) -> None:
+        """Run each provider's ``validate()``, LangSmith first; the first failure propagates."""
+        for provider_config in (self.langsmith, self.langfuse):
+            provider_config.validate()
+
+
+_tracing_config: TracingConfig | None = None
+
+
+_TRUTHY_VALUES = {"1", "true", "yes", "on"}
+
+
+def _env_flag_preferred(*names: str) -> bool:
+    """Interpret the first set, non-blank env var among *names* as a boolean.
+
+    Variables that are unset or contain only whitespace are skipped. The
+    first remaining one decides the result: True if its stripped, lower-cased
+    value is in ``_TRUTHY_VALUES``, False otherwise. Returns False when no
+    candidate qualifies.
+    """
+    for name in names:
+        raw = os.environ.get(name)
+        if raw is None:
+            continue
+        text = raw.strip()
+        if text:
+            return text.lower() in _TRUTHY_VALUES
+    return False
+
+
+def _first_env_value(*names: str) -> str | None:
+    """Return the stripped value of the first env var in *names* that is set and non-blank.
+
+    Returns None when every candidate is unset, empty or whitespace-only.
+    """
+    for name in names:
+        text = (os.environ.get(name) or "").strip()
+        if text:
+            return text
+    return None
+
+
+def get_tracing_config() -> TracingConfig:
+    """Return the tracing configuration, built once from environment variables and then cached.
+
+    The first call reads the environment and stores the result in the
+    module-level ``_tracing_config``. Later calls return that cached
+    instance, so environment changes made after the first call are not
+    picked up by this function.
+    """
+    global _tracing_config
+    # Fast path: already built, no lock needed.
+    if _tracing_config is not None:
+        return _tracing_config
+    with _config_lock:
+        # Re-check under the lock: another thread may have built the config
+        # while this one was waiting.
+        if _tracing_config is not None:
+            return _tracing_config
+        _tracing_config = TracingConfig(
+            langsmith=LangSmithTracingConfig(
+                # For each setting the LANGSMITH_* name is tried first, then the LANGCHAIN_* name(s).
+                enabled=_env_flag_preferred("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2", "LANGCHAIN_TRACING"),
+                api_key=_first_env_value("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"),
+                project=_first_env_value("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT") or "deer-flow",
+                endpoint=_first_env_value("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT") or "https://api.smith.langchain.com",
+            ),
+            langfuse=LangfuseTracingConfig(
+                enabled=_env_flag_preferred("LANGFUSE_TRACING"),
+                public_key=_first_env_value("LANGFUSE_PUBLIC_KEY"),
+                secret_key=_first_env_value("LANGFUSE_SECRET_KEY"),
+                # The host is read from LANGFUSE_BASE_URL.
+                host=_first_env_value("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com",
+            ),
+        )
+        return _tracing_config
+
+
+def get_enabled_tracing_providers() -> list[str]:
+    """Return the configured tracing providers that are enabled and complete.
+
+    Delegates to ``get_tracing_config().enabled_providers``.
+    """
+    return get_tracing_config().enabled_providers
+
+
+def get_explicitly_enabled_tracing_providers() -> list[str]:
+    """Return tracing providers explicitly enabled by config, even if incomplete.
+
+    Delegates to ``get_tracing_config().explicitly_enabled_providers``.
+    """
+    return get_tracing_config().explicitly_enabled_providers
+
+
+def validate_enabled_tracing_providers() -> None:
+    """Validate that any explicitly enabled providers are fully configured.
+
+    Raises:
+        ValueError: If an enabled provider is missing a required key
+            (raised by the provider's ``validate()``).
+    """
+    get_tracing_config().validate_enabled()
+
+
+def is_tracing_enabled() -> bool:
+    """Check if any tracing provider is enabled and fully configured.
+
+    Delegates to ``get_tracing_config().is_configured``.
+    """
+    return get_tracing_config().is_configured