Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
from .tools import get_available_tools

__all__ = ["get_available_tools", "skill_manage_tool"]


def __getattr__(name: str):
    """Lazily resolve ``skill_manage_tool`` on first attribute access.

    Deferring the import keeps the package import cheap; any other
    attribute name raises ``AttributeError`` as usual.
    """
    if name != "skill_manage_tool":
        raise AttributeError(name)
    from .skill_manage_tool import skill_manage_tool

    return skill_manage_tool
|
||||
@@ -0,0 +1,13 @@
|
||||
from .clarification_tool import ask_clarification_tool
|
||||
from .present_file_tool import present_file_tool
|
||||
from .setup_agent_tool import setup_agent
|
||||
from .task_tool import task_tool
|
||||
from .view_image_tool import view_image_tool
|
||||
|
||||
__all__ = [
|
||||
"setup_agent",
|
||||
"present_file_tool",
|
||||
"ask_clarification_tool",
|
||||
"view_image_tool",
|
||||
"task_tool",
|
||||
]
|
||||
@@ -0,0 +1,55 @@
|
||||
from typing import Literal
|
||||
|
||||
from langchain.tools import tool
|
||||
|
||||
|
||||
@tool("ask_clarification", parse_docstring=True, return_direct=True)
def ask_clarification_tool(
    question: str,
    clarification_type: Literal[
        "missing_info",
        "ambiguous_requirement",
        "approach_choice",
        "risk_confirmation",
        "suggestion",
    ],
    context: str | None = None,
    options: list[str] | None = None,
) -> str:
    """Ask the user for clarification when you need more information to proceed.

    Use this tool when you encounter situations where you cannot proceed without user input:

    - **Missing information**: Required details not provided (e.g., file paths, URLs, specific requirements)
    - **Ambiguous requirements**: Multiple valid interpretations exist
    - **Approach choices**: Several valid approaches exist and you need user preference
    - **Risky operations**: Destructive actions that need explicit confirmation (e.g., deleting files, modifying production)
    - **Suggestions**: You have a recommendation but want user approval before proceeding

    The execution will be interrupted and the question will be presented to the user.
    Wait for the user's response before continuing.

    When to use ask_clarification:
    - You need information that wasn't provided in the user's request
    - The requirement can be interpreted in multiple ways
    - Multiple valid implementation approaches exist
    - You're about to perform a potentially dangerous operation
    - You have a recommendation but need user approval

    Best practices:
    - Ask ONE clarification at a time for clarity
    - Be specific and clear in your question
    - Don't make assumptions when clarification is needed
    - For risky operations, ALWAYS ask for confirmation
    - After calling this tool, execution will be interrupted automatically

    Args:
        question: The clarification question to ask the user. Be specific and clear.
        clarification_type: The type of clarification needed (missing_info, ambiguous_requirement, approach_choice, risk_confirmation, suggestion).
        context: Optional context explaining why clarification is needed. Helps the user understand the situation.
        options: Optional list of choices (for approach_choice or suggestion types). Present clear options for the user to choose from.
    """
    # NOTE: with parse_docstring=True the docstring above is parsed into the
    # tool schema seen by the LLM, so its text is part of the runtime contract.
    # return_direct=True means this tool's return value ends the agent turn.
    # This is a placeholder implementation
    # The actual logic is handled by ClarificationMiddleware which intercepts this tool call
    # and interrupts execution to present the question to the user
    return "Clarification request processed by middleware"
|
||||
@@ -0,0 +1,256 @@
|
||||
"""Built-in tool for invoking external ACP-compatible agents."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import Annotated, Any
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langchain_core.tools import BaseTool, InjectedToolArg, StructuredTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _InvokeACPAgentInput(BaseModel):
    """Arguments schema for the ``invoke_acp_agent`` tool.

    The ``Field`` descriptions are surfaced to the LLM through the tool
    schema, so their wording is part of the runtime contract.
    """

    agent: str = Field(description="Name of the ACP agent to invoke")
    prompt: str = Field(description="The concise task prompt to send to the agent")
|
||||
|
||||
|
||||
def _get_work_dir(thread_id: str | None) -> str:
    """Return the ACP workspace directory for ``thread_id``, creating it if needed.

    When a thread id is available, the thread gets an isolated workspace via
    ``paths.acp_workspace_dir(thread_id)`` so concurrent sessions cannot read
    or overwrite each other's ACP agent outputs. When no thread id is given
    (e.g. embedded / direct invocation), or the thread id is rejected as
    invalid, the legacy shared ``{base_dir}/acp-workspace/`` directory is
    used instead.

    Returns:
        The chosen directory as a string path.
    """
    from deerflow.config.paths import get_paths

    paths = get_paths()
    legacy_dir = paths.base_dir / "acp-workspace"

    if not thread_id:
        work_dir = legacy_dir
    else:
        try:
            work_dir = paths.acp_workspace_dir(thread_id)
        except ValueError:
            # Invalid thread ids fall back to the shared legacy location.
            logger.warning("Invalid thread_id %r for ACP workspace, falling back to global", thread_id)
            work_dir = legacy_dir

    work_dir.mkdir(parents=True, exist_ok=True)
    logger.info("ACP agent work_dir: %s", work_dir)
    return str(work_dir)
|
||||
|
||||
|
||||
def _build_mcp_servers() -> dict[str, dict[str, Any]]:
    """Build ACP ``mcpServers`` config from DeerFlow's enabled MCP servers.

    Returns a name -> config mapping produced by ``build_servers_config``
    (contrast with ``_build_acp_mcp_servers``, which emits the list-shaped
    ACP wire format).
    """
    # Lazy imports, matching the pattern used elsewhere in this module —
    # presumably to avoid import-time cost/cycles; confirm before hoisting.
    from deerflow.config.extensions_config import ExtensionsConfig
    from deerflow.mcp.client import build_servers_config

    return build_servers_config(ExtensionsConfig.from_file())
|
||||
|
||||
|
||||
def _build_acp_mcp_servers() -> list[dict[str, Any]]:
    """Convert DeerFlow's enabled MCP servers into the ACP ``new_session`` payload.

    DeerFlow's MCP helper keeps a name -> config mapping for the LangChain
    adapter, whereas the ACP client expects a list of server objects; this
    function performs that translation.

    Raises:
        ValueError: If an enabled server is missing a required field or uses
            an unsupported transport type.
    """
    from deerflow.config.extensions_config import ExtensionsConfig

    def _as_name_value_list(mapping):
        # ACP wants env/headers as [{"name": ..., "value": ...}] objects.
        return [{"name": key, "value": value} for key, value in mapping.items()]

    enabled = ExtensionsConfig.from_file().get_enabled_mcp_servers()
    servers: list[dict[str, Any]] = []

    for name, server_config in enabled.items():
        transport_type = server_config.type or "stdio"
        entry: dict[str, Any] = {"name": name, "type": transport_type}

        if transport_type == "stdio":
            if not server_config.command:
                raise ValueError(f"MCP server '{name}' with stdio transport requires 'command' field")
            entry["command"] = server_config.command
            entry["args"] = server_config.args
            entry["env"] = _as_name_value_list(server_config.env)
        elif transport_type in ("http", "sse"):
            if not server_config.url:
                raise ValueError(f"MCP server '{name}' with {transport_type} transport requires 'url' field")
            entry["url"] = server_config.url
            entry["headers"] = _as_name_value_list(server_config.headers)
        else:
            raise ValueError(f"MCP server '{name}' has unsupported transport type: {transport_type}")

        servers.append(entry)

    return servers
|
||||
|
||||
|
||||
def _build_permission_response(options: list[Any], *, auto_approve: bool) -> Any:
    """Build an ACP permission response.

    With ``auto_approve`` set, scans the options for an ``allow_once`` choice
    first, then ``allow_always``, and selects the first one that carries a
    usable option id. Otherwise (the default) the request is cancelled —
    permission handling is left to the ACP agent's own policy, or the agent
    must be configured not to request permissions at all.
    """
    from acp import RequestPermissionResponse
    from acp.schema import AllowedOutcome, DeniedOutcome

    if auto_approve:
        for wanted_kind in ("allow_once", "allow_always"):
            for candidate in options:
                if getattr(candidate, "kind", None) != wanted_kind:
                    continue

                # Accept either snake_case or camelCase id attributes.
                chosen_id = getattr(candidate, "option_id", None)
                if chosen_id is None:
                    chosen_id = getattr(candidate, "optionId", None)
                if chosen_id is None:
                    continue

                return RequestPermissionResponse(
                    outcome=AllowedOutcome(outcome="selected", optionId=chosen_id),
                )

    return RequestPermissionResponse(outcome=DeniedOutcome(outcome="cancelled"))
|
||||
|
||||
|
||||
def _format_invocation_error(agent: str, cmd: str, exc: Exception) -> str:
|
||||
"""Return a user-facing ACP invocation error with actionable remediation."""
|
||||
if not isinstance(exc, FileNotFoundError):
|
||||
return f"Error invoking ACP agent '{agent}': {exc}"
|
||||
|
||||
message = f"Error invoking ACP agent '{agent}': Command '{cmd}' was not found on PATH."
|
||||
if cmd == "codex-acp" and shutil.which("codex"):
|
||||
return f"{message} The installed `codex` CLI does not speak ACP directly. Install a Codex ACP adapter (for example `npx @zed-industries/codex-acp`) or update `acp_agents.codex.command` and `args` in config.yaml."
|
||||
|
||||
return f"{message} Install the agent binary or update `acp_agents.{agent}.command` in config.yaml."
|
||||
|
||||
|
||||
def build_invoke_acp_agent_tool(agents: dict) -> BaseTool:
    """Create the ``invoke_acp_agent`` tool with a description generated from configured agents.

    The tool description includes the list of available agents so that the LLM
    knows which agents it can invoke without requiring hardcoded names.

    Args:
        agents: Mapping of agent name -> ``ACPAgentConfig``.

    Returns:
        A LangChain ``BaseTool`` ready to be included in the tool list.
    """
    agent_lines = "\n".join(f"- {name}: {cfg.description}" for name, cfg in agents.items())
    description = (
        "Invoke an external ACP-compatible agent and return its final response.\n\n"
        "Available agents:\n"
        f"{agent_lines}\n\n"
        "IMPORTANT: ACP agents operate in their own independent workspace. "
        "Do NOT include /mnt/user-data paths in the prompt. "
        "Give the agent a self-contained task description — it will produce results in its own workspace. "
        "After the agent completes, its output files are accessible at /mnt/acp-workspace/ (read-only)."
    )

    # Capture agents in closure so the function can reference it
    _agents = dict(agents)

    # NOTE(review): the default of None does not match the non-Optional
    # RunnableConfig annotation — relies on the framework always injecting
    # the config; confirm before tightening the annotation.
    async def _invoke(agent: str, prompt: str, config: Annotated[RunnableConfig, InjectedToolArg] = None) -> str:
        logger.info("Invoking ACP agent %s (prompt length: %d)", agent, len(prompt))
        # Debug log truncates the prompt preview to 200 characters.
        logger.debug("Invoking ACP agent %s with prompt: %.200s%s", agent, prompt, "..." if len(prompt) > 200 else "")
        if agent not in _agents:
            available = ", ".join(_agents.keys())
            return f"Error: Unknown agent '{agent}'. Available: {available}"

        agent_config = _agents[agent]
        # thread_id drives per-thread workspace isolation in _get_work_dir.
        thread_id: str | None = ((config or {}).get("configurable") or {}).get("thread_id")

        # The acp package is an optional dependency; fail with guidance
        # rather than an ImportError traceback.
        try:
            from acp import PROTOCOL_VERSION, Client, text_block
            from acp.schema import ClientCapabilities, Implementation
        except ImportError:
            return "Error: agent-client-protocol package is not installed. Run `uv sync` to install project dependencies."

        class _CollectingClient(Client):
            """Minimal ACP Client that collects streamed text from session updates."""

            def __init__(self) -> None:
                self._chunks: list[str] = []  # streamed text fragments, in arrival order

            @property
            def collected_text(self) -> str:
                return "".join(self._chunks)

            async def session_update(self, session_id: str, update, **kwargs) -> None:  # type: ignore[override]
                # Best-effort collection: non-text updates and schema
                # surprises are silently ignored.
                try:
                    from acp.schema import TextContentBlock

                    if hasattr(update, "content") and isinstance(update.content, TextContentBlock):
                        self._chunks.append(update.content.text)
                except Exception:
                    pass

            async def request_permission(self, options, session_id: str, tool_call, **kwargs):  # type: ignore[override]
                # Delegate the allow/deny decision to the module-level policy
                # helper; log the outcome either way.
                response = _build_permission_response(options, auto_approve=agent_config.auto_approve_permissions)
                outcome = response.outcome.outcome
                if outcome == "selected":
                    logger.info("ACP permission auto-approved for tool call %s in session %s", tool_call.tool_call_id, session_id)
                else:
                    logger.warning("ACP permission denied for tool call %s in session %s (set auto_approve_permissions: true in config.yaml to enable)", tool_call.tool_call_id, session_id)
                return response

        client = _CollectingClient()
        cmd = agent_config.command
        args = agent_config.args or []
        physical_cwd = _get_work_dir(thread_id)
        # Invalid MCP config is non-fatal: the agent still runs, just
        # without MCP servers attached.
        try:
            mcp_servers = _build_acp_mcp_servers()
        except ValueError as exc:
            logger.warning(
                "Invalid MCP server configuration for ACP agent '%s'; continuing without MCP servers: %s",
                agent,
                exc,
            )
            mcp_servers = []
        # Values starting with "$" are resolved from the host environment
        # (missing variables become empty strings).
        agent_env: dict[str, str] | None = None
        if agent_config.env:
            agent_env = {k: (os.environ.get(v[1:], "") if v.startswith("$") else v) for k, v in agent_config.env.items()}

        try:
            from acp import spawn_agent_process

            async with spawn_agent_process(client, cmd, *args, env=agent_env, cwd=physical_cwd) as (conn, proc):
                logger.info("Spawning ACP agent '%s' with command '%s' and args %s in cwd %s", agent, cmd, args, physical_cwd)
                await conn.initialize(
                    protocol_version=PROTOCOL_VERSION,
                    client_capabilities=ClientCapabilities(),
                    client_info=Implementation(name="deerflow", title="DeerFlow", version="0.1.0"),
                )
                session_kwargs: dict[str, Any] = {"cwd": physical_cwd, "mcp_servers": mcp_servers}
                if agent_config.model:
                    session_kwargs["model"] = agent_config.model
                session = await conn.new_session(**session_kwargs)
                await conn.prompt(
                    session_id=session.session_id,
                    prompt=[text_block(prompt)],
                )
                result = client.collected_text
                # Logs a 1000-character preview of the agent output at INFO.
                logger.info("ACP agent '%s' returned %s", agent, result[:1000])
                logger.info("ACP agent '%s' returned %d characters", agent, len(result))
                return result or "(no response)"
        except Exception as e:
            # Spawn/protocol failures are reported as tool output rather
            # than raised, so the calling agent can react to them.
            logger.error("ACP agent '%s' invocation failed: %s", agent, e)
            return _format_invocation_error(agent, cmd, e)

    return StructuredTool.from_function(
        name="invoke_acp_agent",
        description=description,
        coroutine=_invoke,
        args_schema=_InvokeACPAgentInput,
    )
|
||||
@@ -0,0 +1,100 @@
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langgraph.types import Command
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
|
||||
|
||||
OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
|
||||
|
||||
|
||||
def _normalize_presented_filepath(
    runtime: ToolRuntime[ContextT, ThreadState],
    filepath: str,
) -> str:
    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.

    Both a virtual sandbox path (e.g. `/mnt/user-data/outputs/report.md`) and
    a host-side thread outputs path (e.g.
    `/app/backend/.deer-flow/threads/<thread>/user-data/outputs/report.md`)
    are accepted; either form is resolved and re-expressed as a virtual path.

    Returns:
        The normalized virtual path.

    Raises:
        ValueError: If runtime metadata is missing or the path is outside the
            current thread's outputs directory.
    """
    if runtime.state is None:
        raise ValueError("Thread runtime state is not available")

    thread_id = runtime.context.get("thread_id") if runtime.context else None
    if not thread_id:
        raise ValueError("Thread ID is not available in runtime context")

    outputs_path = (runtime.state.get("thread_data") or {}).get("outputs_path")
    if not outputs_path:
        raise ValueError("Thread outputs path is not available in runtime state")

    outputs_dir = Path(outputs_path).resolve()

    # Classify the input: virtual sandbox path vs. host filesystem path.
    candidate = filepath.lstrip("/")
    sandbox_root = VIRTUAL_PATH_PREFIX.lstrip("/")
    is_virtual = candidate == sandbox_root or candidate.startswith(sandbox_root + "/")

    if is_virtual:
        actual_path = get_paths().resolve_virtual_path(thread_id, filepath)
    else:
        actual_path = Path(filepath).expanduser().resolve()

    # Containment check: the resolved path must live under this thread's
    # outputs directory, otherwise presenting it is refused.
    try:
        relative_path = actual_path.relative_to(outputs_dir)
    except ValueError as exc:
        raise ValueError(f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}") from exc

    return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
||||
|
||||
|
||||
@tool("present_files", parse_docstring=True)
def present_file_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    filepaths: list[str],
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command:
    """Make files visible to the user for viewing and rendering in the client interface.

    When to use the present_files tool:

    - Making any file available for the user to view, download, or interact with
    - Presenting multiple related files at once
    - After creating files that should be presented to the user

    When NOT to use the present_files tool:
    - When you only need to read file contents for your own processing
    - For temporary or intermediate files not meant for user viewing

    Notes:
    - You should call this tool after creating files and moving them to the `/mnt/user-data/outputs` directory.
    - This tool can be safely called in parallel with other tools. State updates are handled by a reducer to prevent conflicts.

    Args:
        filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
    """
    # Normalize each path; the first invalid one aborts the whole call
    # with an error ToolMessage.
    normalized_paths: list[str] = []
    for filepath in filepaths:
        try:
            normalized_paths.append(_normalize_presented_filepath(runtime, filepath))
        except ValueError as exc:
            return Command(
                update={"messages": [ToolMessage(f"Error: {exc}", tool_call_id=tool_call_id)]},
            )

    # The merge_artifacts reducer will handle merging and deduplication
    return Command(
        update={
            "artifacts": normalized_paths,
            "messages": [ToolMessage("Successfully presented files", tool_call_id=tool_call_id)],
        },
    )
|
||||
@@ -0,0 +1,62 @@
|
||||
import logging
|
||||
|
||||
import yaml
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import tool
|
||||
from langgraph.prebuilt import ToolRuntime
|
||||
from langgraph.types import Command
|
||||
|
||||
from deerflow.config.paths import get_paths
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@tool
def setup_agent(
    soul: str,
    description: str,
    runtime: ToolRuntime,
) -> Command:
    """Setup the custom DeerFlow agent.

    Writes `SOUL.md` (and, for named agents, `config.yaml`) into the agent
    directory. On failure, any directory *created by this call* is removed
    and an error ToolMessage is returned.

    Args:
        soul: Full SOUL.md content defining the agent's personality and behavior.
        description: One-line description of what the agent does.
    """

    agent_name: str | None = runtime.context.get("agent_name") if runtime.context else None
    # Initialized before the try so the except handler can never hit an
    # unbound name when get_paths()/agent_dir() itself raises.
    agent_dir = None
    created_dir = False

    try:
        paths = get_paths()
        agent_dir = paths.agent_dir(agent_name) if agent_name else paths.base_dir
        # Remember whether this call created the directory: failure cleanup
        # must never delete a pre-existing agent directory.
        created_dir = bool(agent_name) and not agent_dir.exists()
        agent_dir.mkdir(parents=True, exist_ok=True)

        if agent_name:
            # If agent_name is provided, we are creating a custom agent in the agents/ directory
            config_data: dict = {"name": agent_name}
            if description:
                config_data["description"] = description

            config_file = agent_dir / "config.yaml"
            with open(config_file, "w", encoding="utf-8") as f:
                yaml.dump(config_data, f, default_flow_style=False, allow_unicode=True)

        soul_file = agent_dir / "SOUL.md"
        soul_file.write_text(soul, encoding="utf-8")

        logger.info(f"[agent_creator] Created agent '{agent_name}' at {agent_dir}")
        return Command(
            update={
                "created_agent_name": agent_name,
                "messages": [ToolMessage(content=f"Agent '{agent_name}' created successfully!", tool_call_id=runtime.tool_call_id)],
            }
        )

    except Exception as e:
        import shutil

        if created_dir and agent_dir is not None and agent_dir.exists():
            # Cleanup the custom agent directory only if this call created it
            # but an error occurred during setup
            shutil.rmtree(agent_dir)
        logger.error(f"[agent_creator] Failed to create agent '{agent_name}': {e}", exc_info=True)
        return Command(update={"messages": [ToolMessage(content=f"Error: {e}", tool_call_id=runtime.tool_call_id)]})
|
||||
@@ -0,0 +1,248 @@
|
||||
"""Task tool for delegating work to subagents."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import replace
|
||||
from typing import Annotated
|
||||
|
||||
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
||||
from langgraph.config import get_stream_writer
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.lead_agent.prompt import get_skills_prompt_section
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
from deerflow.sandbox.security import LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE, is_host_bash_allowed
|
||||
from deerflow.subagents import SubagentExecutor, get_available_subagent_names, get_subagent_config
|
||||
from deerflow.subagents.executor import SubagentStatus, cleanup_background_task, get_background_task_result, request_cancel_background_task
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@tool("task", parse_docstring=True)
async def task_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    prompt: str,
    subagent_type: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
    max_turns: int | None = None,
) -> str:
    """Delegate a task to a specialized subagent that runs in its own context.

    Subagents help you:
    - Preserve context by keeping exploration and implementation separate
    - Handle complex multi-step tasks autonomously
    - Execute commands or operations in isolated contexts

    Available subagent types depend on the active sandbox configuration:
    - **general-purpose**: A capable agent for complex, multi-step tasks that require
      both exploration and action. Use when the task requires complex reasoning,
      multiple dependent steps, or would benefit from isolated context.
    - **bash**: Command execution specialist for running bash commands. This is only
      available when host bash is explicitly allowed or when using an isolated shell
      sandbox such as `AioSandboxProvider`.

    When to use this tool:
    - Complex tasks requiring multiple steps or tools
    - Tasks that produce verbose output
    - When you want to isolate context from the main conversation
    - Parallel research or exploration tasks

    When NOT to use this tool:
    - Simple, single-step operations (use tools directly)
    - Tasks requiring user interaction or clarification

    Args:
        description: A short (3-5 word) description of the task for logging/display. ALWAYS PROVIDE THIS PARAMETER FIRST.
        prompt: The task description for the subagent. Be specific and clear about what needs to be done. ALWAYS PROVIDE THIS PARAMETER SECOND.
        subagent_type: The type of subagent to use. ALWAYS PROVIDE THIS PARAMETER THIRD.
        max_turns: Optional maximum number of agent turns. Defaults to subagent's configured max.
    """
    # NOTE: the docstring above is parsed into the tool schema
    # (parse_docstring=True) and is therefore part of the runtime contract.
    available_subagent_names = get_available_subagent_names()

    # Get subagent configuration
    config = get_subagent_config(subagent_type)
    if config is None:
        available = ", ".join(available_subagent_names)
        return f"Error: Unknown subagent type '{subagent_type}'. Available: {available}"
    # Hard gate: bash subagent is refused unless host bash is allowed.
    if subagent_type == "bash" and not is_host_bash_allowed():
        return f"Error: {LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE}"

    # Build config overrides
    overrides: dict = {}

    skills_section = get_skills_prompt_section()
    if skills_section:
        overrides["system_prompt"] = config.system_prompt + "\n\n" + skills_section

    if max_turns is not None:
        overrides["max_turns"] = max_turns

    if overrides:
        # config is a dataclass; replace() produces an overridden copy.
        config = replace(config, **overrides)

    # Extract parent context from runtime
    sandbox_state = None
    thread_data = None
    thread_id = None
    parent_model = None
    trace_id = None

    if runtime is not None:
        sandbox_state = runtime.state.get("sandbox")
        thread_data = runtime.state.get("thread_data")
        thread_id = runtime.context.get("thread_id") if runtime.context else None
        if thread_id is None:
            thread_id = runtime.config.get("configurable", {}).get("thread_id")

        # Try to get parent model from configurable
        metadata = runtime.config.get("metadata", {})
        parent_model = metadata.get("model_name")

        # Get or generate trace_id for distributed tracing
        trace_id = metadata.get("trace_id") or str(uuid.uuid4())[:8]

    # Get available tools (excluding task tool to prevent nesting)
    # Lazy import to avoid circular dependency
    from deerflow.tools import get_available_tools

    # Subagents should not have subagent tools enabled (prevent recursive nesting)
    tools = get_available_tools(model_name=parent_model, subagent_enabled=False)

    # Create executor
    executor = SubagentExecutor(
        config=config,
        tools=tools,
        parent_model=parent_model,
        sandbox_state=sandbox_state,
        thread_data=thread_data,
        thread_id=thread_id,
        trace_id=trace_id,
    )

    # Start background execution (always async to prevent blocking)
    # Use tool_call_id as task_id for better traceability
    task_id = executor.execute_async(prompt, task_id=tool_call_id)

    # Poll for task completion in backend (removes need for LLM to poll)
    poll_count = 0
    last_status = None
    last_message_count = 0  # Track how many AI messages we've already sent
    # Polling timeout: execution timeout + 60s buffer, checked every 5s
    max_poll_count = (config.timeout_seconds + 60) // 5

    logger.info(f"[trace={trace_id}] Started background task {task_id} (subagent={subagent_type}, timeout={config.timeout_seconds}s, polling_limit={max_poll_count} polls)")

    writer = get_stream_writer()
    # Send Task Started message
    writer({"type": "task_started", "task_id": task_id, "description": description})

    try:
        while True:
            result = get_background_task_result(task_id)

            if result is None:
                logger.error(f"[trace={trace_id}] Task {task_id} not found in background tasks")
                writer({"type": "task_failed", "task_id": task_id, "error": "Task disappeared from background tasks"})
                cleanup_background_task(task_id)
                return f"Error: Task {task_id} disappeared from background tasks"

            # Log status changes for debugging
            if result.status != last_status:
                logger.info(f"[trace={trace_id}] Task {task_id} status: {result.status.value}")
                last_status = result.status

            # Check for new AI messages and send task_running events
            current_message_count = len(result.ai_messages)
            if current_message_count > last_message_count:
                # Send task_running event for each new message
                for i in range(last_message_count, current_message_count):
                    message = result.ai_messages[i]
                    writer(
                        {
                            "type": "task_running",
                            "task_id": task_id,
                            "message": message,
                            "message_index": i + 1,  # 1-based index for display
                            "total_messages": current_message_count,
                        }
                    )
                    logger.info(f"[trace={trace_id}] Task {task_id} sent message #{i + 1}/{current_message_count}")
                last_message_count = current_message_count

            # Check if task completed, failed, or timed out
            if result.status == SubagentStatus.COMPLETED:
                writer({"type": "task_completed", "task_id": task_id, "result": result.result})
                logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
                cleanup_background_task(task_id)
                return f"Task Succeeded. Result: {result.result}"
            elif result.status == SubagentStatus.FAILED:
                writer({"type": "task_failed", "task_id": task_id, "error": result.error})
                logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
                cleanup_background_task(task_id)
                return f"Task failed. Error: {result.error}"
            elif result.status == SubagentStatus.CANCELLED:
                writer({"type": "task_cancelled", "task_id": task_id, "error": result.error})
                logger.info(f"[trace={trace_id}] Task {task_id} cancelled: {result.error}")
                cleanup_background_task(task_id)
                return "Task cancelled by user."
            elif result.status == SubagentStatus.TIMED_OUT:
                writer({"type": "task_timed_out", "task_id": task_id, "error": result.error})
                logger.warning(f"[trace={trace_id}] Task {task_id} timed out: {result.error}")
                cleanup_background_task(task_id)
                return f"Task timed out. Error: {result.error}"

            # Still running, wait before next poll
            await asyncio.sleep(5)
            poll_count += 1

            # Polling timeout as a safety net (in case thread pool timeout doesn't work)
            # Set to execution timeout + 60s buffer, in 5s poll intervals
            # This catches edge cases where the background task gets stuck
            # Note: We don't call cleanup_background_task here because the task may
            # still be running in the background. The cleanup will happen when the
            # executor completes and sets a terminal status.
            if poll_count > max_poll_count:
                timeout_minutes = config.timeout_seconds // 60
                logger.error(f"[trace={trace_id}] Task {task_id} polling timed out after {poll_count} polls (should have been caught by thread pool timeout)")
                writer({"type": "task_timed_out", "task_id": task_id})
                return f"Task polling timed out after {timeout_minutes} minutes. This may indicate the background task is stuck. Status: {result.status.value}"
    except asyncio.CancelledError:
        # Signal the background subagent thread to stop cooperatively.
        # Without this, the thread (running in ThreadPoolExecutor with its
        # own event loop via asyncio.run) would continue executing even
        # after the parent task is cancelled.
        request_cancel_background_task(task_id)

        async def cleanup_when_done() -> None:
            # Deferred cleanup: wait for the background task to reach a
            # terminal state, then remove its registry entry.
            max_cleanup_polls = max_poll_count
            cleanup_poll_count = 0

            while True:
                result = get_background_task_result(task_id)
                if result is None:
                    return

                if result.status in {SubagentStatus.COMPLETED, SubagentStatus.FAILED, SubagentStatus.CANCELLED, SubagentStatus.TIMED_OUT} or getattr(result, "completed_at", None) is not None:
                    cleanup_background_task(task_id)
                    return

                if cleanup_poll_count > max_cleanup_polls:
                    logger.warning(f"[trace={trace_id}] Deferred cleanup for task {task_id} timed out after {cleanup_poll_count} polls")
                    return

                await asyncio.sleep(5)
                cleanup_poll_count += 1

        def log_cleanup_failure(cleanup_task: asyncio.Task[None]) -> None:
            # Done-callback: surface exceptions from the detached cleanup task.
            if cleanup_task.cancelled():
                return

            exc = cleanup_task.exception()
            if exc is not None:
                logger.error(f"[trace={trace_id}] Deferred cleanup failed for task {task_id}: {exc}")

        logger.debug(f"[trace={trace_id}] Scheduling deferred cleanup for cancelled task {task_id}")
        asyncio.create_task(cleanup_when_done()).add_done_callback(log_cleanup_failure)
        raise
||||
@@ -0,0 +1,193 @@
|
||||
"""Tool search — deferred tool discovery at runtime.
|
||||
|
||||
Contains:
|
||||
- DeferredToolRegistry: stores deferred tools and handles regex search
|
||||
- tool_search: the LangChain tool the agent calls to discover deferred tools
|
||||
|
||||
The agent sees deferred tool names in <available-deferred-tools> but cannot
|
||||
call them until it fetches their full schema via the tool_search tool.
|
||||
Source-agnostic: no mention of MCP or tool origin.
|
||||
"""
|
||||
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.tools import BaseTool
|
||||
from langchain_core.tools import tool
|
||||
from langchain_core.utils.function_calling import convert_to_openai_function
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_RESULTS = 5 # Max tools returned per search
|
||||
|
||||
|
||||
# ── Registry ──
|
||||
|
||||
|
||||
@dataclass
class DeferredToolEntry:
    """Lightweight metadata for a deferred tool (no full schema in context)."""

    # Tool name as surfaced to the model in <available-deferred-tools>.
    name: str
    # Free-text description; searched together with the name during lookup.
    description: str
    tool: BaseTool  # Full tool object, returned only on search match
|
||||
|
||||
|
||||
class DeferredToolRegistry:
    """Registry of deferred tools, searchable by regex pattern."""

    def __init__(self):
        # Registration order is preserved; stable sorts below keep it for ties.
        self._entries: list[DeferredToolEntry] = []

    def register(self, tool: BaseTool) -> None:
        """Record *tool* as deferred; only name/description enter the context."""
        self._entries.append(
            DeferredToolEntry(
                name=tool.name,
                description=tool.description or "",
                tool=tool,
            )
        )

    def promote(self, names: set[str]) -> None:
        """Remove tools from the deferred registry so they pass through the filter.

        Called after tool_search returns a tool's schema — the LLM now knows
        the full definition, so the DeferredToolFilterMiddleware should stop
        stripping it from bind_tools on subsequent calls.
        """
        if not names:
            return
        before = len(self._entries)
        self._entries = [e for e in self._entries if e.name not in names]
        promoted = before - len(self._entries)
        if promoted:
            logger.debug(f"Promoted {promoted} tool(s) from deferred to active: {names}")

    def search(self, query: str) -> list[BaseTool]:
        """Search deferred tools by regex pattern against name + description.

        Supports three query forms (aligned with Claude Code):
        - "select:name1,name2" — exact name match
        - "+keyword rest" — name must contain keyword, rank by rest
        - "keyword query" — regex match against name + description

        Returns:
            List of matched BaseTool objects (up to MAX_RESULTS).
        """
        if query.startswith("select:"):
            names = {n.strip() for n in query[len("select:"):].split(",")}
            return [e.tool for e in self._entries if e.name in names][:MAX_RESULTS]

        if query.startswith("+"):
            parts = query[1:].split(None, 1)
            # BUGFIX: a bare "+" (or "+" followed only by whitespace) used to
            # raise IndexError on parts[0]. With no required keyword present,
            # fall through to the general regex search over the remainder.
            if parts:
                required = parts[0].lower()
                candidates = [e for e in self._entries if required in e.name.lower()]
                if len(parts) > 1:
                    candidates.sort(
                        key=lambda e: _regex_score(parts[1], e),
                        reverse=True,
                    )
                return [e.tool for e in candidates][:MAX_RESULTS]
            query = query[1:]

        # General regex search; an invalid pattern degrades to a literal match.
        try:
            regex = re.compile(query, re.IGNORECASE)
        except re.error:
            regex = re.compile(re.escape(query), re.IGNORECASE)

        scored = []
        for entry in self._entries:
            searchable = f"{entry.name} {entry.description}"
            if regex.search(searchable):
                # Name hits rank above description-only hits; sort is stable,
                # so registration order breaks ties.
                score = 2 if regex.search(entry.name) else 1
                scored.append((score, entry))

        scored.sort(key=lambda x: x[0], reverse=True)
        return [entry.tool for _, entry in scored][:MAX_RESULTS]

    @property
    def entries(self) -> list[DeferredToolEntry]:
        """Snapshot copy of the current deferred entries."""
        return list(self._entries)

    def __len__(self) -> int:
        return len(self._entries)
|
||||
|
||||
|
||||
def _regex_score(pattern: str, entry: DeferredToolEntry) -> int:
    """Count how often *pattern* matches in the entry's name + description.

    An invalid regex degrades to a literal (escaped) match instead of raising.
    """
    try:
        compiled = re.compile(pattern, re.IGNORECASE)
    except re.error:
        compiled = re.compile(re.escape(pattern), re.IGNORECASE)
    haystack = f"{entry.name} {entry.description}"
    return len(compiled.findall(haystack))
|
||||
|
||||
|
||||
# ── Per-request registry (ContextVar) ──
#
# Using a ContextVar instead of a module-level global prevents concurrent
# requests from clobbering each other's registry. In asyncio-based LangGraph
# each graph run executes in its own async context, so each request gets an
# independent registry value. For synchronous tools run via
# loop.run_in_executor, Python copies the current context to the worker thread,
# so the ContextVar value is correctly inherited there too.

# Default of None means "no deferred tools configured for this context".
_registry_var: contextvars.ContextVar[DeferredToolRegistry | None] = contextvars.ContextVar("deferred_tool_registry", default=None)
|
||||
|
||||
|
||||
def get_deferred_registry() -> DeferredToolRegistry | None:
    """Return the registry for the current async context, or None if unset."""
    return _registry_var.get()
|
||||
|
||||
|
||||
def set_deferred_registry(registry: DeferredToolRegistry) -> None:
    """Install *registry* as the deferred-tool registry for this context."""
    _registry_var.set(registry)
|
||||
|
||||
|
||||
def reset_deferred_registry() -> None:
    """Reset the deferred registry for the current async context."""
    # Setting None (the ContextVar default) rather than restoring a Token
    # keeps this callable from any frame, not just the one that set it.
    _registry_var.set(None)
|
||||
|
||||
|
||||
# ── Tool ──
|
||||
|
||||
|
||||
@tool
def tool_search(query: str) -> str:
    """Fetches full schema definitions for deferred tools so they can be called.

    Deferred tools appear by name in <available-deferred-tools> in the system
    prompt. Until fetched, only the name is known — there is no parameter
    schema, so the tool cannot be invoked. This tool takes a query, matches
    it against the deferred tool list, and returns the matched tools' complete
    definitions. Once a tool's schema appears in that result, it is callable.

    Query forms:
    - "select:Read,Edit,Grep" — fetch these exact tools by name
    - "notebook jupyter" — keyword search, up to max_results best matches
    - "+slack send" — require "slack" in the name, rank by remaining terms

    Args:
        query: Query to find deferred tools. Use "select:<tool_name>" for
            direct selection, or keywords to search.

    Returns:
        Matched tool definitions as JSON array.
    """
    registry = get_deferred_registry()
    if not registry:
        return "No deferred tools available."

    hits = registry.search(query)
    if not hits:
        return f"No tools found matching: {query}"

    selected = hits[:MAX_RESULTS]

    # LangChain's converter emits the OpenAI function-call schema, which all
    # supported models understand — keeps the result model-agnostic.
    definitions = [convert_to_openai_function(t) for t in selected]

    # Promote the returned tools so DeferredToolFilterMiddleware stops
    # stripping them from bind_tools — the LLM now holds their full schemas.
    registry.promote({t.name for t in selected})

    return json.dumps(definitions, indent=2, ensure_ascii=False)
|
||||
@@ -0,0 +1,95 @@
|
||||
import base64
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langgraph.types import Command
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
|
||||
|
||||
@tool("view_image", parse_docstring=True)
def view_image_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    image_path: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command:
    """Read an image file.

    Use this tool to read an image file and make it available for display.

    When to use the view_image tool:
    - When you need to view an image file.

    When NOT to use the view_image tool:
    - For non-image files (use present_files instead)
    - For multiple files at once (use present_files instead)

    Args:
        image_path: Absolute path to the image file. Common formats supported: jpg, jpeg, png, webp.
    """
    from deerflow.sandbox.tools import get_thread_data, replace_virtual_path

    def fail(message: str) -> Command:
        # Every validation failure surfaces as a ToolMessage tied to this call.
        return Command(
            update={"messages": [ToolMessage(message, tool_call_id=tool_call_id)]},
        )

    # /mnt/user-data/* virtual paths are mapped to thread-specific directories.
    thread_data = get_thread_data(runtime)
    actual_path = replace_virtual_path(image_path, thread_data)
    path = Path(actual_path)

    # Guard clauses: absolute path, exists, and is a regular file.
    if not path.is_absolute():
        return fail(f"Error: Path must be absolute, got: {image_path}")
    if not path.exists():
        return fail(f"Error: Image file not found: {image_path}")
    if not path.is_file():
        return fail(f"Error: Path is not a file: {image_path}")

    valid_extensions = {".jpg", ".jpeg", ".png", ".webp"}
    suffix = path.suffix.lower()
    if suffix not in valid_extensions:
        return fail(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(valid_extensions)}")

    # Prefer the platform MIME database, with a static fallback covering the
    # accepted formats.
    mime_type, _ = mimetypes.guess_type(actual_path)
    if mime_type is None:
        extension_to_mime = {
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".png": "image/png",
            ".webp": "image/webp",
        }
        mime_type = extension_to_mime.get(suffix, "application/octet-stream")

    # Read the raw bytes and base64-encode them for transport in state.
    try:
        image_base64 = base64.b64encode(path.read_bytes()).decode("utf-8")
    except Exception as e:
        return fail(f"Error reading image file: {str(e)}")

    # The merge_viewed_images reducer merges this entry with existing images.
    new_viewed_images = {image_path: {"base64": image_base64, "mime_type": mime_type}}
    return Command(
        update={"viewed_images": new_viewed_images, "messages": [ToolMessage("Successfully read image", tool_call_id=tool_call_id)]},
    )
|
||||
@@ -0,0 +1,247 @@
|
||||
"""Tool for creating and evolving custom skills."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import shutil
|
||||
from typing import Any
|
||||
from weakref import WeakValueDictionary
|
||||
|
||||
from langchain.tools import ToolRuntime, tool
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
from deerflow.mcp.tools import _make_sync_tool_wrapper
|
||||
from deerflow.skills.manager import (
|
||||
append_history,
|
||||
atomic_write,
|
||||
custom_skill_exists,
|
||||
ensure_custom_skill_is_editable,
|
||||
ensure_safe_support_path,
|
||||
get_custom_skill_dir,
|
||||
get_custom_skill_file,
|
||||
public_skill_exists,
|
||||
read_custom_skill_content,
|
||||
validate_skill_markdown_content,
|
||||
validate_skill_name,
|
||||
)
|
||||
from deerflow.skills.security_scanner import scan_skill_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
|
||||
|
||||
|
||||
def _get_lock(name: str) -> asyncio.Lock:
|
||||
lock = _skill_locks.get(name)
|
||||
if lock is None:
|
||||
lock = asyncio.Lock()
|
||||
_skill_locks[name] = lock
|
||||
return lock
|
||||
|
||||
|
||||
def _get_thread_id(runtime: ToolRuntime[ContextT, ThreadState] | None) -> str | None:
    """Best-effort extraction of the thread id from the tool runtime.

    Prefers the runtime context; falls back to LangGraph's configurable
    mapping. Returns None when no runtime or no thread id is available.
    """
    if runtime is None:
        return None
    context = runtime.context
    if context:
        from_context = context.get("thread_id")
        if from_context:
            return from_context
    return runtime.config.get("configurable", {}).get("thread_id")
|
||||
|
||||
|
||||
def _history_record(*, action: str, file_path: str, prev_content: str | None, new_content: str | None, thread_id: str | None, scanner: dict[str, Any]) -> dict[str, Any]:
|
||||
return {
|
||||
"action": action,
|
||||
"author": "agent",
|
||||
"thread_id": thread_id,
|
||||
"file_path": file_path,
|
||||
"prev_content": prev_content,
|
||||
"new_content": new_content,
|
||||
"scanner": scanner,
|
||||
}
|
||||
|
||||
|
||||
async def _scan_or_raise(content: str, *, executable: bool, location: str) -> dict[str, str]:
    """Run the security scanner; raise ValueError unless the write may proceed.

    Non-executable content is rejected only on an explicit "block" decision,
    while executable content must be explicitly allowed.
    """
    verdict = await scan_skill_content(content, executable=executable, location=location)
    if verdict.decision == "block":
        raise ValueError(f"Security scan blocked the write: {verdict.reason}")
    if executable and verdict.decision != "allow":
        raise ValueError(f"Security scan rejected executable content: {verdict.reason}")
    return {"decision": verdict.decision, "reason": verdict.reason}
|
||||
|
||||
|
||||
async def _to_thread(func, /, *args, **kwargs):
    # Thin alias for asyncio.to_thread so the blocking skill-manager helpers
    # can be awaited uniformly throughout this module.
    return await asyncio.to_thread(func, *args, **kwargs)
|
||||
|
||||
|
||||
async def _skill_manage_impl(
    runtime: ToolRuntime[ContextT, ThreadState],
    action: str,
    name: str,
    content: str | None = None,
    path: str | None = None,
    find: str | None = None,
    replace: str | None = None,
    expected_count: int | None = None,
) -> str:
    """Manage custom skills under skills/custom/.

    Args:
        action: One of create, patch, edit, delete, write_file, remove_file.
        name: Skill name in hyphen-case.
        content: New file content for create, edit, or write_file.
        path: Supporting file path for write_file or remove_file.
        find: Existing text to replace for patch.
        replace: Replacement text for patch.
        expected_count: Optional expected number of replacements for patch.

    Returns:
        A human-readable confirmation message for the completed action.

    Raises:
        ValueError: On an unknown action, missing required arguments,
            validation failure, or a security-scanner rejection.
        FileNotFoundError: When remove_file targets a missing supporting file.
    """
    # Validate the name first — everything below builds paths from it.
    name = validate_skill_name(name)
    lock = _get_lock(name)
    thread_id = _get_thread_id(runtime)

    # Serialize all mutations of a given skill. Blocking file work is pushed
    # to worker threads via _to_thread so the event loop stays responsive.
    async with lock:
        if action == "create":
            if await _to_thread(custom_skill_exists, name):
                raise ValueError(f"Custom skill '{name}' already exists.")
            if content is None:
                raise ValueError("content is required for create.")
            await _to_thread(validate_skill_markdown_content, name, content)
            # Markdown is non-executable: only an explicit "block" stops it.
            scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md")
            skill_file = await _to_thread(get_custom_skill_file, name)
            await _to_thread(atomic_write, skill_file, content)
            await _to_thread(
                append_history,
                name,
                _history_record(action="create", file_path="SKILL.md", prev_content=None, new_content=content, thread_id=thread_id, scanner=scan),
            )
            # A new skill changes the advertised list in the system prompt.
            await refresh_skills_system_prompt_cache_async()
            return f"Created custom skill '{name}'."

        if action == "edit":
            await _to_thread(ensure_custom_skill_is_editable, name)
            if content is None:
                raise ValueError("content is required for edit.")
            await _to_thread(validate_skill_markdown_content, name, content)
            scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md")
            skill_file = await _to_thread(get_custom_skill_file, name)
            # Capture the previous content so history records a full before/after pair.
            prev_content = await _to_thread(skill_file.read_text, encoding="utf-8")
            await _to_thread(atomic_write, skill_file, content)
            await _to_thread(
                append_history,
                name,
                _history_record(action="edit", file_path="SKILL.md", prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan),
            )
            await refresh_skills_system_prompt_cache_async()
            return f"Updated custom skill '{name}'."

        if action == "patch":
            await _to_thread(ensure_custom_skill_is_editable, name)
            if find is None or replace is None:
                raise ValueError("find and replace are required for patch.")
            skill_file = await _to_thread(get_custom_skill_file, name)
            prev_content = await _to_thread(skill_file.read_text, encoding="utf-8")
            occurrences = prev_content.count(find)
            if occurrences == 0:
                raise ValueError("Patch target not found in SKILL.md.")
            if expected_count is not None and occurrences != expected_count:
                raise ValueError(f"Expected {expected_count} replacements but found {occurrences}.")
            # Without expected_count only the first occurrence is replaced,
            # keeping an ambiguous patch from touching unintended matches.
            replacement_count = expected_count if expected_count is not None else 1
            new_content = prev_content.replace(find, replace, replacement_count)
            await _to_thread(validate_skill_markdown_content, name, new_content)
            scan = await _scan_or_raise(new_content, executable=False, location=f"{name}/SKILL.md")
            await _to_thread(atomic_write, skill_file, new_content)
            await _to_thread(
                append_history,
                name,
                _history_record(action="patch", file_path="SKILL.md", prev_content=prev_content, new_content=new_content, thread_id=thread_id, scanner=scan),
            )
            await refresh_skills_system_prompt_cache_async()
            return f"Patched custom skill '{name}' ({replacement_count} replacement(s) applied, {occurrences} match(es) found)."

        if action == "delete":
            await _to_thread(ensure_custom_skill_is_editable, name)
            skill_dir = await _to_thread(get_custom_skill_dir, name)
            prev_content = await _to_thread(read_custom_skill_content, name)
            # History is appended before rmtree so a failed delete still
            # leaves an audit record of the attempt.
            await _to_thread(
                append_history,
                name,
                _history_record(action="delete", file_path="SKILL.md", prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}),
            )
            await _to_thread(shutil.rmtree, skill_dir)
            await refresh_skills_system_prompt_cache_async()
            return f"Deleted custom skill '{name}'."

        if action == "write_file":
            await _to_thread(ensure_custom_skill_is_editable, name)
            if path is None or content is None:
                raise ValueError("path and content are required for write_file.")
            # ensure_safe_support_path guards against escaping the skill dir.
            target = await _to_thread(ensure_safe_support_path, name, path)
            exists = await _to_thread(target.exists)
            prev_content = await _to_thread(target.read_text, encoding="utf-8") if exists else None
            # Paths under scripts/ get the stricter executable-content scan.
            executable = "scripts/" in path or path.startswith("scripts/")
            scan = await _scan_or_raise(content, executable=executable, location=f"{name}/{path}")
            await _to_thread(atomic_write, target, content)
            await _to_thread(
                append_history,
                name,
                _history_record(action="write_file", file_path=path, prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan),
            )
            return f"Wrote '{path}' for custom skill '{name}'."

        if action == "remove_file":
            await _to_thread(ensure_custom_skill_is_editable, name)
            if path is None:
                raise ValueError("path is required for remove_file.")
            target = await _to_thread(ensure_safe_support_path, name, path)
            if not await _to_thread(target.exists):
                raise FileNotFoundError(f"Supporting file '{path}' not found for skill '{name}'.")
            prev_content = await _to_thread(target.read_text, encoding="utf-8")
            await _to_thread(target.unlink)
            await _to_thread(
                append_history,
                name,
                _history_record(action="remove_file", file_path=path, prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}),
            )
            return f"Removed '{path}' from custom skill '{name}'."

        # Unknown action: give a more helpful message when the name collides
        # with a built-in (public) skill.
        if await _to_thread(public_skill_exists, name):
            raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.")
        raise ValueError(f"Unsupported action '{action}'.")
|
||||
|
||||
|
||||
@tool("skill_manage", parse_docstring=True)
async def skill_manage_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    action: str,
    name: str,
    content: str | None = None,
    path: str | None = None,
    find: str | None = None,
    replace: str | None = None,
    expected_count: int | None = None,
) -> str:
    """Manage custom skills under skills/custom/.

    Args:
        action: One of create, patch, edit, delete, write_file, remove_file.
        name: Skill name in hyphen-case.
        content: New file content for create, edit, or write_file.
        path: Supporting file path for write_file or remove_file.
        find: Existing text to replace for patch.
        replace: Replacement text for patch.
        expected_count: Optional expected number of replacements for patch.
    """
    # Thin async tool wrapper: validation, locking, and file work all live in
    # _skill_manage_impl. NOTE: parse_docstring=True turns the docstring above
    # into the tool's schema, so keep it in sync with the implementation.
    return await _skill_manage_impl(
        runtime=runtime,
        action=action,
        name=name,
        content=content,
        path=path,
        find=find,
        replace=replace,
        expected_count=expected_count,
    )


# Attach a synchronous entry point via the MCP sync-wrapper helper so the
# tool can also be invoked without an event loop — presumably for sync tool
# call paths; TODO(review): confirm which callers rely on .func.
skill_manage_tool.func = _make_sync_tool_wrapper(_skill_manage_impl, "skill_manage")
|
||||
137
deer-flow/backend/packages/harness/deerflow/tools/tools.py
Normal file
137
deer-flow/backend/packages/harness/deerflow/tools/tools.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import logging
|
||||
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
from deerflow.config import get_app_config
|
||||
from deerflow.reflection import resolve_variable
|
||||
from deerflow.sandbox.security import is_host_bash_allowed
|
||||
from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
|
||||
from deerflow.tools.builtins.tool_search import reset_deferred_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Tools always offered to the lead agent regardless of config.
BUILTIN_TOOLS = [
    present_file_tool,
    ask_clarification_tool,
]

# Tools added only when subagent execution is enabled at runtime.
SUBAGENT_TOOLS = [
    task_tool,
    # task_status_tool is no longer exposed to LLM (backend handles polling internally)
]
|
||||
|
||||
|
||||
def _is_host_bash_tool(tool: object) -> bool:
|
||||
"""Return True if the tool config represents a host-bash execution surface."""
|
||||
group = getattr(tool, "group", None)
|
||||
use = getattr(tool, "use", None)
|
||||
if group == "bash":
|
||||
return True
|
||||
if use == "deerflow.sandbox.tools:bash_tool":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def get_available_tools(
    groups: list[str] | None = None,
    include_mcp: bool = True,
    model_name: str | None = None,
    subagent_enabled: bool = False,
) -> list[BaseTool]:
    """Get all available tools from config.

    Note: MCP tools should be initialized at application startup using
    `initialize_mcp_tools()` from deerflow.mcp module.

    Args:
        groups: Optional list of tool groups to filter by.
        include_mcp: Whether to include tools from MCP servers (default: True).
        model_name: Optional model name to determine if vision tools should be included.
        subagent_enabled: Whether to include subagent tools (task, task_status).

    Returns:
        List of available tools.
    """
    config = get_app_config()
    # Config-declared tools, optionally narrowed to the requested groups.
    tool_configs = [tool for tool in config.tools if groups is None or tool.group in groups]

    # Do not expose host bash by default when LocalSandboxProvider is active.
    if not is_host_bash_allowed(config):
        tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]

    # Resolve each config entry's "use" reference to its BaseTool instance.
    loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]

    # Conditionally add tools based on config
    builtin_tools = BUILTIN_TOOLS.copy()
    skill_evolution_config = getattr(config, "skill_evolution", None)
    if getattr(skill_evolution_config, "enabled", False):
        # Imported lazily so the skill-evolution stack loads only when enabled.
        from deerflow.tools.skill_manage_tool import skill_manage_tool

        builtin_tools.append(skill_manage_tool)

    # Add subagent tools only if enabled via runtime parameter
    if subagent_enabled:
        builtin_tools.extend(SUBAGENT_TOOLS)
        logger.info("Including subagent tools (task)")

    # If no model_name specified, use the first model (default)
    if model_name is None and config.models:
        model_name = config.models[0].name

    # Add view_image_tool only if the model supports vision
    model_config = config.get_model_config(model_name) if model_name else None
    if model_config is not None and model_config.supports_vision:
        builtin_tools.append(view_image_tool)
        logger.info(f"Including view_image_tool for model '{model_name}' (supports_vision=True)")

    # Get cached MCP tools if enabled
    # NOTE: We use ExtensionsConfig.from_file() instead of config.extensions
    # to always read the latest configuration from disk. This ensures that changes
    # made through the Gateway API (which runs in a separate process) are immediately
    # reflected when loading MCP tools.
    mcp_tools = []
    # Reset deferred registry upfront to prevent stale state from previous calls
    reset_deferred_registry()
    if include_mcp:
        try:
            from deerflow.config.extensions_config import ExtensionsConfig
            from deerflow.mcp.cache import get_cached_mcp_tools

            extensions_config = ExtensionsConfig.from_file()
            if extensions_config.get_enabled_mcp_servers():
                mcp_tools = get_cached_mcp_tools()
                if mcp_tools:
                    logger.info(f"Using {len(mcp_tools)} cached MCP tool(s)")

                    # When tool_search is enabled, register MCP tools in the
                    # deferred registry and add tool_search to builtin tools.
                    if config.tool_search.enabled:
                        from deerflow.tools.builtins.tool_search import DeferredToolRegistry, set_deferred_registry
                        from deerflow.tools.builtins.tool_search import tool_search as tool_search_tool

                        registry = DeferredToolRegistry()
                        for t in mcp_tools:
                            registry.register(t)
                        set_deferred_registry(registry)
                        builtin_tools.append(tool_search_tool)
                        logger.info(f"Tool search active: {len(mcp_tools)} tools deferred")
        except ImportError:
            logger.warning("MCP module not available. Install 'langchain-mcp-adapters' package to enable MCP tools.")
        except Exception as e:
            # Best-effort: MCP failures must not break core tool loading.
            logger.error(f"Failed to get cached MCP tools: {e}")

    # Add invoke_acp_agent tool if any ACP agents are configured
    acp_tools: list[BaseTool] = []
    try:
        from deerflow.config.acp_config import get_acp_agents
        from deerflow.tools.builtins.invoke_acp_agent_tool import build_invoke_acp_agent_tool

        acp_agents = get_acp_agents()
        if acp_agents:
            acp_tools.append(build_invoke_acp_agent_tool(acp_agents))
            logger.info(f"Including invoke_acp_agent tool ({len(acp_agents)} agent(s): {list(acp_agents.keys())})")
    except Exception as e:
        logger.warning(f"Failed to load ACP tool: {e}")

    logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
    return loaded_tools + builtin_tools + mcp_tools + acp_tools
|
||||
Reference in New Issue
Block a user