Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:

- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
114 lines
4.4 KiB
Python
114 lines
4.4 KiB
Python
"""Load MCP tools using langchain-mcp-adapters."""
|
|
|
|
import asyncio
|
|
import atexit
|
|
import concurrent.futures
|
|
import logging
|
|
from collections.abc import Callable
|
|
from typing import Any
|
|
|
|
from langchain_core.tools import BaseTool
|
|
|
|
from deerflow.config.extensions_config import ExtensionsConfig
|
|
from deerflow.mcp.client import build_servers_config
|
|
from deerflow.mcp.oauth import build_oauth_tool_interceptor, get_initial_oauth_headers
|
|
|
|
logger = logging.getLogger(__name__)

# Process-wide worker pool used to run async MCP tools on behalf of
# synchronous callers that are already inside a running event loop.
_SYNC_TOOL_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
    max_workers=10,
    thread_name_prefix="mcp-sync-tool",
)


def _shutdown_sync_tool_executor() -> None:
    """Shut the shared pool down at interpreter exit without waiting for pending work."""
    # The executor is looked up at call time, exactly as the exit hook needs it.
    _SYNC_TOOL_EXECUTOR.shutdown(wait=False)


# Make sure the pool is released when the interpreter exits.
atexit.register(_shutdown_sync_tool_executor)
|
|
|
|
|
|
def _make_sync_tool_wrapper(coro: Callable[..., Any], tool_name: str) -> Callable[..., Any]:
    """Build a synchronous wrapper for an asynchronous tool coroutine.

    The wrapper drives ``coro`` to completion with ``asyncio.run``. When it is
    called from a thread that already has a running event loop, ``asyncio.run``
    cannot be nested there, so the coroutine is executed on a worker thread of
    the module-level ``_SYNC_TOOL_EXECUTOR`` pool while the caller blocks on
    the result. The caller's ``contextvars`` context is copied onto that worker
    thread so context-local state set by the caller stays visible to the
    coroutine instead of being silently dropped at the thread hop.

    Args:
        coro: The tool's asynchronous coroutine function.
        tool_name: Name of the tool (for logging).

    Returns:
        A synchronous function that correctly handles nested event loops.
        Any exception raised by the coroutine is logged and re-raised.
    """
    # Imported locally so this helper does not rely on a module-level import.
    import contextvars

    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
        # get_running_loop() only succeeds when a loop is running in this
        # thread, so no additional is_running() check is required.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            inside_running_loop = False
        else:
            inside_running_loop = True

        try:
            if not inside_running_loop:
                return asyncio.run(coro(*args, **kwargs))

            # Run on the shared pool to avoid nesting event loops. Executing
            # asyncio.run inside a copy of the caller's context makes the task
            # it creates inherit the caller's context variables.
            caller_context = contextvars.copy_context()
            future = _SYNC_TOOL_EXECUTOR.submit(caller_context.run, asyncio.run, coro(*args, **kwargs))
            return future.result()
        except Exception as e:
            logger.error(f"Error invoking MCP tool '{tool_name}' via sync wrapper: {e}", exc_info=True)
            raise

    return sync_wrapper
|
|
|
|
|
|
async def get_mcp_tools() -> list[BaseTool]:
    """Collect the tools exposed by every enabled MCP server.

    Returns:
        The LangChain tools gathered from all enabled MCP servers. An empty
        list is returned when langchain-mcp-adapters is not installed, when no
        server is enabled, or when loading fails for any reason (the failure
        is logged rather than raised).
    """
    try:
        from langchain_mcp_adapters.client import MultiServerMCPClient
    except ImportError:
        logger.warning("langchain-mcp-adapters not installed. Install it to enable MCP tools: pip install langchain-mcp-adapters")
        return []

    # NOTE: ExtensionsConfig.from_file() is used rather than
    # get_extensions_config() so the configuration is re-read from disk on
    # every call. Changes made through the Gateway API (a separate process)
    # are therefore picked up as soon as MCP tools are initialised.
    extensions_config = ExtensionsConfig.from_file()
    servers_config = build_servers_config(extensions_config)
    if not servers_config:
        logger.info("No enabled MCP servers configured")
        return []

    try:
        logger.info(f"Initializing MCP client with {len(servers_config)} server(s)")

        # Seed connection headers with OAuth credentials so that tool
        # discovery / session initialisation is already authenticated.
        oauth_headers = await get_initial_oauth_headers(extensions_config)
        for name, authorization in oauth_headers.items():
            if name not in servers_config:
                continue
            if servers_config[name].get("transport") not in ("sse", "http"):
                continue
            # Copy before editing so the original headers mapping is not mutated.
            merged_headers = dict(servers_config[name].get("headers", {}))
            merged_headers["Authorization"] = authorization
            servers_config[name]["headers"] = merged_headers

        oauth_interceptor = build_oauth_tool_interceptor(extensions_config)
        tool_interceptors = [] if oauth_interceptor is None else [oauth_interceptor]

        client = MultiServerMCPClient(
            servers_config,
            tool_interceptors=tool_interceptors,
            tool_name_prefix=True,
        )

        # Ask every configured server for its tools.
        tools = await client.get_tools()
        logger.info(f"Successfully loaded {len(tools)} tool(s) from MCP servers")

        # The deerflow client streams synchronously, so async-only tools get a
        # synchronous entry point attached.
        for mcp_tool in tools:
            if getattr(mcp_tool, "func", None) is not None:
                continue
            if getattr(mcp_tool, "coroutine", None) is None:
                continue
            mcp_tool.func = _make_sync_tool_wrapper(mcp_tool.coroutine, mcp_tool.name)

        return tools

    except Exception as e:
        logger.error(f"Failed to load MCP tools: {e}", exc_info=True)
        return []
|