Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
from .checkpointer import get_checkpointer, make_checkpointer, reset_checkpointer
from .factory import create_deerflow_agent
from .features import Next, Prev, RuntimeFeatures
from .lead_agent import make_lead_agent
from .lead_agent.prompt import prime_enabled_skills_cache
from .thread_state import SandboxState, ThreadState
# LangGraph imports deerflow.agents when registering the graph. Prime the
# enabled-skills cache here so the request path can usually read a warm cache
# without forcing synchronous filesystem work during prompt module import.
# NOTE: this call runs as a side effect of importing the package.
prime_enabled_skills_cache()
# Names re-exported as the package's public API. ``prime_enabled_skills_cache``
# is imported above only for the import-time call and is not listed here.
__all__ = [
"create_deerflow_agent",
"RuntimeFeatures",
"Next",
"Prev",
"make_lead_agent",
"SandboxState",
"ThreadState",
"get_checkpointer",
"reset_checkpointer",
"make_checkpointer",
]

View File

@@ -0,0 +1,9 @@
from .async_provider import make_checkpointer
from .provider import checkpointer_context, get_checkpointer, reset_checkpointer
# Public checkpointer API: the sync helpers come from ``.provider`` and the
# async factory ``make_checkpointer`` comes from ``.async_provider``.
__all__ = [
"get_checkpointer",
"reset_checkpointer",
"checkpointer_context",
"make_checkpointer",
]

View File

@@ -0,0 +1,106 @@
"""Async checkpointer factory.

Provides an **async context manager** for long-running async servers that need
proper resource cleanup.

Supported backends: memory, sqlite, postgres.

Usage (e.g. FastAPI lifespan)::

    from deerflow.agents.checkpointer.async_provider import make_checkpointer

    async with make_checkpointer() as checkpointer:
        app.state.checkpointer = checkpointer  # InMemorySaver if not configured

For sync usage see :mod:`deerflow.agents.checkpointer.provider`.
"""
from __future__ import annotations
import asyncio
import contextlib
import logging
from collections.abc import AsyncIterator
from langgraph.types import Checkpointer
from deerflow.agents.checkpointer.provider import (
POSTGRES_CONN_REQUIRED,
POSTGRES_INSTALL,
SQLITE_INSTALL,
)
from deerflow.config.app_config import get_app_config
from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Async factory
# ---------------------------------------------------------------------------
@contextlib.asynccontextmanager
async def _async_checkpointer(config) -> AsyncIterator[Checkpointer]:
    """Build the async checkpointer selected by ``config.type`` and yield it.

    Backends:
        - ``"memory"``: yields a fresh ``InMemorySaver``.
        - ``"sqlite"``: resolves ``config.connection_string`` (falling back to
          ``"store.db"`` when it is empty), runs ``ensure_sqlite_parent_dir``
          in a worker thread, then opens an ``AsyncSqliteSaver`` and calls
          ``setup()`` before yielding it.
        - ``"postgres"``: opens an ``AsyncPostgresSaver`` on
          ``config.connection_string`` and calls ``setup()`` before yielding.

    The sqlite/postgres savers are opened with ``async with``, so they are
    closed when the caller leaves this context manager.

    Raises:
        ImportError: If the optional package for the chosen backend is missing.
        ValueError: If the postgres backend has no ``connection_string`` or
            ``config.type`` is not one of the supported backends.
    """
    backend = config.type

    if backend == "memory":
        from langgraph.checkpoint.memory import InMemorySaver

        yield InMemorySaver()

    elif backend == "sqlite":
        try:
            from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
        except ImportError as exc:
            raise ImportError(SQLITE_INSTALL) from exc

        resolved_conn = resolve_sqlite_conn_str(config.connection_string or "store.db")
        # Keep the blocking filesystem helper off the event loop.
        await asyncio.to_thread(ensure_sqlite_parent_dir, resolved_conn)
        async with AsyncSqliteSaver.from_conn_string(resolved_conn) as sqlite_saver:
            await sqlite_saver.setup()
            yield sqlite_saver

    elif backend == "postgres":
        try:
            from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
        except ImportError as exc:
            raise ImportError(POSTGRES_INSTALL) from exc

        if not config.connection_string:
            raise ValueError(POSTGRES_CONN_REQUIRED)

        async with AsyncPostgresSaver.from_conn_string(config.connection_string) as pg_saver:
            await pg_saver.setup()
            yield pg_saver

    else:
        raise ValueError(f"Unknown checkpointer type: {backend!r}")
# ---------------------------------------------------------------------------
# Public async context manager
# ---------------------------------------------------------------------------
@contextlib.asynccontextmanager
async def make_checkpointer() -> AsyncIterator[Checkpointer]:
    """Yield a checkpointer that lives exactly as long as the ``async with`` block.

    The backend is taken from the ``checkpointer`` section of the application
    config returned by ``get_app_config()``. Nothing is cached in module
    state: resources are opened on enter and released on exit::

        async with make_checkpointer() as checkpointer:
            app.state.checkpointer = checkpointer

    When the config has no checkpointer section, a fresh ``InMemorySaver`` is
    yielded instead.
    """
    checkpointer_config = get_app_config().checkpointer

    if checkpointer_config is not None:
        async with _async_checkpointer(checkpointer_config) as saver:
            yield saver
        return

    # No backend configured: fall back to the in-process saver.
    from langgraph.checkpoint.memory import InMemorySaver

    yield InMemorySaver()

View File

@@ -0,0 +1,191 @@
"""Sync checkpointer factory.

Provides a **sync singleton** and a **sync context manager** for LangGraph
graph compilation and CLI tools.

Supported backends: memory, sqlite, postgres.

Usage::

    from deerflow.agents.checkpointer.provider import get_checkpointer, checkpointer_context

    # Singleton — reused across calls; closed by reset_checkpointer()
    cp = get_checkpointer()

    # One-shot — fresh connection, closed on block exit
    with checkpointer_context() as cp:
        graph.invoke(input, config={"configurable": {"thread_id": "1"}})
"""
from __future__ import annotations
import contextlib
import logging
from collections.abc import Iterator
from langgraph.types import Checkpointer
from deerflow.config.app_config import get_app_config
from deerflow.config.checkpointer_config import CheckpointerConfig
from deerflow.runtime.store._sqlite_utils import resolve_sqlite_conn_str
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Error message constants — also imported by the sibling async_provider module
# ---------------------------------------------------------------------------
# Raised as ImportError text when the optional sqlite saver package is missing.
SQLITE_INSTALL = "langgraph-checkpoint-sqlite is required for the SQLite checkpointer. Install it with: uv add langgraph-checkpoint-sqlite"
# Raised as ImportError text when the optional postgres saver package is missing.
POSTGRES_INSTALL = "langgraph-checkpoint-postgres is required for the PostgreSQL checkpointer. Install it with: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool"
# Raised as ValueError text when the postgres backend has no connection string.
POSTGRES_CONN_REQUIRED = "checkpointer.connection_string is required for the postgres backend"
# ---------------------------------------------------------------------------
# Sync factory
# ---------------------------------------------------------------------------
@contextlib.contextmanager
def _sync_checkpointer_cm(config: CheckpointerConfig) -> Iterator[Checkpointer]:
    """Context manager that creates and tears down a sync checkpointer.

    Yields a ready-to-use ``Checkpointer`` for ``config.type``. For the sqlite
    and postgres backends the saver's own ``from_conn_string`` context manager
    stays open while the caller holds the yielded object and is closed when
    this context manager exits, so no separate cleanup callback is needed.

    Raises:
        ImportError: If the optional package for the chosen backend is missing.
        ValueError: If the postgres backend has no ``connection_string`` or
            ``config.type`` is not one of the supported backends.
    """
    if config.type == "memory":
        from langgraph.checkpoint.memory import InMemorySaver

        logger.info("Checkpointer: using InMemorySaver (in-process, not persistent)")
        yield InMemorySaver()
        return

    if config.type == "sqlite":
        try:
            from langgraph.checkpoint.sqlite import SqliteSaver
        except ImportError as exc:
            raise ImportError(SQLITE_INSTALL) from exc

        # Imported locally because this module only imports
        # ``resolve_sqlite_conn_str`` from the helper module at file level.
        from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir

        conn_str = resolve_sqlite_conn_str(config.connection_string or "store.db")
        # Match the async factory, which runs this helper before opening the
        # database; without it the sync path skips that preparation step.
        ensure_sqlite_parent_dir(conn_str)
        with SqliteSaver.from_conn_string(conn_str) as saver:
            saver.setup()
            logger.info("Checkpointer: using SqliteSaver (%s)", conn_str)
            yield saver
        return

    if config.type == "postgres":
        try:
            from langgraph.checkpoint.postgres import PostgresSaver
        except ImportError as exc:
            raise ImportError(POSTGRES_INSTALL) from exc

        if not config.connection_string:
            raise ValueError(POSTGRES_CONN_REQUIRED)

        with PostgresSaver.from_conn_string(config.connection_string) as saver:
            saver.setup()
            logger.info("Checkpointer: using PostgresSaver")
            yield saver
        return

    raise ValueError(f"Unknown checkpointer type: {config.type!r}")
# ---------------------------------------------------------------------------
# Sync singleton
# ---------------------------------------------------------------------------
# Module-level singleton state shared by get_checkpointer() and
# reset_checkpointer(). ``_checkpointer`` caches the instance handed out;
# ``_checkpointer_ctx`` stays None when the InMemorySaver fallback is used.
_checkpointer: Checkpointer | None = None
_checkpointer_ctx = None # open context manager keeping the connection alive
def get_checkpointer() -> Checkpointer:
    """Return the process-wide sync checkpointer, building it on first use.

    Later calls return the cached instance until :func:`reset_checkpointer`
    clears it. When no checkpointer is configured in *config.yaml* an
    ``InMemorySaver`` is created and cached instead.

    Raises:
        ImportError: If the required package for the configured backend is not installed.
        ValueError: If ``connection_string`` is missing for a backend that requires it.
    """
    global _checkpointer, _checkpointer_ctx

    if _checkpointer is not None:
        return _checkpointer

    from deerflow.config.app_config import _app_config
    from deerflow.config.checkpointer_config import get_checkpointer_config

    cp_config = get_checkpointer_config()
    if cp_config is None and _app_config is None:
        # Neither the app config nor an explicit checkpointer config has been
        # initialised yet, so load the app config lazily. This avoids handing
        # out an InMemorySaver when config.yaml does have a checkpointer
        # section, while keeping tests that set the checkpointer config
        # directly isolated from any config.yaml on disk. A missing
        # config.yaml is expected in test environments and is ignored.
        with contextlib.suppress(FileNotFoundError):
            get_app_config()
        cp_config = get_checkpointer_config()

    if cp_config is not None:
        # Keep the context manager open so the backend connection stays alive
        # for as long as the singleton is cached.
        _checkpointer_ctx = _sync_checkpointer_cm(cp_config)
        _checkpointer = _checkpointer_ctx.__enter__()
        return _checkpointer

    from langgraph.checkpoint.memory import InMemorySaver

    logger.info("Checkpointer: using InMemorySaver (in-process, not persistent)")
    _checkpointer = InMemorySaver()
    return _checkpointer
def reset_checkpointer() -> None:
    """Drop the cached sync singleton so the next call rebuilds it.

    If a backend context manager is open it is exited first; any exception
    raised while closing is logged as a warning and otherwise ignored. Handy
    in tests or after the configuration has changed.
    """
    global _checkpointer, _checkpointer_ctx

    open_ctx = _checkpointer_ctx
    if open_ctx is not None:
        try:
            open_ctx.__exit__(None, None, None)
        except Exception:
            # Best-effort cleanup: a failing close must not block the reset.
            logger.warning("Error during checkpointer cleanup", exc_info=True)
        _checkpointer_ctx = None

    _checkpointer = None
# ---------------------------------------------------------------------------
# Sync context manager
# ---------------------------------------------------------------------------
@contextlib.contextmanager
def checkpointer_context() -> Iterator[Checkpointer]:
    """Yield a fresh sync checkpointer and release it when the block ends.

    In contrast to :func:`get_checkpointer`, nothing is cached: every ``with``
    block opens and closes its own backend. This suits CLI scripts and tests
    that want deterministic cleanup::

        with checkpointer_context() as cp:
            graph.invoke(input, config={"configurable": {"thread_id": "1"}})

    When the application config has no checkpointer section, a fresh
    ``InMemorySaver`` is yielded instead.
    """
    cp_config = get_app_config().checkpointer

    if cp_config is not None:
        with _sync_checkpointer_cm(cp_config) as saver:
            yield saver
        return

    # No backend configured: fall back to the in-process saver.
    from langgraph.checkpoint.memory import InMemorySaver

    yield InMemorySaver()

View File

@@ -0,0 +1,372 @@
"""Pure-argument factory for DeerFlow agents.
``create_deerflow_agent`` accepts plain Python arguments — no YAML files, no
global singletons. It is the SDK-level entry point sitting between the raw
``langchain.agents.create_agent`` primitive and the config-driven
``make_lead_agent`` application factory.
Note: the factory assembly itself is config-free, but some injected runtime
components (e.g. ``task_tool`` for subagent) may still read global config at
invocation time. Full config-free runtime is a Phase 2 goal.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from langchain.agents import create_agent
from langchain.agents.middleware import AgentMiddleware
from deerflow.agents.features import RuntimeFeatures
from deerflow.agents.middlewares.clarification_middleware import ClarificationMiddleware
from deerflow.agents.middlewares.dangling_tool_call_middleware import DanglingToolCallMiddleware
from deerflow.agents.middlewares.tool_error_handling_middleware import ToolErrorHandlingMiddleware
from deerflow.agents.thread_state import ThreadState
from deerflow.tools.builtins import ask_clarification_tool
if TYPE_CHECKING:
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.checkpoint.base import BaseCheckpointSaver
from langgraph.graph.state import CompiledStateGraph
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# TodoMiddleware prompts (minimal SDK version)
# ---------------------------------------------------------------------------
# System-prompt fragment passed to TodoMiddleware when ``plan_mode`` is enabled
# (see ``_assemble_from_features``). The text is sent to the model as-is.
_TODO_SYSTEM_PROMPT = """
<todo_list_system>
You have access to the `write_todos` tool to help you manage and track complex multi-step objectives.
**CRITICAL RULES:**
- Mark todos as completed IMMEDIATELY after finishing each step - do NOT batch completions
- Keep EXACTLY ONE task as `in_progress` at any time (unless tasks can run in parallel)
- Update the todo list in REAL-TIME as you work - this gives users visibility into your progress
- DO NOT use this tool for simple tasks (< 3 steps) - just complete them directly
</todo_list_system>
"""
# Tool description passed to TodoMiddleware alongside the prompt above.
_TODO_TOOL_DESCRIPTION = "Use this tool to create and manage a structured task list for complex work sessions. Only use for complex tasks (3+ steps)."
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def create_deerflow_agent(
    model: BaseChatModel,
    tools: list[BaseTool] | None = None,
    *,
    system_prompt: str | None = None,
    middleware: list[AgentMiddleware] | None = None,
    features: RuntimeFeatures | None = None,
    extra_middleware: list[AgentMiddleware] | None = None,
    plan_mode: bool = False,
    state_schema: type | None = None,
    checkpointer: BaseCheckpointSaver | None = None,
    name: str = "default",
) -> CompiledStateGraph:
    """Build a DeerFlow agent graph from plain Python arguments.

    Assembly reads no config files. Runtime components injected here (for
    example ``task_tool``) may still consult global config when invoked.

    Parameters
    ----------
    model:
        Chat model instance.
    tools:
        Caller-supplied tools. Tools contributed by enabled features are
        appended unless a caller tool already uses the same name.
    system_prompt:
        System message, forwarded unchanged to ``create_agent``.
    middleware:
        **Full takeover** — a copy of this exact list is used as the chain.
        Mutually exclusive with *features* and *extra_middleware*. In this
        mode no feature tools are added and *plan_mode* has no effect.
    features:
        Declarative feature flags; ``None`` means ``RuntimeFeatures()``.
        Mutually exclusive with *middleware*.
    extra_middleware:
        Extra middlewares merged into the auto-assembled chain through
        ``@Next``/``@Prev`` positioning. Mutually exclusive with *middleware*.
    plan_mode:
        Add ``TodoMiddleware`` to the auto-assembled chain.
    state_schema:
        LangGraph state type; a falsy value selects ``ThreadState``.
    checkpointer:
        Optional persistence backend.
    name:
        Agent name, also handed to middleware that uses it
        (e.g. ``MemoryMiddleware``).

    Raises
    ------
    ValueError
        If *middleware* is combined with *features* or *extra_middleware*.
    TypeError
        If an *extra_middleware* item is not an ``AgentMiddleware`` instance.
    """
    full_takeover = middleware is not None

    # --- Argument validation ---
    if full_takeover and features is not None:
        raise ValueError("Cannot specify both 'middleware' and 'features'. Use one or the other.")
    if full_takeover and extra_middleware:
        raise ValueError("Cannot use 'extra_middleware' with 'middleware' (full takeover).")
    for candidate in extra_middleware or ():
        if not isinstance(candidate, AgentMiddleware):
            raise TypeError(f"extra_middleware items must be AgentMiddleware instances, got {type(candidate).__name__}")

    agent_tools: list[BaseTool] = list(tools or [])

    if full_takeover:
        agent_middleware = list(middleware)
    else:
        agent_middleware, feature_tools = _assemble_from_features(
            features or RuntimeFeatures(),
            name=name,
            plan_mode=plan_mode,
            extra_middleware=extra_middleware or [],
        )
        # Caller-supplied tools win on name clashes; feature tools only fill gaps.
        taken_names = {tool.name for tool in agent_tools}
        for tool in feature_tools:
            if tool.name in taken_names:
                continue
            agent_tools.append(tool)
            taken_names.add(tool.name)

    return create_agent(
        model=model,
        tools=agent_tools or None,
        middleware=agent_middleware,
        system_prompt=system_prompt,
        state_schema=state_schema or ThreadState,
        checkpointer=checkpointer,
        name=name,
    )
# ---------------------------------------------------------------------------
# Internal: feature-driven middleware assembly
# ---------------------------------------------------------------------------
def _assemble_from_features(
    feat: RuntimeFeatures,
    *,
    name: str = "default",
    plan_mode: bool = False,
    extra_middleware: list[AgentMiddleware] | None = None,
) -> tuple[list[AgentMiddleware], list[BaseTool]]:
    """Turn *feat* into an ordered middleware chain plus feature-provided tools.

    The built-in order mirrors ``make_lead_agent`` (14 slots):

    0-2. Sandbox infrastructure (ThreadData → Uploads → Sandbox)
    3. DanglingToolCallMiddleware (always)
    4. GuardrailMiddleware (guardrail feature)
    5. ToolErrorHandlingMiddleware (always)
    6. SummarizationMiddleware (summarization feature)
    7. TodoMiddleware (plan_mode parameter)
    8. TitleMiddleware (auto_title feature)
    9. MemoryMiddleware (memory feature)
    10. ViewImageMiddleware (vision feature)
    11. SubagentLimitMiddleware (subagent feature)
    12. LoopDetectionMiddleware (always)
    13. ClarificationMiddleware (always last)

    Assembly happens in two phases: the built-in chain is appended in the
    fixed order above, then *extra_middleware* is merged in via
    ``@Next``/``@Prev``.

    Each feature value is interpreted as:

    - ``False``: the slot is skipped.
    - an ``AgentMiddleware`` instance: used as-is in that slot.
    - anything else (normally ``True``): the built-in default is created.
      ``summarization`` and ``guardrail`` have no built-in default and raise
      ``ValueError`` in this case.

    Returns the chain and the tools contributed by enabled features
    (``view_image_tool``, ``task_tool``, and always ``ask_clarification_tool``).
    """
    stack: list[AgentMiddleware] = []
    feature_tools: list[BaseTool] = []

    # --- [0-2] Sandbox infrastructure ---
    sandbox = feat.sandbox
    if isinstance(sandbox, AgentMiddleware):
        stack.append(sandbox)
    elif sandbox is not False:
        from deerflow.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
        from deerflow.agents.middlewares.uploads_middleware import UploadsMiddleware
        from deerflow.sandbox.middleware import SandboxMiddleware

        stack.extend(
            [
                ThreadDataMiddleware(lazy_init=True),
                UploadsMiddleware(),
                SandboxMiddleware(lazy_init=True),
            ]
        )

    # --- [3] DanglingToolCall (always) ---
    stack.append(DanglingToolCallMiddleware())

    # --- [4] Guardrail (custom instance only) ---
    guardrail = feat.guardrail
    if isinstance(guardrail, AgentMiddleware):
        stack.append(guardrail)
    elif guardrail is not False:
        raise ValueError("guardrail=True requires a custom AgentMiddleware instance (no built-in GuardrailMiddleware yet)")

    # --- [5] ToolErrorHandling (always) ---
    stack.append(ToolErrorHandlingMiddleware())

    # --- [6] Summarization (custom instance only) ---
    summarization = feat.summarization
    if isinstance(summarization, AgentMiddleware):
        stack.append(summarization)
    elif summarization is not False:
        raise ValueError("summarization=True requires a custom AgentMiddleware instance (SummarizationMiddleware needs a model argument)")

    # --- [7] TodoMiddleware (plan_mode) ---
    if plan_mode:
        from deerflow.agents.middlewares.todo_middleware import TodoMiddleware

        stack.append(TodoMiddleware(system_prompt=_TODO_SYSTEM_PROMPT, tool_description=_TODO_TOOL_DESCRIPTION))

    # --- [8] Auto Title ---
    auto_title = feat.auto_title
    if isinstance(auto_title, AgentMiddleware):
        stack.append(auto_title)
    elif auto_title is not False:
        from deerflow.agents.middlewares.title_middleware import TitleMiddleware

        stack.append(TitleMiddleware())

    # --- [9] Memory ---
    memory = feat.memory
    if isinstance(memory, AgentMiddleware):
        stack.append(memory)
    elif memory is not False:
        from deerflow.agents.middlewares.memory_middleware import MemoryMiddleware

        stack.append(MemoryMiddleware(agent_name=name))

    # --- [10] Vision: the tool is added for custom and default middleware alike ---
    vision = feat.vision
    if vision is not False:
        if isinstance(vision, AgentMiddleware):
            stack.append(vision)
        else:
            from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware

            stack.append(ViewImageMiddleware())
        from deerflow.tools.builtins import view_image_tool

        feature_tools.append(view_image_tool)

    # --- [11] Subagent: the tool is added for custom and default middleware alike ---
    subagent = feat.subagent
    if subagent is not False:
        if isinstance(subagent, AgentMiddleware):
            stack.append(subagent)
        else:
            from deerflow.agents.middlewares.subagent_limit_middleware import SubagentLimitMiddleware

            stack.append(SubagentLimitMiddleware())
        from deerflow.tools.builtins import task_tool

        feature_tools.append(task_tool)

    # --- [12] LoopDetection (always) ---
    from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware

    stack.append(LoopDetectionMiddleware())

    # --- [13] Clarification (always last among built-ins) ---
    stack.append(ClarificationMiddleware())
    feature_tools.append(ask_clarification_tool)

    # --- Merge extra_middleware via @Next/@Prev ---
    if extra_middleware:
        _insert_extra(stack, extra_middleware)
        # An extra declared @Next(ClarificationMiddleware) lands after it, so
        # move ClarificationMiddleware back to the tail to keep it last.
        clar_pos = next(i for i, mw in enumerate(stack) if isinstance(mw, ClarificationMiddleware))
        if clar_pos != len(stack) - 1:
            stack.append(stack.pop(clar_pos))

    return stack, feature_tools
# ---------------------------------------------------------------------------
# Internal: extra middleware insertion with @Next/@Prev
# ---------------------------------------------------------------------------
def _insert_extra(chain: list[AgentMiddleware], extras: list[AgentMiddleware]) -> None:
    """Place *extras* into *chain* in place, honouring ``@Next``/``@Prev`` anchors.

    Steps:
        1. Reject any extra whose class carries both an @Next and an @Prev anchor.
        2. Reject two extras that target the same anchor, in the same or in
           opposite directions.
        3. Insert extras without an anchor directly before
           ClarificationMiddleware, keeping their given order.
        4. Insert anchored extras round by round, so an extra may anchor on
           another extra that is inserted in an earlier round.
        5. Raise ``ValueError`` if a round makes no progress (circular
           anchoring, or an anchor that is not in the chain).
    """
    after_claims: dict[type, type] = {}
    before_claims: dict[type, type] = {}
    placements: list[tuple[AgentMiddleware, str, type]] = []
    floating: list[AgentMiddleware] = []

    # --- Classify every extra and detect anchor conflicts up front ---
    for mw in extras:
        mw_type = type(mw)
        after = getattr(mw_type, "_next_anchor", None)
        before = getattr(mw_type, "_prev_anchor", None)

        if after and before:
            raise ValueError(f"{mw_type.__name__} cannot have both @Next and @Prev")

        if after:
            if after in after_claims:
                raise ValueError(f"Conflict: {mw_type.__name__} and {after_claims[after].__name__} both @Next({after.__name__})")
            if after in before_claims:
                raise ValueError(f"Conflict: {mw_type.__name__} @Next({after.__name__}) and {before_claims[after].__name__} @Prev({after.__name__}) — use cross-anchoring between extras instead")
            after_claims[after] = mw_type
            placements.append((mw, "next", after))
        elif before:
            if before in before_claims:
                raise ValueError(f"Conflict: {mw_type.__name__} and {before_claims[before].__name__} both @Prev({before.__name__})")
            if before in after_claims:
                raise ValueError(f"Conflict: {mw_type.__name__} @Prev({before.__name__}) and {after_claims[before].__name__} @Next({before.__name__}) — use cross-anchoring between extras instead")
            before_claims[before] = mw_type
            placements.append((mw, "prev", before))
        else:
            floating.append(mw)

    # --- Unanchored extras go directly before ClarificationMiddleware ---
    clar_pos = next(i for i, m in enumerate(chain) if isinstance(m, ClarificationMiddleware))
    chain[clar_pos:clar_pos] = floating

    # --- Anchored extras: retry until every anchor has shown up in the chain ---
    pending = list(placements)
    for _ in range(len(pending) + 1):
        if not pending:
            break

        deferred = []
        for mw, direction, anchor in pending:
            anchor_pos = next(
                (i for i, m in enumerate(chain) if isinstance(m, anchor)),
                None,
            )
            if anchor_pos is None:
                # Anchor not present yet; it may be another extra inserted later.
                deferred.append((mw, direction, anchor))
                continue
            chain.insert(anchor_pos + 1 if direction == "next" else anchor_pos, mw)

        if len(deferred) == len(pending):
            # A full round without progress means the rest can never resolve.
            names = [type(m).__name__ for m, _, _ in deferred]
            anchor_types = {a for _, _, a in deferred}
            deferred_types = {type(m) for m, _, _ in deferred}
            circular = anchor_types & deferred_types
            if circular:
                raise ValueError(f"Circular dependency among extra middlewares: {', '.join(t.__name__ for t in circular)}")
            raise ValueError(f"Cannot resolve positions for {', '.join(names)} — anchors {', '.join(a.__name__ for _, _, a in deferred)} not found in chain")

        pending = deferred

View File

@@ -0,0 +1,62 @@
"""Declarative feature flags and middleware positioning for create_deerflow_agent.
Pure data classes and decorators — no I/O, no side effects.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal
from langchain.agents.middleware import AgentMiddleware
@dataclass
class RuntimeFeatures:
    """Feature switches consumed by ``create_deerflow_agent``.

    For most fields the value means:

    - ``True``: enable the feature with its built-in middleware
    - ``False``: leave the feature out
    - an ``AgentMiddleware`` instance: enable it with this custom middleware

    ``summarization`` and ``guardrail`` ship no built-in middleware, so they
    take only ``False`` or a custom ``AgentMiddleware`` instance.
    """

    # The only feature that is on by default.
    sandbox: bool | AgentMiddleware = True
    memory: bool | AgentMiddleware = False
    # No built-in default: False or a custom instance only.
    summarization: Literal[False] | AgentMiddleware = False
    subagent: bool | AgentMiddleware = False
    vision: bool | AgentMiddleware = False
    auto_title: bool | AgentMiddleware = False
    # No built-in default: False or a custom instance only.
    guardrail: Literal[False] | AgentMiddleware = False
# ---------------------------------------------------------------------------
# Middleware positioning decorators
# ---------------------------------------------------------------------------
def Next(anchor: type[AgentMiddleware]):
    """Class decorator factory: position the decorated middleware after *anchor*.

    The returned decorator stores *anchor* on the class as ``_next_anchor``
    and returns the class itself.

    Raises:
        TypeError: If *anchor* is not an ``AgentMiddleware`` subclass.
    """
    is_middleware_class = isinstance(anchor, type) and issubclass(anchor, AgentMiddleware)
    if not is_middleware_class:
        raise TypeError(f"@Next expects an AgentMiddleware subclass, got {anchor!r}")

    def decorator(cls: type[AgentMiddleware]) -> type[AgentMiddleware]:
        setattr(cls, "_next_anchor", anchor)
        return cls

    return decorator
def Prev(anchor: type[AgentMiddleware]):
    """Class decorator factory: position the decorated middleware before *anchor*.

    The returned decorator stores *anchor* on the class as ``_prev_anchor``
    and returns the class itself.

    Raises:
        TypeError: If *anchor* is not an ``AgentMiddleware`` subclass.
    """
    is_middleware_class = isinstance(anchor, type) and issubclass(anchor, AgentMiddleware)
    if not is_middleware_class:
        raise TypeError(f"@Prev expects an AgentMiddleware subclass, got {anchor!r}")

    def decorator(cls: type[AgentMiddleware]) -> type[AgentMiddleware]:
        setattr(cls, "_prev_anchor", anchor)
        return cls

    return decorator

View File

@@ -0,0 +1,3 @@
from .agent import make_lead_agent
# The package re-exports only the lead-agent factory defined in ``.agent``.
__all__ = ["make_lead_agent"]

View File

@@ -0,0 +1,350 @@
import logging
from langchain.agents import create_agent
from langchain.agents.middleware import AgentMiddleware, SummarizationMiddleware
from langchain_core.runnables import RunnableConfig
from deerflow.agents.lead_agent.prompt import apply_prompt_template
from deerflow.agents.middlewares.clarification_middleware import ClarificationMiddleware
from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware
from deerflow.agents.middlewares.memory_middleware import MemoryMiddleware
from deerflow.agents.middlewares.subagent_limit_middleware import SubagentLimitMiddleware
from deerflow.agents.middlewares.title_middleware import TitleMiddleware
from deerflow.agents.middlewares.todo_middleware import TodoMiddleware
from deerflow.agents.middlewares.token_usage_middleware import TokenUsageMiddleware
from deerflow.agents.middlewares.tool_error_handling_middleware import build_lead_runtime_middlewares
from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware
from deerflow.agents.thread_state import ThreadState
from deerflow.config.agents_config import load_agent_config
from deerflow.config.app_config import get_app_config
from deerflow.config.summarization_config import get_summarization_config
from deerflow.models import create_chat_model
logger = logging.getLogger(__name__)
def _resolve_model_name(requested_model_name: str | None = None) -> str:
    """Resolve the model name to use at runtime, falling back to the default.

    The default is the name of the first entry in ``app_config.models``.

    Args:
        requested_model_name: Model name asked for by the caller, if any.

    Returns:
        ``requested_model_name`` when the app config has a model config for
        it, otherwise the default model name.

    Raises:
        ValueError: If no chat models are configured.
    """
    app_config = get_app_config()
    default_model_name = app_config.models[0].name if app_config.models else None
    if default_model_name is None:
        raise ValueError("No chat models are configured. Please configure at least one model in config.yaml.")

    if requested_model_name:
        if app_config.get_model_config(requested_model_name):
            return requested_model_name
        if requested_model_name != default_model_name:
            # Lazy %-style arguments: the message is only built if emitted.
            logger.warning(
                "Model '%s' not found in config; fallback to default model '%s'.",
                requested_model_name,
                default_model_name,
            )
    return default_model_name
def _create_summarization_middleware() -> SummarizationMiddleware | None:
    """Build the summarization middleware described by the summarization config.

    Returns:
        ``None`` when summarization is disabled, otherwise a configured
        ``SummarizationMiddleware``.
    """
    config = get_summarization_config()
    if not config.enabled:
        return None

    # Trigger may be absent, a single item, or a list of items.
    raw_trigger = config.trigger
    if raw_trigger is None:
        trigger = None
    elif isinstance(raw_trigger, list):
        trigger = [item.to_tuple() for item in raw_trigger]
    else:
        trigger = raw_trigger.to_tuple()

    keep = config.keep.to_tuple()

    # Use the configured model when one is named; otherwise call
    # create_chat_model without a name. Thinking is disabled in both cases.
    if config.model_name:
        model = create_chat_model(name=config.model_name, thinking_enabled=False)
    else:
        model = create_chat_model(thinking_enabled=False)

    middleware_kwargs = {"model": model, "trigger": trigger, "keep": keep}
    # Optional settings are forwarded only when set, so the middleware's own
    # defaults apply otherwise.
    optional_settings = {
        "trim_tokens_to_summarize": config.trim_tokens_to_summarize,
        "summary_prompt": config.summary_prompt,
    }
    middleware_kwargs.update({key: value for key, value in optional_settings.items() if value is not None})

    return SummarizationMiddleware(**middleware_kwargs)
def _create_todo_list_middleware(is_plan_mode: bool) -> TodoMiddleware | None:
    """Create and configure the TodoList middleware.

    Args:
        is_plan_mode: Whether to enable plan mode with TodoList middleware.

    Returns:
        TodoMiddleware instance if plan mode is enabled, None otherwise.
    """
    if not is_plan_mode:
        return None
    # Custom prompts matching DeerFlow's style.
    # Both strings are handed to TodoMiddleware verbatim, so their text is
    # runtime behavior: edit them as prompts, not as comments.
    system_prompt = """
<todo_list_system>
You have access to the `write_todos` tool to help you manage and track complex multi-step objectives.
**CRITICAL RULES:**
- Mark todos as completed IMMEDIATELY after finishing each step - do NOT batch completions
- Keep EXACTLY ONE task as `in_progress` at any time (unless tasks can run in parallel)
- Update the todo list in REAL-TIME as you work - this gives users visibility into your progress
- DO NOT use this tool for simple tasks (< 3 steps) - just complete them directly
**When to Use:**
This tool is designed for complex objectives that require systematic tracking:
- Complex multi-step tasks requiring 3+ distinct steps
- Non-trivial tasks needing careful planning and execution
- User explicitly requests a todo list
- User provides multiple tasks (numbered or comma-separated list)
- The plan may need revisions based on intermediate results
**When NOT to Use:**
- Single, straightforward tasks
- Trivial tasks (< 3 steps)
- Purely conversational or informational requests
- Simple tool calls where the approach is obvious
**Best Practices:**
- Break down complex tasks into smaller, actionable steps
- Use clear, descriptive task names
- Remove tasks that become irrelevant
- Add new tasks discovered during implementation
- Don't be afraid to revise the todo list as you learn more
**Task Management:**
Writing todos takes time and tokens - use it when helpful for managing complex problems, not for simple requests.
</todo_list_system>
"""
    # Description attached to the todo tool itself (as opposed to the system prompt above).
    tool_description = """Use this tool to create and manage a structured task list for complex work sessions.
**IMPORTANT: Only use this tool for complex tasks (3+ steps). For simple requests, just do the work directly.**
## When to Use
Use this tool in these scenarios:
1. **Complex multi-step tasks**: When a task requires 3 or more distinct steps or actions
2. **Non-trivial tasks**: Tasks requiring careful planning or multiple operations
3. **User explicitly requests todo list**: When the user directly asks you to track tasks
4. **Multiple tasks**: When users provide a list of things to be done
5. **Dynamic planning**: When the plan may need updates based on intermediate results
## When NOT to Use
Skip this tool when:
1. The task is straightforward and takes less than 3 steps
2. The task is trivial and tracking provides no benefit
3. The task is purely conversational or informational
4. It's clear what needs to be done and you can just do it
## How to Use
1. **Starting a task**: Mark it as `in_progress` BEFORE beginning work
2. **Completing a task**: Mark it as `completed` IMMEDIATELY after finishing
3. **Updating the list**: Add new tasks, remove irrelevant ones, or update descriptions as needed
4. **Multiple updates**: You can make several updates at once (e.g., complete one task and start the next)
## Task States
- `pending`: Task not yet started
- `in_progress`: Currently working on (can have multiple if tasks run in parallel)
- `completed`: Task finished successfully
## Task Completion Requirements
**CRITICAL: Only mark a task as completed when you have FULLY accomplished it.**
Never mark a task as completed if:
- There are unresolved issues or errors
- Work is partial or incomplete
- You encountered blockers preventing completion
- You couldn't find necessary resources or dependencies
- Quality standards haven't been met
If blocked, keep the task as `in_progress` and create a new task describing what needs to be resolved.
## Best Practices
- Create specific, actionable items
- Break complex tasks into smaller, manageable steps
- Use clear, descriptive task names
- Update task status in real-time as you work
- Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
- Remove tasks that are no longer relevant
- **IMPORTANT**: When you write the todo list, mark your first task(s) as `in_progress` immediately
- **IMPORTANT**: Unless all tasks are completed, always have at least one task `in_progress` to show progress
Being proactive with task management demonstrates thoroughness and ensures all requirements are completed successfully.
**Remember**: If you only need a few tool calls to complete a task and it's clear what to do, it's better to just do the task directly and NOT use this tool at all.
"""
    return TodoMiddleware(system_prompt=system_prompt, tool_description=tool_description)
# ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
# UploadsMiddleware should be after ThreadDataMiddleware to access thread_id
# DanglingToolCallMiddleware patches missing ToolMessages before model sees the history
# SummarizationMiddleware should be early to reduce context before other processing
# TodoListMiddleware should be before ClarificationMiddleware to allow todo management
# TitleMiddleware generates title after first exchange
# MemoryMiddleware queues conversation for memory update (after TitleMiddleware)
# ViewImageMiddleware should be before ClarificationMiddleware to inject image details before LLM
# ToolErrorHandlingMiddleware should be before ClarificationMiddleware to convert tool exceptions to ToolMessages
# ClarificationMiddleware should be last to intercept clarification requests after model calls
def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_name: str | None = None, custom_middlewares: list[AgentMiddleware] | None = None):
    """Assemble the ordered middleware chain for the lead agent.

    Args:
        config: Runtime configuration; its "configurable" mapping supplies
            is_plan_mode, subagent_enabled and max_concurrent_subagents.
        model_name: Resolved runtime model name, used to decide whether the
            vision middleware is added.
        agent_name: If provided, MemoryMiddleware will use per-agent memory storage.
        custom_middlewares: Optional middlewares inserted just before the
            final ClarificationMiddleware.

    Returns:
        List of middleware instances, ending with ClarificationMiddleware.
    """
    chain = build_lead_runtime_middlewares(lazy_init=True)

    def _append_if_present(middleware) -> None:
        # Factory helpers return None when their feature is switched off.
        if middleware is not None:
            chain.append(middleware)

    # Summarization (only when enabled in config)
    _append_if_present(_create_summarization_middleware())

    # TodoList (only in plan mode)
    configurable = config.get("configurable", {})
    _append_if_present(_create_todo_list_middleware(configurable.get("is_plan_mode", False)))

    # Token usage tracking (only when enabled)
    if get_app_config().token_usage.enabled:
        chain.append(TokenUsageMiddleware())

    # Title first, then memory
    chain.append(TitleMiddleware())
    chain.append(MemoryMiddleware(agent_name=agent_name))

    # ViewImageMiddleware only if the current model supports vision.
    # The resolved runtime model_name from make_lead_agent is used to avoid stale config values.
    app_config = get_app_config()
    model_config = app_config.get_model_config(model_name) if model_name else None
    if model_config is not None and model_config.supports_vision:
        chain.append(ViewImageMiddleware())

    # Hide deferred tool schemas from model binding
    if app_config.tool_search.enabled:
        from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware

        chain.append(DeferredToolFilterMiddleware())

    # Truncate excess parallel task calls when subagents are enabled
    if configurable.get("subagent_enabled", False):
        subagent_limit = configurable.get("max_concurrent_subagents", 3)
        chain.append(SubagentLimitMiddleware(max_concurrent=subagent_limit))

    # Detect and break repetitive tool call loops
    chain.append(LoopDetectionMiddleware())

    # Custom middlewares go before ClarificationMiddleware, which must stay last
    if custom_middlewares:
        chain.extend(custom_middlewares)
    chain.append(ClarificationMiddleware())
    return chain
def make_lead_agent(config: RunnableConfig):
    """Create the lead agent for one run from its runtime configuration.

    Reads the run options from ``config["configurable"]``, resolves the model
    (request, then agent config, then global default), records run metadata on
    ``config["metadata"]`` and builds either the bootstrap agent or the
    default lead agent.

    Args:
        config: Runtime configuration. It is mutated: a "metadata" mapping is
            created if missing and updated with the run settings.

    Returns:
        The agent object returned by ``create_agent``.

    Raises:
        ValueError: If no model config can be resolved for the chosen name.
    """
    # Lazy import to avoid circular dependency
    from deerflow.tools import get_available_tools
    from deerflow.tools.builtins import setup_agent

    options = config.get("configurable", {})
    thinking_enabled = options.get("thinking_enabled", True)
    reasoning_effort = options.get("reasoning_effort", None)
    requested_model_name: str | None = options.get("model_name") or options.get("model")
    is_plan_mode = options.get("is_plan_mode", False)
    subagent_enabled = options.get("subagent_enabled", False)
    max_concurrent_subagents = options.get("max_concurrent_subagents", 3)
    is_bootstrap = options.get("is_bootstrap", False)
    agent_name = options.get("agent_name")

    # The bootstrap flow never loads a custom agent config.
    agent_config = None if is_bootstrap else load_agent_config(agent_name)

    # Model precedence: request -> agent config -> global default, with fallback for unknown names.
    agent_model_name = agent_config.model if agent_config and agent_config.model else None
    model_name = _resolve_model_name(requested_model_name or agent_model_name)

    model_config = get_app_config().get_model_config(model_name)
    if model_config is None:
        raise ValueError("No chat model could be resolved. Please configure at least one model in config.yaml or provide a valid 'model_name'/'model' in the request.")

    if thinking_enabled and not model_config.supports_thinking:
        logger.warning(f"Thinking mode is enabled but model '{model_name}' does not support it; fallback to non-thinking mode.")
        thinking_enabled = False

    display_name = agent_name or "default"
    logger.info(
        "Create Agent(%s) -> thinking_enabled: %s, reasoning_effort: %s, model_name: %s, is_plan_mode: %s, subagent_enabled: %s, max_concurrent_subagents: %s",
        display_name,
        thinking_enabled,
        reasoning_effort,
        model_name,
        is_plan_mode,
        subagent_enabled,
        max_concurrent_subagents,
    )

    # Inject run metadata for LangSmith trace tagging
    config.setdefault("metadata", {}).update(
        {
            "agent_name": display_name,
            "model_name": model_name or "default",
            "thinking_enabled": thinking_enabled,
            "reasoning_effort": reasoning_effort,
            "is_plan_mode": is_plan_mode,
            "subagent_enabled": subagent_enabled,
        }
    )

    if is_bootstrap:
        # Special bootstrap agent with minimal prompt for initial custom agent creation flow
        return create_agent(
            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled) + [setup_agent],
            middleware=_build_middlewares(config, model_name=model_name),
            system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, available_skills={"bootstrap"}),
            state_schema=ThreadState,
        )

    # Default lead agent: tool groups and skills come from the agent config when one is loaded.
    tool_groups = agent_config.tool_groups if agent_config else None
    available_skills = set(agent_config.skills) if agent_config and agent_config.skills is not None else None
    return create_agent(
        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort),
        tools=get_available_tools(model_name=model_name, groups=tool_groups, subagent_enabled=subagent_enabled),
        middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name),
        system_prompt=apply_prompt_template(
            subagent_enabled=subagent_enabled,
            max_concurrent_subagents=max_concurrent_subagents,
            agent_name=agent_name,
            available_skills=available_skills,
        ),
        state_schema=ThreadState,
    )

View File

@@ -0,0 +1,727 @@
import asyncio
import logging
import threading
from datetime import datetime
from functools import lru_cache
from deerflow.config.agents_config import load_agent_soul
from deerflow.skills import load_skills
from deerflow.skills.types import Skill
from deerflow.subagents import get_available_subagent_names
logger = logging.getLogger(__name__)
# Default number of seconds warm_enabled_skills_cache() waits for the cache to be populated.
_ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS = 5.0
# Held while reading or writing the cache, the refresh flag and the version counter below.
_enabled_skills_lock = threading.Lock()
# Most recently loaded enabled skills; None means nothing is loaded yet or the cache was invalidated.
_enabled_skills_cache: list[Skill] | None = None
# True from the moment a background refresh is requested until the worker publishes its result.
_enabled_skills_refresh_active = False
# Incremented on every invalidation; the worker compares it to detect invalidations that raced a load.
_enabled_skills_refresh_version = 0
# Set when a load result has been published; cleared while a refresh is pending.
_enabled_skills_refresh_event = threading.Event()
def _load_enabled_skills_sync() -> list[Skill]:
    """Load the enabled skills synchronously and return them as a new list."""
    return list(load_skills(enabled_only=True))
def _start_enabled_skills_refresh_thread() -> None:
    """Start a daemon thread that runs the enabled-skills refresh worker."""
    loader = threading.Thread(
        target=_refresh_enabled_skills_cache_worker,
        name="deerflow-enabled-skills-loader",
        daemon=True,
    )
    loader.start()
def _refresh_enabled_skills_cache_worker() -> None:
    """Load enabled skills and publish them, retrying if invalidated mid-load.

    Runs as the target of the refresh thread. Each iteration snapshots the
    refresh version, loads skills without holding the lock, and publishes the
    result only if no invalidation bumped the version in the meantime.
    """
    global _enabled_skills_cache, _enabled_skills_refresh_active

    while True:
        # Snapshot the version this load attempt corresponds to.
        with _enabled_skills_lock:
            target_version = _enabled_skills_refresh_version

        # The load itself runs outside the lock so readers are not blocked by it.
        try:
            skills = _load_enabled_skills_sync()
        except Exception:
            # Best effort: a failed load is published as an empty skill list.
            logger.exception("Failed to load enabled skills for prompt injection")
            skills = []

        with _enabled_skills_lock:
            if _enabled_skills_refresh_version == target_version:
                # Result is current: publish it, mark the refresh finished and wake waiters.
                _enabled_skills_cache = skills
                _enabled_skills_refresh_active = False
                _enabled_skills_refresh_event.set()
                return

            # A newer invalidation happened while loading. Keep the worker alive
            # and loop again so the cache always converges on the latest version.
            _enabled_skills_cache = None
def _ensure_enabled_skills_cache() -> threading.Event:
    """Make sure the enabled-skills cache is loaded or being loaded.

    Returns:
        The shared refresh event. It is already set when the cache is
        populated; otherwise callers can wait on it for the refresh to finish.
    """
    global _enabled_skills_refresh_active

    with _enabled_skills_lock:
        if _enabled_skills_cache is not None:
            # Cache is warm: make sure waiters are released.
            _enabled_skills_refresh_event.set()
            return _enabled_skills_refresh_event
        if _enabled_skills_refresh_active:
            # A refresh is already in flight; do not start a second worker.
            return _enabled_skills_refresh_event
        # Claim the refresh before leaving the lock so only one caller starts a worker.
        _enabled_skills_refresh_active = True
        _enabled_skills_refresh_event.clear()

    _start_enabled_skills_refresh_thread()
    return _enabled_skills_refresh_event
def _invalidate_enabled_skills_cache() -> threading.Event:
    """Drop the cached skills and make sure a fresh load is running.

    Also clears the lru_cache of rendered skills prompt sections.

    Returns:
        The shared refresh event, cleared here and set again once the worker
        publishes a new result.
    """
    global _enabled_skills_cache, _enabled_skills_refresh_active, _enabled_skills_refresh_version

    _get_cached_skills_prompt_section.cache_clear()
    with _enabled_skills_lock:
        _enabled_skills_cache = None
        # The version bump tells a worker that is mid-load that its result is stale.
        _enabled_skills_refresh_version += 1
        _enabled_skills_refresh_event.clear()
        if _enabled_skills_refresh_active:
            # The running worker notices the new version and loops again.
            return _enabled_skills_refresh_event
        _enabled_skills_refresh_active = True

    _start_enabled_skills_refresh_thread()
    return _enabled_skills_refresh_event
def prime_enabled_skills_cache() -> None:
    """Trigger loading of the enabled-skills cache without waiting for it."""
    _ensure_enabled_skills_cache()
def warm_enabled_skills_cache(timeout_seconds: float = _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS) -> bool:
    """Wait until the enabled-skills cache is populated, up to a timeout.

    Args:
        timeout_seconds: Maximum number of seconds to wait for the refresh event.

    Returns:
        True if the cache became ready in time, False on timeout (a warning
        is logged in that case).
    """
    ready = _ensure_enabled_skills_cache().wait(timeout=timeout_seconds)
    if not ready:
        logger.warning("Timed out waiting %.1fs for enabled skills cache warm-up", timeout_seconds)
    return ready
def _get_enabled_skills():
    """Return a copy of the cached enabled skills without blocking on a load.

    When the cache is cold, a background refresh is requested and an empty
    list is returned for this call.
    """
    with _enabled_skills_lock:
        snapshot = _enabled_skills_cache

    if snapshot is None:
        # Called outside the lock: _ensure_enabled_skills_cache acquires it itself.
        _ensure_enabled_skills_cache()
        return []
    return list(snapshot)
def _skill_mutability_label(category: str) -> str:
    """Return the label marking a skill as custom/editable or built-in.

    Args:
        category: Skill category; only the exact value "custom" is treated as editable.
    """
    if category == "custom":
        return "[custom, editable]"
    return "[built-in]"
def clear_skills_system_prompt_cache() -> None:
    """Invalidate the skills caches and start a background reload, without waiting."""
    _invalidate_enabled_skills_cache()
async def refresh_skills_system_prompt_cache_async() -> None:
    """Invalidate the skills caches and await the reload.

    The blocking Event.wait runs in a worker thread via asyncio.to_thread.
    No timeout is passed, so this waits until the refresh event is set.
    """
    await asyncio.to_thread(_invalidate_enabled_skills_cache().wait)
def _reset_skills_system_prompt_cache_state() -> None:
    """Reset every skills-cache global to its initial, cold state.

    Clears the rendered prompt-section lru_cache, drops the cached skills,
    marks no refresh as active, resets the version counter to 0 and clears
    the refresh event. No reload is started.
    """
    global _enabled_skills_cache, _enabled_skills_refresh_active, _enabled_skills_refresh_version

    _get_cached_skills_prompt_section.cache_clear()
    with _enabled_skills_lock:
        _enabled_skills_cache = None
        _enabled_skills_refresh_active = False
        _enabled_skills_refresh_version = 0
        _enabled_skills_refresh_event.clear()
def _refresh_enabled_skills_cache() -> None:
    """Backward-compatible test helper for direct synchronous reload.

    Loads the enabled skills in the calling thread and publishes them to the
    module-level cache, marks no refresh as active and sets the refresh event.
    A failed load is logged and published as an empty list.
    """
    # Without this declaration the assignments below would bind function
    # locals, leaving the module cache untouched while still setting the event.
    global _enabled_skills_cache, _enabled_skills_refresh_active

    try:
        skills = _load_enabled_skills_sync()
    except Exception:
        logger.exception("Failed to load enabled skills for prompt injection")
        skills = []

    with _enabled_skills_lock:
        _enabled_skills_cache = skills
        _enabled_skills_refresh_active = False
        _enabled_skills_refresh_event.set()
def _build_skill_evolution_section(skill_evolution_enabled: bool) -> str:
    """Return the "Skill Self-Evolution" prompt section, or "" when disabled.

    Args:
        skill_evolution_enabled: Whether the section should be included.
    """
    if skill_evolution_enabled:
        return """
## Skill Self-Evolution
After completing a task, consider creating or updating a skill when:
- The task required 5+ tool calls to resolve
- You overcame non-obvious errors or pitfalls
- The user corrected your approach and the corrected version worked
- You discovered a non-trivial, recurring workflow
If you used a skill and encountered issues not covered by it, patch it immediately.
Prefer patch over edit. Before creating a new skill, confirm with the user first.
Skip simple one-off tasks.
"""
    return ""
def _build_subagent_section(max_concurrent: int) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

    The wording adapts to whether a "bash" subagent is reported by
    get_available_subagent_names().

    Args:
        max_concurrent: Maximum number of concurrent subagent calls allowed per response.

    Returns:
        Formatted subagent section string.
    """
    # Short alias: the limit is interpolated many times in the f-string below.
    n = max_concurrent
    bash_available = "bash" in get_available_subagent_names()
    # In the else branch two adjacent string literals are concatenated, so both
    # the general-purpose line and the "bash not available" line are emitted.
    available_subagents = (
        "- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.\n- **bash**: For command execution (git, build, test, deploy operations)"
        if bash_available
        else "- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.\n"
        "- **bash**: Not available in the current sandbox configuration. Use direct file/web tools or switch to AioSandboxProvider for isolated shell access."
    )
    # Examples of direct (non-subagent) tools; bash is only mentioned when available.
    direct_tool_examples = "bash, ls, read_file, web_search, etc." if bash_available else "ls, read_file, web_search, etc."
    # Counter-example snippet shown at the end of the prompt section.
    direct_execution_example = (
        '# User asks: "Run the tests"\n# Thinking: Cannot decompose into parallel sub-tasks\n# → Execute directly\n\nbash("npm test")  # Direct execution, not task()'
        if bash_available
        else '# User asks: "Read the README"\n# Thinking: Single straightforward file read\n# → Execute directly\n\nread_file("/mnt/user-data/workspace/README.md")  # Direct execution, not task()'
    )
    # The text below is the runtime prompt; every character is behavior.
    return f"""<subagent_system>
**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**
You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
3. **SYNTHESIZE**: Collect and integrate results into a coherent answer
**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
**⛔ HARD CONCURRENCY LIMIT: MAXIMUM {n} `task` CALLS PER RESPONSE. THIS IS NOT OPTIONAL.**
- Each response, you may include **at most {n}** `task` tool calls. Any excess calls are **silently discarded** by the system — you will lose that work.
- **Before launching subagents, you MUST count your sub-tasks in your thinking:**
- If count ≤ {n}: Launch all in this response.
- If count > {n}: **Pick the {n} most important/foundational sub-tasks for this turn.** Save the rest for the next turn.
- **Multi-batch execution** (for >{n} sub-tasks):
- Turn 1: Launch sub-tasks 1-{n} in parallel → wait for results
- Turn 2: Launch next batch in parallel → wait for results
- ... continue until all sub-tasks are complete
- Final turn: Synthesize ALL results into a coherent answer
- **Example thinking pattern**: "I identified 6 sub-tasks. Since the limit is {n} per turn, I will launch the first {n} now, and the rest in the next turn."
**Available Subagents:**
{available_subagents}
**Your Orchestration Strategy:**
✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
For complex queries, break them down into focused sub-tasks and execute in parallel batches (max {n} per turn):
**Example 1: "Why is Tencent's stock price declining?" (3 sub-tasks → 1 batch)**
→ Turn 1: Launch 3 subagents in parallel:
- Subagent 1: Recent financial reports, earnings data, and revenue trends
- Subagent 2: Negative news, controversies, and regulatory issues
- Subagent 3: Industry trends, competitor performance, and market sentiment
→ Turn 2: Synthesize results
**Example 2: "Compare 5 cloud providers" (5 sub-tasks → multi-batch)**
→ Turn 1: Launch {n} subagents in parallel (first batch)
→ Turn 2: Launch remaining subagents in parallel
→ Final turn: Synthesize ALL results into comprehensive comparison
**Example 3: "Refactor the authentication system"**
→ Turn 1: Launch 3 subagents in parallel:
- Subagent 1: Analyze current auth implementation and technical debt
- Subagent 2: Research best practices and security patterns
- Subagent 3: Review related tests, documentation, and vulnerabilities
→ Turn 2: Synthesize results
✅ **USE Parallel Subagents (max {n} per turn) when:**
- **Complex research questions**: Requires multiple information sources or perspectives
- **Multi-aspect analysis**: Task has several independent dimensions to explore
- **Large codebases**: Need to analyze different parts simultaneously
- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles
❌ **DO NOT use subagents (execute directly) when:**
- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
- **Ultra-simple actions**: Read one file, quick edits, single commands
- **Need immediate clarification**: Must ask user before proceeding
- **Meta conversation**: Questions about conversation history
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
**CRITICAL WORKFLOW** (STRICTLY follow this before EVERY action):
1. **COUNT**: In your thinking, list all sub-tasks and count them explicitly: "I have N sub-tasks"
2. **PLAN BATCHES**: If N > {n}, explicitly plan which sub-tasks go in which batch:
- "Batch 1 (this turn): first {n} sub-tasks"
- "Batch 2 (next turn): next batch of sub-tasks"
3. **EXECUTE**: Launch ONLY the current batch (max {n} `task` calls). Do NOT launch sub-tasks from future batches.
4. **REPEAT**: After results return, launch the next batch. Continue until all batches complete.
5. **SYNTHESIZE**: After ALL batches are done, synthesize all results.
6. **Cannot decompose** → Execute directly using available tools ({direct_tool_examples})
**⛔ VIOLATION: Launching more than {n} `task` calls in a single response is a HARD ERROR. The system WILL discard excess calls and you WILL lose work. Always batch.**
**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
**How It Works:**
- The task tool runs subagents asynchronously in the background
- The backend automatically polls for completion (you don't need to poll)
- The tool call will block until the subagent completes its work
- Once complete, the result is returned to you directly
**Usage Example 1 - Single Batch (≤{n} sub-tasks):**
```python
# User asks: "Why is Tencent's stock price declining?"
# Thinking: 3 sub-tasks → fits in 1 batch
# Turn 1: Launch 3 subagents in parallel
task(description="Tencent financial data", prompt="...", subagent_type="general-purpose")
task(description="Tencent news & regulation", prompt="...", subagent_type="general-purpose")
task(description="Industry & market trends", prompt="...", subagent_type="general-purpose")
# All 3 run in parallel → synthesize results
```
**Usage Example 2 - Multiple Batches (>{n} sub-tasks):**
```python
# User asks: "Compare AWS, Azure, GCP, Alibaba Cloud, and Oracle Cloud"
# Thinking: 5 sub-tasks → need multiple batches (max {n} per batch)
# Turn 1: Launch first batch of {n}
task(description="AWS analysis", prompt="...", subagent_type="general-purpose")
task(description="Azure analysis", prompt="...", subagent_type="general-purpose")
task(description="GCP analysis", prompt="...", subagent_type="general-purpose")
# Turn 2: Launch remaining batch (after first batch completes)
task(description="Alibaba Cloud analysis", prompt="...", subagent_type="general-purpose")
task(description="Oracle Cloud analysis", prompt="...", subagent_type="general-purpose")
# Turn 3: Synthesize ALL results from both batches
```
**Counter-Example - Direct Execution (NO subagents):**
```python
{direct_execution_example}
```
**CRITICAL**:
- **Max {n} `task` calls per turn** - the system enforces this, excess calls are discarded
- Only use `task` when you can launch 2+ subagents in parallel
- Single task = No value from subagents = Execute directly
- For >{n} sub-tasks, use sequential batches of {n} across multiple turns
</subagent_system>"""
# Base system prompt for the lead agent. Placeholders appearing in the text:
# {agent_name}, {soul}, {memory_context}, {subagent_thinking}, {skills_section},
# {deferred_tools_section}, {subagent_section}, {acp_section}, {subagent_reminder}.
# {subagent_thinking} and {subagent_reminder} sit directly in front of a bullet
# on the same line, so any text substituted there presumably has to end with
# its own newline; verify where the template is filled.
# NOTE(review): `{{title, url, snippet}}` uses doubled braces, which suggests
# the template is filled with str.format; confirm at the call site before
# adding literal braces anywhere else in this text.
SYSTEM_PROMPT_TEMPLATE = """
<role>
You are {agent_name}, an open-source super agent.
</role>
{soul}
{memory_context}
<thinking_style>
- Think concisely and strategically about the user's request BEFORE taking action
- Break down the task: What is clear? What is ambiguous? What is missing?
- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
{subagent_thinking}- Never write down your full final answer or report in thinking process, but only outline
- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
- Your response must contain the actual answer, not just a reference to what you thought about
</thinking_style>
<clarification_system>
**WORKFLOW PRIORITY: CLARIFY → PLAN → ACT**
1. **FIRST**: Analyze the request in your thinking - identify what's unclear, missing, or ambiguous
2. **SECOND**: If clarification is needed, call `ask_clarification` tool IMMEDIATELY - do NOT start working
3. **THIRD**: Only after all clarifications are resolved, proceed with planning and execution
**CRITICAL RULE: Clarification ALWAYS comes BEFORE action. Never start working and clarify mid-execution.**
**MANDATORY Clarification Scenarios - You MUST call ask_clarification BEFORE starting work when:**
1. **Missing Information** (`missing_info`): Required details not provided
- Example: User says "create a web scraper" but doesn't specify the target website
- Example: "Deploy the app" without specifying environment
- **REQUIRED ACTION**: Call ask_clarification to get the missing information
2. **Ambiguous Requirements** (`ambiguous_requirement`): Multiple valid interpretations exist
- Example: "Optimize the code" could mean performance, readability, or memory usage
- Example: "Make it better" is unclear what aspect to improve
- **REQUIRED ACTION**: Call ask_clarification to clarify the exact requirement
3. **Approach Choices** (`approach_choice`): Several valid approaches exist
- Example: "Add authentication" could use JWT, OAuth, session-based, or API keys
- Example: "Store data" could use database, files, cache, etc.
- **REQUIRED ACTION**: Call ask_clarification to let user choose the approach
4. **Risky Operations** (`risk_confirmation`): Destructive actions need confirmation
- Example: Deleting files, modifying production configs, database operations
- Example: Overwriting existing code or data
- **REQUIRED ACTION**: Call ask_clarification to get explicit confirmation
5. **Suggestions** (`suggestion`): You have a recommendation but want approval
- Example: "I recommend refactoring this code. Should I proceed?"
- **REQUIRED ACTION**: Call ask_clarification to get approval
**STRICT ENFORCEMENT:**
- ❌ DO NOT start working and then ask for clarification mid-execution - clarify FIRST
- ❌ DO NOT skip clarification for "efficiency" - accuracy matters more than speed
- ❌ DO NOT make assumptions when information is missing - ALWAYS ask
- ❌ DO NOT proceed with guesses - STOP and call ask_clarification first
- ✅ Analyze the request in thinking → Identify unclear aspects → Ask BEFORE any action
- ✅ If you identify the need for clarification in your thinking, you MUST call the tool IMMEDIATELY
- ✅ After calling ask_clarification, execution will be interrupted automatically
- ✅ Wait for user response - do NOT continue with assumptions
**How to Use:**
```python
ask_clarification(
question="Your specific question here?",
clarification_type="missing_info", # or other type
context="Why you need this information", # optional but recommended
options=["option1", "option2"] # optional, for choices
)
```
**Example:**
User: "Deploy the application"
You (thinking): Missing environment info - I MUST ask for clarification
You (action): ask_clarification(
question="Which environment should I deploy to?",
clarification_type="approach_choice",
context="I need to know the target environment for proper configuration",
options=["development", "staging", "production"]
)
[Execution stops - wait for user response]
User: "staging"
You: "Deploying to staging..." [proceed]
</clarification_system>
{skills_section}
{deferred_tools_section}
{subagent_section}
<working_directory existed="true">
- User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
- User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
- Output files: `/mnt/user-data/outputs` - Final deliverables must be saved here
**File Management:**
- Uploaded files are automatically listed in the <uploaded_files> section before each request
- Use `read_file` tool to read uploaded files using their paths from the list
- For PDF, PPT, Excel, and Word files, converted Markdown versions (*.md) are available alongside originals
- All temporary work happens in `/mnt/user-data/workspace`
- Treat `/mnt/user-data/workspace` as your default current working directory for coding and file-editing tasks
- When writing scripts or commands that create/read files from the workspace, prefer relative paths such as `hello.txt`, `../uploads/data.csv`, and `../outputs/report.md`
- Avoid hardcoding `/mnt/user-data/...` inside generated scripts when a relative path from the workspace is enough
- Final deliverables must be copied to `/mnt/user-data/outputs` and presented using `present_file` tool
{acp_section}
</working_directory>
<response_style>
- Clear and Concise: Avoid over-formatting unless requested
- Natural Tone: Use paragraphs and prose, not bullet points by default
- Action-Oriented: Focus on delivering results, not explaining processes
</response_style>
<citations>
**CRITICAL: Always include citations when using web search results**
- **When to Use**: MANDATORY after web_search, web_fetch, or any external information source
- **Format**: Use Markdown link format `[citation:TITLE](URL)` immediately after the claim
- **Placement**: Inline citations should appear right after the sentence or claim they support
- **Sources Section**: Also collect all citations in a "Sources" section at the end of reports
**Example - Inline Citations:**
```markdown
The key AI trends for 2026 include enhanced reasoning capabilities and multimodal integration
[citation:AI Trends 2026](https://techcrunch.com/ai-trends).
Recent breakthroughs in language models have also accelerated progress
[citation:OpenAI Research](https://openai.com/research).
```
**Example - Deep Research Report with Citations:**
```markdown
## Executive Summary
DeerFlow is an open-source AI agent framework that gained significant traction in early 2026
[citation:GitHub Repository](https://github.com/bytedance/deer-flow). The project focuses on
providing a production-ready agent system with sandbox execution and memory management
[citation:DeerFlow Documentation](https://deer-flow.dev/docs).
## Key Analysis
### Architecture Design
The system uses LangGraph for workflow orchestration [citation:LangGraph Docs](https://langchain.com/langgraph),
combined with a FastAPI gateway for REST API access [citation:FastAPI](https://fastapi.tiangolo.com).
## Sources
### Primary Sources
- [GitHub Repository](https://github.com/bytedance/deer-flow) - Official source code and documentation
- [DeerFlow Documentation](https://deer-flow.dev/docs) - Technical specifications
### Media Coverage
- [AI Trends 2026](https://techcrunch.com/ai-trends) - Industry analysis
```
**CRITICAL: Sources section format:**
- Every item in the Sources section MUST be a clickable markdown link with URL
- Use standard markdown link `[Title](URL) - Description` format (NOT `[citation:...]` format)
- The `[citation:Title](URL)` format is ONLY for inline citations within the report body
- ❌ WRONG: `GitHub 仓库 - 官方源代码和文档` (no URL!)
- ❌ WRONG in Sources: `[citation:GitHub Repository](url)` (citation prefix is for inline only!)
- ✅ RIGHT in Sources: `[GitHub Repository](https://github.com/bytedance/deer-flow) - 官方源代码和文档`
**WORKFLOW for Research Tasks:**
1. Use web_search to find sources → Extract {{title, url, snippet}} from results
2. Write content with inline citations: `claim [citation:Title](url)`
3. Collect all citations in a "Sources" section at the end
4. NEVER write claims without citations when sources are available
**CRITICAL RULES:**
- ❌ DO NOT write research content without citations
- ❌ DO NOT forget to extract URLs from search results
- ✅ ALWAYS add `[citation:Title](URL)` after claims from external sources
- ✅ ALWAYS include a "Sources" section listing all references
</citations>
<critical_reminders>
- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
- Progressive Loading: Load resources incrementally as referenced in skills
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
- Including Images and Mermaid: Images and Mermaid diagrams are always welcomed in the Markdown format, and you're encouraged to use `![Image Description](image_path)\n\n` or "```mermaid" to display images in response or Markdown files
- Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
- Language Consistency: Keep using the same language as user's
- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.
</critical_reminders>
"""
def _get_memory_context(agent_name: str | None = None) -> str:
    """Build the ``<memory>`` block injected into the system prompt.

    Args:
        agent_name: Agent whose memory is loaded. ``None`` loads global memory.

    Returns:
        The formatted memory wrapped in ``<memory>`` tags, or an empty string
        when memory or injection is disabled, when the formatted memory is
        blank, or when loading fails for any reason.
    """
    try:
        from deerflow.agents.memory import format_memory_for_injection, get_memory_data
        from deerflow.config.memory_config import get_memory_config

        config = get_memory_config()
        if not (config.enabled and config.injection_enabled):
            return ""

        memory_content = format_memory_for_injection(
            get_memory_data(agent_name),
            max_tokens=config.max_injection_tokens,
        )
        if memory_content.strip():
            return f"<memory>\n{memory_content}\n</memory>\n"
        return ""
    except Exception as e:
        # Memory is optional context: log the failure and build the prompt without it.
        logger.error("Failed to load memory context: %s", e)
        return ""
@lru_cache(maxsize=32)
def _get_cached_skills_prompt_section(
skill_signature: tuple[tuple[str, str, str, str], ...],
available_skills_key: tuple[str, ...] | None,
container_base_path: str,
skill_evolution_section: str,
) -> str:
filtered = [(name, description, category, location) for name, description, category, location in skill_signature if available_skills_key is None or name in available_skills_key]
skills_list = ""
if filtered:
skill_items = "\n".join(
f" <skill>\n <name>{name}</name>\n <description>{description} {_skill_mutability_label(category)}</description>\n <location>{location}</location>\n </skill>"
for name, description, category, location in filtered
)
skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"
return f"""<skill_system>
You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources.
**Progressive Loading Pattern:**
1. When a user query matches a skill's use case, immediately call `read_file` on the skill's main file using the path attribute provided in the skill tag below
2. Read and understand the skill's workflow and instructions
3. The skill file contains references to external resources under the same folder
4. Load referenced resources only when needed during execution
5. Follow the skill's instructions precisely
**Skills are located at:** {container_base_path}
{skill_evolution_section}
{skills_list}
</skill_system>"""
def get_skills_prompt_section(available_skills: set[str] | None = None) -> str:
    """Generate the skills prompt section with the available skills list.

    Args:
        available_skills: Optional whitelist of skill names. ``None`` applies
            no filtering.

    Returns:
        The rendered skills section, or an empty string when there is nothing
        to show: no enabled skills while skill evolution is disabled, or a
        whitelist that matches none of the enabled skills.
    """
    skills = _get_enabled_skills()

    try:
        from deerflow.config import get_app_config

        config = get_app_config()
        container_base_path = config.skills.container_path
        skill_evolution_enabled = config.skill_evolution.enabled
    except Exception:
        # Best effort: fall back to defaults when the app config cannot be read.
        container_base_path = "/mnt/skills"
        skill_evolution_enabled = False

    if not skills and not skill_evolution_enabled:
        return ""

    # This also covers "no enabled skills + whitelist given": any() over an
    # empty sequence is False, so no separate empty-signature check is needed.
    if available_skills is not None and not any(skill.name in available_skills for skill in skills):
        return ""

    # Hashable snapshots of the inputs so the rendered section can be memoized.
    skill_signature = tuple((skill.name, skill.description, skill.category, skill.get_container_file_path(container_base_path)) for skill in skills)
    available_key = tuple(sorted(available_skills)) if available_skills is not None else None
    skill_evolution_section = _build_skill_evolution_section(skill_evolution_enabled)
    return _get_cached_skills_prompt_section(skill_signature, available_key, container_base_path, skill_evolution_section)
def get_agent_soul(agent_name: str | None) -> str:
    """Return the agent's SOUL.md (agent personality) wrapped in ``<soul>`` tags.

    Args:
        agent_name: Name passed through to ``load_agent_soul``.

    Returns:
        ``"<soul>\\n...\\n</soul>\\n"`` when a soul is present, otherwise ``""``.
    """
    soul = load_agent_soul(agent_name)
    if not soul:
        return ""
    return f"<soul>\n{soul}\n</soul>\n"
def get_deferred_tools_prompt_section() -> str:
    """Render the ``<available-deferred-tools>`` block for the system prompt.

    Only the names of deferred tools are listed, so the agent knows what exists
    and can load them through tool_search.

    Returns:
        The block with one tool name per line, or an empty string when
        tool_search is disabled, the configuration cannot be read, or the
        deferred registry is empty.
    """
    from deerflow.tools.builtins.tool_search import get_deferred_registry

    try:
        from deerflow.config import get_app_config

        tool_search_disabled = not get_app_config().tool_search.enabled
    except Exception:
        return ""
    if tool_search_disabled:
        return ""

    registry = get_deferred_registry()
    if not registry:
        return ""

    names = "\n".join(entry.name for entry in registry.entries)
    return f"<available-deferred-tools>\n{names}\n</available-deferred-tools>"
def _build_acp_section() -> str:
"""Build the ACP agent prompt section, only if ACP agents are configured."""
try:
from deerflow.config.acp_config import get_acp_agents
agents = get_acp_agents()
if not agents:
return ""
except Exception:
return ""
return (
"\n**ACP Agent Tasks (invoke_acp_agent):**\n"
"- ACP agents (e.g. codex, claude_code) run in their own independent workspace — NOT in `/mnt/user-data/`\n"
"- When writing prompts for ACP agents, describe the task only — do NOT reference `/mnt/user-data` paths\n"
"- ACP agent results are accessible at `/mnt/acp-workspace/` (read-only) — use `ls`, `read_file`, or `bash cp` to retrieve output files\n"
"- To deliver ACP output to the user: copy from `/mnt/acp-workspace/<file>` to `/mnt/user-data/outputs/<file>`, then use `present_file`"
)
def _build_custom_mounts_section() -> str:
    """Describe the explicitly configured sandbox mounts for the prompt.

    Returns:
        A bullet list with one line per configured mount plus a usage hint, or
        an empty string when no mounts are configured or the configuration
        cannot be loaded (the failure is logged).
    """

    def _describe(mount) -> str:
        access = "read-write"
        if mount.read_only:
            access = "read-only"
        return f"- Custom mount: `{mount.container_path}` - Host directory mapped into the sandbox ({access})"

    try:
        from deerflow.config import get_app_config

        mounts = get_app_config().sandbox.mounts or []
    except Exception:
        logger.exception("Failed to load configured sandbox mounts for the lead-agent prompt")
        return ""

    if not mounts:
        return ""

    mounts_list = "\n".join(_describe(mount) for mount in mounts)
    return f"\n**Custom Mounted Directories:**\n{mounts_list}\n- If the user needs files outside `/mnt/user-data`, use these absolute container paths directly when they match the requested directory"
def apply_prompt_template(subagent_enabled: bool = False, max_concurrent_subagents: int = 3, *, agent_name: str | None = None, available_skills: set[str] | None = None) -> str:
    """Assemble the complete system prompt.

    Args:
        subagent_enabled: Include the subagent section, reminder and thinking
            guidance when true.
        max_concurrent_subagents: Limit on `task` calls per response that is
            quoted in the subagent guidance.
        agent_name: Agent whose memory and soul are loaded; also shown as the
            agent name (``"DeerFlow 2.0"`` when not given).
        available_skills: Optional whitelist of skill names for the skills section.

    Returns:
        The formatted template followed by a ``<current_date>`` tag.
    """
    memory_context = _get_memory_context(agent_name)

    # Subagent-related pieces are only populated when the runtime flag is set.
    n = max_concurrent_subagents
    if subagent_enabled:
        subagent_section = _build_subagent_section(n)
        subagent_reminder = (
            "- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks. "
            f"**HARD LIMIT: max {n} `task` calls per response.** "
            f"If >{n} sub-tasks, split into sequential batches of ≤{n}. Synthesize after ALL batches complete.\n"
        )
        subagent_thinking = (
            "- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, COUNT them. "
            f"If count > {n}, you MUST plan batches of ≤{n} and only launch the FIRST batch now. "
            f"NEVER launch more than {n} `task` calls in one response.**\n"
        )
    else:
        subagent_section = ""
        subagent_reminder = ""
        subagent_thinking = ""

    skills_section = get_skills_prompt_section(available_skills)
    deferred_tools_section = get_deferred_tools_prompt_section()

    # ACP guidance and custom mounts share one template slot; empty parts are dropped.
    acp_section = _build_acp_section()
    custom_mounts_section = _build_custom_mounts_section()
    non_empty_parts = [part for part in (acp_section, custom_mounts_section) if part]
    acp_and_mounts_section = "\n".join(non_empty_parts)

    template_fields = {
        "agent_name": agent_name or "DeerFlow 2.0",
        "soul": get_agent_soul(agent_name),
        "skills_section": skills_section,
        "deferred_tools_section": deferred_tools_section,
        "memory_context": memory_context,
        "subagent_section": subagent_section,
        "subagent_reminder": subagent_reminder,
        "subagent_thinking": subagent_thinking,
        "acp_section": acp_and_mounts_section,
    }
    prompt = SYSTEM_PROMPT_TEMPLATE.format(**template_fields)

    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"

View File

@@ -0,0 +1,57 @@
"""Memory module for DeerFlow.
This module provides a global memory mechanism that:
- Stores user context and conversation history in memory.json
- Uses LLM to summarize and extract facts from conversations
- Injects relevant memory into system prompts for personalized responses
"""
from deerflow.agents.memory.prompt import (
FACT_EXTRACTION_PROMPT,
MEMORY_UPDATE_PROMPT,
format_conversation_for_update,
format_memory_for_injection,
)
from deerflow.agents.memory.queue import (
ConversationContext,
MemoryUpdateQueue,
get_memory_queue,
reset_memory_queue,
)
from deerflow.agents.memory.storage import (
FileMemoryStorage,
MemoryStorage,
get_memory_storage,
)
from deerflow.agents.memory.updater import (
MemoryUpdater,
clear_memory_data,
delete_memory_fact,
get_memory_data,
reload_memory_data,
update_memory_from_conversation,
)
__all__ = [
# Prompt utilities
"MEMORY_UPDATE_PROMPT",
"FACT_EXTRACTION_PROMPT",
"format_memory_for_injection",
"format_conversation_for_update",
# Queue
"ConversationContext",
"MemoryUpdateQueue",
"get_memory_queue",
"reset_memory_queue",
# Storage
"MemoryStorage",
"FileMemoryStorage",
"get_memory_storage",
# Updater
"MemoryUpdater",
"clear_memory_data",
"delete_memory_fact",
"get_memory_data",
"reload_memory_data",
"update_memory_from_conversation",
]

View File

@@ -0,0 +1,363 @@
"""Prompt templates for memory update and injection."""
import math
import re
from typing import Any
try:
import tiktoken
TIKTOKEN_AVAILABLE = True
except ImportError:
TIKTOKEN_AVAILABLE = False
# Prompt template for updating memory based on a conversation.
# Placeholders: {current_memory}, {conversation} and {correction_hint}.
# Literal JSON braces are doubled ({{ }}), so the template is presumably filled
# with str.format — confirm at the call site.
MEMORY_UPDATE_PROMPT = """You are a memory management system. Your task is to analyze a conversation and update the user's memory profile.
Current Memory State:
<current_memory>
{current_memory}
</current_memory>
New Conversation to Process:
<conversation>
{conversation}
</conversation>
Instructions:
1. Analyze the conversation for important information about the user
2. Extract relevant facts, preferences, and context with specific details (numbers, names, technologies)
3. Update the memory sections as needed following the detailed length guidelines below
Before extracting facts, perform a structured reflection on the conversation:
1. Error/Retry Detection: Did the agent encounter errors, require retries, or produce incorrect results?
If yes, record the root cause and correct approach as a high-confidence fact with category "correction".
2. User Correction Detection: Did the user correct the agent's direction, understanding, or output?
If yes, record the correct interpretation or approach as a high-confidence fact with category "correction".
Include what went wrong in "sourceError" only when category is "correction" and the mistake is explicit in the conversation.
3. Project Constraint Discovery: Were any project-specific constraints discovered during the conversation?
If yes, record them as facts with the most appropriate category and confidence.
{correction_hint}
Memory Section Guidelines:
**User Context** (Current state - concise summaries):
- workContext: Professional role, company, key projects, main technologies (2-3 sentences)
Example: Core contributor, project names with metrics (16k+ stars), technical stack
- personalContext: Languages, communication preferences, key interests (1-2 sentences)
Example: Bilingual capabilities, specific interest areas, expertise domains
- topOfMind: Multiple ongoing focus areas and priorities (3-5 sentences, detailed paragraph)
Example: Primary project work, parallel technical investigations, ongoing learning/tracking
Include: Active implementation work, troubleshooting issues, market/research interests
Note: This captures SEVERAL concurrent focus areas, not just one task
**History** (Temporal context - rich paragraphs):
- recentMonths: Detailed summary of recent activities (4-6 sentences or 1-2 paragraphs)
Timeline: Last 1-3 months of interactions
Include: Technologies explored, projects worked on, problems solved, interests demonstrated
- earlierContext: Important historical patterns (3-5 sentences or 1 paragraph)
Timeline: 3-12 months ago
Include: Past projects, learning journeys, established patterns
- longTermBackground: Persistent background and foundational context (2-4 sentences)
Timeline: Overall/foundational information
Include: Core expertise, longstanding interests, fundamental working style
**Facts Extraction**:
- Extract specific, quantifiable details (e.g., "16k+ GitHub stars", "200+ datasets")
- Include proper nouns (company names, project names, technology names)
- Preserve technical terminology and version numbers
- Categories:
* preference: Tools, styles, approaches user prefers/dislikes
* knowledge: Specific expertise, technologies mastered, domain knowledge
* context: Background facts (job title, projects, locations, languages)
* behavior: Working patterns, communication habits, problem-solving approaches
* goal: Stated objectives, learning targets, project ambitions
* correction: Explicit agent mistakes or user corrections, including the correct approach
- Confidence levels:
* 0.9-1.0: Explicitly stated facts ("I work on X", "My role is Y")
* 0.7-0.8: Strongly implied from actions/discussions
* 0.5-0.6: Inferred patterns (use sparingly, only for clear patterns)
**What Goes Where**:
- workContext: Current job, active projects, primary tech stack
- personalContext: Languages, personality, interests outside direct work tasks
- topOfMind: Multiple ongoing priorities and focus areas user cares about recently (gets updated most frequently)
Should capture 3-5 concurrent themes: main work, side explorations, learning/tracking interests
- recentMonths: Detailed account of recent technical explorations and work
- earlierContext: Patterns from slightly older interactions still relevant
- longTermBackground: Unchanging foundational facts about the user
**Multilingual Content**:
- Preserve original language for proper nouns and company names
- Keep technical terms in their original form (DeepSeek, LangGraph, etc.)
- Note language capabilities in personalContext
Output Format (JSON):
{{
"user": {{
"workContext": {{ "summary": "...", "shouldUpdate": true/false }},
"personalContext": {{ "summary": "...", "shouldUpdate": true/false }},
"topOfMind": {{ "summary": "...", "shouldUpdate": true/false }}
}},
"history": {{
"recentMonths": {{ "summary": "...", "shouldUpdate": true/false }},
"earlierContext": {{ "summary": "...", "shouldUpdate": true/false }},
"longTermBackground": {{ "summary": "...", "shouldUpdate": true/false }}
}},
"newFacts": [
{{ "content": "...", "category": "preference|knowledge|context|behavior|goal|correction", "confidence": 0.0-1.0 }}
],
"factsToRemove": ["fact_id_1", "fact_id_2"]
}}
Important Rules:
- Only set shouldUpdate=true if there's meaningful new information
- Follow length guidelines: workContext/personalContext are concise (1-3 sentences), topOfMind and history sections are detailed (paragraphs)
- Include specific metrics, version numbers, and proper nouns in facts
- Only add facts that are clearly stated (0.9+) or strongly implied (0.7+)
- Use category "correction" for explicit agent mistakes or user corrections; assign confidence >= 0.95 when the correction is explicit
- Include "sourceError" only for explicit correction facts when the prior mistake or wrong approach is clearly stated; omit it otherwise
- Remove facts that are contradicted by new information
- When updating topOfMind, integrate new focus areas while removing completed/abandoned ones
Keep 3-5 concurrent focus themes that are still active and relevant
- For history sections, integrate new information chronologically into appropriate time period
- Preserve technical accuracy - keep exact names of technologies, companies, projects
- Focus on information useful for future interactions and personalization
- IMPORTANT: Do NOT record file upload events in memory. Uploaded files are
session-specific and ephemeral — they will not be accessible in future sessions.
Recording upload events causes confusion in subsequent conversations.
Return ONLY valid JSON, no explanation or markdown."""
# Prompt template for extracting facts from a single message.
# Placeholder: {message}. Literal JSON braces are doubled ({{ }}), so the
# template is presumably filled with str.format — confirm at the call site.
FACT_EXTRACTION_PROMPT = """Extract factual information about the user from this message.
Message:
{message}
Extract facts in this JSON format:
{{
"facts": [
{{ "content": "...", "category": "preference|knowledge|context|behavior|goal|correction", "confidence": 0.0-1.0 }}
]
}}
Categories:
- preference: User preferences (likes/dislikes, styles, tools)
- knowledge: User's expertise or knowledge areas
- context: Background context (location, job, projects)
- behavior: Behavioral patterns
- goal: User's goals or objectives
- correction: Explicit corrections or mistakes to avoid repeating
Rules:
- Only extract clear, specific facts
- Confidence should reflect certainty (explicit statement = 0.9+, implied = 0.6-0.8)
- Skip vague or temporary information
Return ONLY valid JSON."""
def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
    """Return the number of tokens in ``text``.

    Args:
        text: The text to measure.
        encoding_name: Name of the tiktoken encoding to use.

    Returns:
        The tiktoken token count, or ``len(text) // 4`` as a character-based
        estimate when tiktoken is not installed or encoding fails.
    """

    def _estimate() -> int:
        # Character-based approximation: four characters per token.
        return len(text) // 4

    if TIKTOKEN_AVAILABLE:
        try:
            return len(tiktoken.get_encoding(encoding_name).encode(text))
        except Exception:
            return _estimate()
    return _estimate()
def _coerce_confidence(value: Any, default: float = 0.0) -> float:
"""Coerce a confidence-like value to a bounded float in [0, 1].
Non-finite values (NaN, inf, -inf) are treated as invalid and fall back
to the default before clamping, preventing them from dominating ranking.
The ``default`` parameter is assumed to be a finite value.
"""
try:
confidence = float(value)
except (TypeError, ValueError):
return max(0.0, min(1.0, default))
if not math.isfinite(confidence):
return max(0.0, min(1.0, default))
return max(0.0, min(1.0, confidence))
def _labelled_summaries(section: Any, labelled_keys: tuple[tuple[str, str], ...]) -> list[str]:
    """Return ``"Label: summary"`` strings for each key that holds a non-empty summary."""
    if not isinstance(section, dict):
        return []
    entries: list[str] = []
    for key, label in labelled_keys:
        entry = section.get(key)
        # Tolerate null or non-object entries instead of failing on .get().
        if isinstance(entry, dict) and entry.get("summary"):
            entries.append(f"{label}: {entry['summary']}")
    return entries


def _render_fact_line(fact: dict[str, Any]) -> str:
    """Render one pre-validated fact as ``- [category | confidence] content``."""
    content = fact["content"].strip()
    category = str(fact.get("category", "context")).strip() or "context"
    confidence = _coerce_confidence(fact.get("confidence"), default=0.0)
    source_error = fact.get("sourceError")
    # Correction facts may carry the mistake to avoid alongside the fix.
    if category == "correction" and isinstance(source_error, str) and source_error.strip():
        return f"- [{category} | {confidence:.2f}] {content} (avoid: {source_error.strip()})"
    return f"- [{category} | {confidence:.2f}] {content}"


def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
    """Format memory data for injection into the system prompt.

    The output has up to three sections separated by blank lines: "User
    Context", "History" and "Facts". Facts are listed in descending confidence
    order and added only while the running token count stays within
    ``max_tokens``.

    Args:
        memory_data: The memory data dictionary. Missing, null or malformed
            sections are skipped.
        max_tokens: Maximum tokens to use, as measured by ``_count_tokens``.

    Returns:
        The formatted memory string, or ``""`` when there is nothing to inject.
        If the result still exceeds the budget it is cut by a character
        estimate and suffixed with ``"\\n..."``.
    """
    if not memory_data or not isinstance(memory_data, dict):
        return ""

    sections = []

    user_sections = _labelled_summaries(
        memory_data.get("user"),
        (("workContext", "Work"), ("personalContext", "Personal"), ("topOfMind", "Current Focus")),
    )
    if user_sections:
        sections.append("User Context:\n" + "\n".join(f"- {s}" for s in user_sections))

    history_sections = _labelled_summaries(
        memory_data.get("history"),
        (("recentMonths", "Recent"), ("earlierContext", "Earlier"), ("longTermBackground", "Background")),
    )
    if history_sections:
        sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))

    # Facts: keep only well-formed entries, highest confidence first.
    facts_data = memory_data.get("facts", [])
    ranked_facts = []
    if isinstance(facts_data, list) and facts_data:
        ranked_facts = sorted(
            (f for f in facts_data if isinstance(f, dict) and isinstance(f.get("content"), str) and f["content"].strip()),
            key=lambda fact: _coerce_confidence(fact.get("confidence"), default=0.0),
            reverse=True,
        )

    if ranked_facts:
        # Tokenize the existing sections once, then account for each fact line
        # incrementally instead of re-tokenizing the whole string.
        facts_header = "Facts:\n"
        base_text = "\n\n".join(sections)
        if base_text:
            # Includes the blank-line separator that precedes the facts section.
            running_tokens = _count_tokens(base_text) + _count_tokens("\n\n" + facts_header)
        else:
            running_tokens = _count_tokens(facts_header)

        fact_lines: list[str] = []
        for fact in ranked_facts:
            line = _render_fact_line(fact)
            # Every line after the first is preceded by a newline.
            line_tokens = _count_tokens(("\n" + line) if fact_lines else line)
            if running_tokens + line_tokens > max_tokens:
                break
            fact_lines.append(line)
            running_tokens += line_tokens

        if fact_lines:
            sections.append(facts_header + "\n".join(fact_lines))

    if not sections:
        return ""

    result = "\n\n".join(sections)

    # Final safety net: the context sections alone may exceed the budget.
    token_count = _count_tokens(result)
    if token_count > max_tokens:
        # Estimate the characters to keep from the observed chars-per-token ratio.
        char_per_token = len(result) / token_count
        target_chars = int(max_tokens * char_per_token * 0.95)  # 95% to leave margin
        result = result[:target_chars] + "\n..."
    return result
def format_conversation_for_update(messages: list[Any]) -> str:
    """Render conversation messages as text for the memory update prompt.

    Human messages become ``User: ...`` entries and AI messages become
    ``Assistant: ...`` entries; other message types are left out. Entries are
    separated by blank lines and bodies longer than 1000 characters are cut
    and suffixed with ``...``.

    Args:
        messages: Conversation messages exposing ``type`` and ``content``.

    Returns:
        The formatted conversation string.
    """

    def _text_fragments(parts):
        # Multimodal content: keep plain strings and the "text" of dict parts.
        for part in parts:
            if isinstance(part, dict):
                part = part.get("text")
            if isinstance(part, str):
                yield part

    rendered = []
    for message in messages:
        role = getattr(message, "type", "unknown")
        content = getattr(message, "content", str(message))

        if isinstance(content, list):
            fragments = list(_text_fragments(content))
            content = " ".join(fragments) if fragments else str(content)

        is_human = role == "human"
        if is_human:
            # Drop uploaded_files tags so ephemeral file paths never reach
            # long-term memory; an upload-only turn is skipped entirely.
            content = re.sub(r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", "", str(content)).strip()
            if not content:
                continue

        as_text = str(content)
        if len(as_text) > 1000:
            content = as_text[:1000] + "..."

        if is_human:
            rendered.append(f"User: {content}")
        elif role == "ai":
            rendered.append(f"Assistant: {content}")

    return "\n\n".join(rendered)

View File

@@ -0,0 +1,219 @@
"""Memory update queue with debounce mechanism."""
import logging
import threading
import time
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any
from deerflow.config.memory_config import get_memory_config
logger = logging.getLogger(__name__)
@dataclass
class ConversationContext:
    """Context for a conversation to be processed for memory update."""

    # The thread ID the conversation belongs to.
    thread_id: str
    # The conversation messages to process.
    messages: list[Any]
    # When the context was created (timezone-aware UTC, set at construction).
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
    # If provided, memory is stored per-agent; None uses global memory.
    agent_name: str | None = None
    # Whether recent turns include an explicit correction signal.
    correction_detected: bool = False
    # Whether recent turns include a positive reinforcement signal.
    reinforcement_detected: bool = False
class MemoryUpdateQueue:
"""Queue for memory updates with debounce mechanism.
This queue collects conversation contexts and processes them after
a configurable debounce period. Multiple conversations received within
the debounce window are batched together.
"""
def __init__(self):
"""Initialize the memory update queue."""
self._queue: list[ConversationContext] = []
self._lock = threading.Lock()
self._timer: threading.Timer | None = None
self._processing = False
def add(
self,
thread_id: str,
messages: list[Any],
agent_name: str | None = None,
correction_detected: bool = False,
reinforcement_detected: bool = False,
) -> None:
"""Add a conversation to the update queue.
Args:
thread_id: The thread ID.
messages: The conversation messages.
agent_name: If provided, memory is stored per-agent. If None, uses global memory.
correction_detected: Whether recent turns include an explicit correction signal.
reinforcement_detected: Whether recent turns include a positive reinforcement signal.
"""
config = get_memory_config()
if not config.enabled:
return
with self._lock:
existing_context = next(
(context for context in self._queue if context.thread_id == thread_id),
None,
)
merged_correction_detected = correction_detected or (existing_context.correction_detected if existing_context is not None else False)
merged_reinforcement_detected = reinforcement_detected or (existing_context.reinforcement_detected if existing_context is not None else False)
context = ConversationContext(
thread_id=thread_id,
messages=messages,
agent_name=agent_name,
correction_detected=merged_correction_detected,
reinforcement_detected=merged_reinforcement_detected,
)
# Check if this thread already has a pending update
# If so, replace it with the newer one
self._queue = [c for c in self._queue if c.thread_id != thread_id]
self._queue.append(context)
# Reset or start the debounce timer
self._reset_timer()
logger.info("Memory update queued for thread %s, queue size: %d", thread_id, len(self._queue))
def _reset_timer(self) -> None:
"""Reset the debounce timer."""
config = get_memory_config()
# Cancel existing timer if any
if self._timer is not None:
self._timer.cancel()
# Start new timer
self._timer = threading.Timer(
config.debounce_seconds,
self._process_queue,
)
self._timer.daemon = True
self._timer.start()
logger.debug("Memory update timer set for %ss", config.debounce_seconds)
def _process_queue(self) -> None:
"""Process all queued conversation contexts."""
# Import here to avoid circular dependency
from deerflow.agents.memory.updater import MemoryUpdater
with self._lock:
if self._processing:
# Already processing, reschedule
self._reset_timer()
return
if not self._queue:
return
self._processing = True
contexts_to_process = self._queue.copy()
self._queue.clear()
self._timer = None
logger.info("Processing %d queued memory updates", len(contexts_to_process))
try:
updater = MemoryUpdater()
for context in contexts_to_process:
try:
logger.info("Updating memory for thread %s", context.thread_id)
success = updater.update_memory(
messages=context.messages,
thread_id=context.thread_id,
agent_name=context.agent_name,
correction_detected=context.correction_detected,
reinforcement_detected=context.reinforcement_detected,
)
if success:
logger.info("Memory updated successfully for thread %s", context.thread_id)
else:
logger.warning("Memory update skipped/failed for thread %s", context.thread_id)
except Exception as e:
logger.error("Error updating memory for thread %s: %s", context.thread_id, e)
# Small delay between updates to avoid rate limiting
if len(contexts_to_process) > 1:
time.sleep(0.5)
finally:
with self._lock:
self._processing = False
def flush(self) -> None:
"""Force immediate processing of the queue.
This is useful for testing or graceful shutdown.
"""
with self._lock:
if self._timer is not None:
self._timer.cancel()
self._timer = None
self._process_queue()
def clear(self) -> None:
"""Clear the queue without processing.
This is useful for testing.
"""
with self._lock:
if self._timer is not None:
self._timer.cancel()
self._timer = None
self._queue.clear()
self._processing = False
@property
def pending_count(self) -> int:
"""Get the number of pending updates."""
with self._lock:
return len(self._queue)
@property
def is_processing(self) -> bool:
"""Check if the queue is currently being processed."""
with self._lock:
return self._processing
# Global singleton instance; None until get_memory_queue() creates it.
_memory_queue: MemoryUpdateQueue | None = None
# Held by get_memory_queue() and reset_memory_queue() while they touch the singleton.
_queue_lock = threading.Lock()
def get_memory_queue() -> MemoryUpdateQueue:
    """Return the global memory update queue, creating it on first use.

    Returns:
        The shared ``MemoryUpdateQueue`` instance.
    """
    global _memory_queue
    with _queue_lock:
        queue = _memory_queue
        if queue is None:
            queue = MemoryUpdateQueue()
            _memory_queue = queue
    return queue
def reset_memory_queue() -> None:
    """Clear and discard the global memory queue.

    The next call to ``get_memory_queue`` creates a fresh instance.
    This is useful for testing.
    """
    global _memory_queue
    with _queue_lock:
        current = _memory_queue
        if current is None:
            # Nothing to clear; the singleton is already unset.
            return
        current.clear()
        _memory_queue = None

View File

@@ -0,0 +1,205 @@
"""Memory storage providers."""
import abc
import json
import logging
import threading
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from deerflow.config.agents_config import AGENT_NAME_PATTERN
from deerflow.config.memory_config import get_memory_config
from deerflow.config.paths import get_paths
logger = logging.getLogger(__name__)
def utc_now_iso_z() -> str:
"""Current UTC time as ISO-8601 with ``Z`` suffix (matches prior naive-UTC output)."""
return datetime.now(UTC).isoformat().removesuffix("+00:00") + "Z"
def create_empty_memory() -> dict[str, Any]:
    """Create an empty memory structure.

    Returns:
        A new dictionary with the format version, the current timestamp, blank
        ``user`` and ``history`` summaries, and an empty ``facts`` list.
    """

    def _blank_entries(*names: str) -> dict[str, dict[str, str]]:
        # Each entry gets its own dict so later updates never share state.
        return {name: {"summary": "", "updatedAt": ""} for name in names}

    return {
        "version": "1.0",
        "lastUpdated": utc_now_iso_z(),
        "user": _blank_entries("workContext", "personalContext", "topOfMind"),
        "history": _blank_entries("recentMonths", "earlierContext", "longTermBackground"),
        "facts": [],
    }
class MemoryStorage(abc.ABC):
"""Abstract base class for memory storage providers."""
@abc.abstractmethod
def load(self, agent_name: str | None = None) -> dict[str, Any]:
"""Load memory data for the given agent."""
pass
@abc.abstractmethod
def reload(self, agent_name: str | None = None) -> dict[str, Any]:
"""Force reload memory data for the given agent."""
pass
@abc.abstractmethod
def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
"""Save memory data for the given agent."""
pass
class FileMemoryStorage(MemoryStorage):
    """File-based memory storage provider.

    Memory is kept as JSON files and cached in-process per agent name. A cached
    entry is reused only while the file's modification time is unchanged.
    """

    def __init__(self):
        """Initialize the file memory storage."""
        # Per-agent memory cache: keyed by agent_name (None = global)
        # Value: (memory_data, file_mtime)
        self._memory_cache: dict[str | None, tuple[dict[str, Any], float | None]] = {}

    def _validate_agent_name(self, agent_name: str) -> None:
        """Validate that the agent name is safe to use in filesystem paths.

        Uses the repository's established AGENT_NAME_PATTERN to ensure consistency
        across the codebase and prevent path traversal or other problematic characters.

        Raises:
            ValueError: If the name is empty or does not match the pattern.
        """
        if not agent_name:
            raise ValueError("Agent name must be a non-empty string.")
        if not AGENT_NAME_PATTERN.match(agent_name):
            raise ValueError(f"Invalid agent name {agent_name!r}: names must match {AGENT_NAME_PATTERN.pattern}")

    def _get_memory_file_path(self, agent_name: str | None = None) -> Path:
        """Get the path to the memory file.

        A named agent uses its own memory file. The global memory uses the
        configured ``storage_path`` (relative paths are resolved against the
        base directory) or the default memory file when none is configured.
        """
        if agent_name is not None:
            self._validate_agent_name(agent_name)
            return get_paths().agent_memory_file(agent_name)

        config = get_memory_config()
        if config.storage_path:
            p = Path(config.storage_path)
            return p if p.is_absolute() else get_paths().base_dir / p
        return get_paths().memory_file

    @staticmethod
    def _current_mtime(file_path: Path) -> float | None:
        """Return the file's modification time, or None if it is missing or unreadable."""
        try:
            return file_path.stat().st_mtime if file_path.exists() else None
        except OSError:
            return None

    def _load_memory_from_file(self, agent_name: str | None = None) -> dict[str, Any]:
        """Load memory data from file.

        Returns an empty memory structure when the file is missing, cannot be
        read or decoded, or does not contain a JSON object.
        """
        file_path = self._get_memory_file_path(agent_name)

        if not file_path.exists():
            return create_empty_memory()

        try:
            with open(file_path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Failed to load memory file: %s", e)
            return create_empty_memory()

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. null or a list): callers expect a dict.
            logger.warning("Failed to load memory file: expected a JSON object, got %s", type(data).__name__)
            return create_empty_memory()
        return data

    def load(self, agent_name: str | None = None) -> dict[str, Any]:
        """Load memory data (cached with file modification time check)."""
        file_path = self._get_memory_file_path(agent_name)
        current_mtime = self._current_mtime(file_path)

        cached = self._memory_cache.get(agent_name)
        if cached is None or cached[1] != current_mtime:
            memory_data = self._load_memory_from_file(agent_name)
            self._memory_cache[agent_name] = (memory_data, current_mtime)
            return memory_data

        return cached[0]

    def reload(self, agent_name: str | None = None) -> dict[str, Any]:
        """Reload memory data from file, forcing cache invalidation."""
        file_path = self._get_memory_file_path(agent_name)
        memory_data = self._load_memory_from_file(agent_name)
        self._memory_cache[agent_name] = (memory_data, self._current_mtime(file_path))
        return memory_data

    def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
        """Save memory data to file and update cache.

        The data is written to a sibling ``.tmp`` file and then moved over the
        target. ``memory_data["lastUpdated"]`` is set to the current time as
        part of saving.

        Returns:
            True on success, False when an OSError prevented the save.
        """
        file_path = self._get_memory_file_path(agent_name)

        try:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            memory_data["lastUpdated"] = utc_now_iso_z()

            temp_path = file_path.with_suffix(".tmp")
            try:
                with open(temp_path, "w", encoding="utf-8") as f:
                    json.dump(memory_data, f, indent=2, ensure_ascii=False)
                temp_path.replace(file_path)
            except Exception:
                # Do not leave a partially written temp file behind. Cleanup is
                # best effort; the original error is what gets reported.
                try:
                    temp_path.unlink(missing_ok=True)
                except OSError:
                    pass
                raise

            self._memory_cache[agent_name] = (memory_data, self._current_mtime(file_path))
            logger.info("Memory saved to %s", file_path)
            return True
        except OSError as e:
            logger.error("Failed to save memory file: %s", e)
            return False
# Module-level storage singleton; stays None until get_memory_storage() builds it.
_storage_instance: MemoryStorage | None = None
# Held by get_memory_storage() while it constructs the singleton above.
_storage_lock = threading.Lock()
def get_memory_storage() -> MemoryStorage:
    """Return the shared memory storage, building it on first use.

    The class named by ``config.storage_class`` (a dotted path) is imported
    and instantiated. If anything in that process fails, or the target is not
    a ``MemoryStorage`` subclass, the error is logged and a
    ``FileMemoryStorage`` is used instead.
    """
    global _storage_instance

    # Fast path: already built, no locking needed.
    existing = _storage_instance
    if existing is not None:
        return existing

    with _storage_lock:
        # Another thread may have finished construction while we waited.
        if _storage_instance is None:
            storage_class_path = get_memory_config().storage_class
            try:
                import importlib

                module_path, class_name = storage_class_path.rsplit(".", 1)
                storage_class = getattr(importlib.import_module(module_path), class_name)

                # Validate that the configured storage is a MemoryStorage implementation
                if not isinstance(storage_class, type):
                    raise TypeError(f"Configured memory storage '{storage_class_path}' is not a class: {storage_class!r}")
                if not issubclass(storage_class, MemoryStorage):
                    raise TypeError(f"Configured memory storage '{storage_class_path}' is not a subclass of MemoryStorage")

                _storage_instance = storage_class()
            except Exception as e:
                logger.error(
                    "Failed to load memory storage %s, falling back to FileMemoryStorage: %s",
                    storage_class_path,
                    e,
                )
                _storage_instance = FileMemoryStorage()

        return _storage_instance

View File

@@ -0,0 +1,472 @@
"""Memory updater for reading, writing, and updating memory data."""
import json
import logging
import math
import re
import uuid
from typing import Any
from deerflow.agents.memory.prompt import (
MEMORY_UPDATE_PROMPT,
format_conversation_for_update,
)
from deerflow.agents.memory.storage import (
create_empty_memory,
get_memory_storage,
utc_now_iso_z,
)
from deerflow.config.memory_config import get_memory_config
from deerflow.models import create_chat_model
logger = logging.getLogger(__name__)
def _create_empty_memory() -> dict[str, Any]:
    """Return a fresh empty memory structure.

    Kept for backward compatibility; delegates to the storage layer's
    ``create_empty_memory`` factory.
    """
    empty_memory = create_empty_memory()
    return empty_memory
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
    """Save ``memory_data`` through the configured memory storage.

    Kept for backward compatibility; returns whatever the storage's ``save``
    returns.
    """
    storage = get_memory_storage()
    return storage.save(memory_data, agent_name)
def get_memory_data(agent_name: str | None = None) -> dict[str, Any]:
    """Return the current memory data for ``agent_name`` from the storage provider."""
    storage = get_memory_storage()
    return storage.load(agent_name)
def reload_memory_data(agent_name: str | None = None) -> dict[str, Any]:
    """Ask the storage provider to reload memory data for ``agent_name`` and return it."""
    storage = get_memory_storage()
    return storage.reload(agent_name)
def import_memory_data(memory_data: dict[str, Any], agent_name: str | None = None) -> dict[str, Any]:
    """Store an imported memory payload and return what the storage now holds.

    Args:
        memory_data: Full memory payload to persist.
        agent_name: If provided, imports into per-agent memory.

    Returns:
        The memory data as loaded back from the storage after saving.

    Raises:
        OSError: If the storage reports that saving failed.
    """
    storage = get_memory_storage()
    saved = storage.save(memory_data, agent_name)
    if saved:
        return storage.load(agent_name)
    raise OSError("Failed to save imported memory data")
def clear_memory_data(agent_name: str | None = None) -> dict[str, Any]:
    """Replace the stored memory with an empty structure and return it.

    Raises:
        OSError: If the empty structure could not be saved.
    """
    blank_memory = create_empty_memory()
    if _save_memory_to_file(blank_memory, agent_name):
        return blank_memory
    raise OSError("Failed to save cleared memory data")
def _validate_confidence(confidence: float) -> float:
"""Validate persisted fact confidence so stored JSON stays standards-compliant."""
if not math.isfinite(confidence) or confidence < 0 or confidence > 1:
raise ValueError("confidence")
return confidence
def create_memory_fact(
    content: str,
    category: str = "context",
    confidence: float = 0.5,
    agent_name: str | None = None,
) -> dict[str, Any]:
    """Append a manually created fact to memory and persist the result.

    Args:
        content: Fact text; surrounding whitespace is stripped.
        category: Fact category; a blank value becomes ``"context"``.
        confidence: Confidence score, validated by ``_validate_confidence``.
        agent_name: If provided, targets per-agent memory.

    Returns:
        A copy of the memory data with the new fact appended.

    Raises:
        ValueError: If ``content`` is blank or ``confidence`` is invalid.
        OSError: If the updated memory could not be saved.
    """
    fact_text = content.strip()
    if not fact_text:
        raise ValueError("content")
    fact_category = category.strip() or "context"
    fact_confidence = _validate_confidence(confidence)
    created_at = utc_now_iso_z()

    memory_data = get_memory_data(agent_name)
    new_fact = {
        "id": f"fact_{uuid.uuid4().hex[:8]}",
        "content": fact_text,
        "category": fact_category,
        "confidence": fact_confidence,
        "createdAt": created_at,
        "source": "manual",
    }
    # Build new top-level dict and facts list so the loaded data is not mutated here.
    updated_memory = {**memory_data, "facts": [*memory_data.get("facts", []), new_fact]}

    if _save_memory_to_file(updated_memory, agent_name):
        return updated_memory
    raise OSError("Failed to save memory data after creating fact")
def delete_memory_fact(fact_id: str, agent_name: str | None = None) -> dict[str, Any]:
    """Remove every fact whose id equals ``fact_id`` and persist the result.

    Returns:
        A copy of the memory data without the removed fact(s).

    Raises:
        KeyError: If no fact has the given id.
        OSError: If the updated memory could not be saved.
    """
    memory_data = get_memory_data(agent_name)
    original_facts = memory_data.get("facts", [])

    remaining_facts = []
    for candidate in original_facts:
        if candidate.get("id") != fact_id:
            remaining_facts.append(candidate)

    # Nothing was filtered out, so the id does not exist.
    if len(remaining_facts) == len(original_facts):
        raise KeyError(fact_id)

    updated_memory = {**memory_data, "facts": remaining_facts}
    if _save_memory_to_file(updated_memory, agent_name):
        return updated_memory
    raise OSError(f"Failed to save memory data after deleting fact '{fact_id}'")
def update_memory_fact(
    fact_id: str,
    content: str | None = None,
    category: str | None = None,
    confidence: float | None = None,
    agent_name: str | None = None,
) -> dict[str, Any]:
    """Change selected fields of an existing fact and persist the result.

    Only arguments that are not ``None`` are applied. Facts are copied before
    being modified, so the loaded memory data is not mutated here.

    Returns:
        A copy of the memory data containing the updated fact.

    Raises:
        KeyError: If no fact has the given id.
        ValueError: If ``content`` is blank or ``confidence`` is invalid.
        OSError: If the updated memory could not be saved.
    """
    memory_data = get_memory_data(agent_name)

    rebuilt_facts: list[dict[str, Any]] = []
    matched = False
    for existing in memory_data.get("facts", []):
        if existing.get("id") != fact_id:
            rebuilt_facts.append(existing)
            continue

        matched = True
        revised = dict(existing)
        if content is not None:
            new_text = content.strip()
            if not new_text:
                raise ValueError("content")
            revised["content"] = new_text
        if category is not None:
            revised["category"] = category.strip() or "context"
        if confidence is not None:
            revised["confidence"] = _validate_confidence(confidence)
        rebuilt_facts.append(revised)

    if not matched:
        raise KeyError(fact_id)

    updated_memory = {**memory_data, "facts": rebuilt_facts}
    if _save_memory_to_file(updated_memory, agent_name):
        return updated_memory
    raise OSError(f"Failed to save memory data after updating fact '{fact_id}'")
def _extract_text(content: Any) -> str:
"""Extract plain text from LLM response content (str or list of content blocks).
Modern LLMs may return structured content as a list of blocks instead of a
plain string, e.g. [{"type": "text", "text": "..."}]. Using str() on such
content produces Python repr instead of the actual text, breaking JSON
parsing downstream.
String chunks are concatenated without separators to avoid corrupting
chunked JSON/text payloads. Dict-based text blocks are treated as full text
blocks and joined with newlines for readability.
"""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: list[str] = []
pending_str_parts: list[str] = []
def flush_pending_str_parts() -> None:
if pending_str_parts:
pieces.append("".join(pending_str_parts))
pending_str_parts.clear()
for block in content:
if isinstance(block, str):
pending_str_parts.append(block)
elif isinstance(block, dict):
flush_pending_str_parts()
text_val = block.get("text")
if isinstance(text_val, str):
pieces.append(text_val)
flush_pending_str_parts()
return "\n".join(pieces)
return str(content)
# Matches sentences that describe a file-upload *event* rather than general
# file-related work. Deliberately narrow to avoid removing legitimate facts
# such as "User works with CSV files" or "prefers PDF export".
_UPLOAD_SENTENCE_RE = re.compile(
r"[^.!?]*\b(?:"
r"upload(?:ed|ing)?(?:\s+\w+){0,3}\s+(?:file|files?|document|documents?|attachment|attachments?)"
r"|file\s+upload"
r"|/mnt/user-data/uploads/"
r"|<uploaded_files>"
r")[^.!?]*[.!?]?\s*",
re.IGNORECASE,
)
def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str, Any]:
    """Remove sentences about file uploads from all memory summaries and facts.

    Uploaded files are session-scoped; persisting upload events in long-term
    memory causes the agent to search for non-existent files in future sessions.

    Malformed entries (a section that is not a dict, a summary or fact content
    that is not a string) are left untouched instead of raising, so one bad
    value cannot abort the whole memory update.

    Args:
        memory_data: Memory structure; modified in place.

    Returns:
        The same ``memory_data`` object.
    """

    def mentions_upload(fact: Any) -> bool:
        content = fact.get("content") if isinstance(fact, dict) else None
        return isinstance(content, str) and _UPLOAD_SENTENCE_RE.search(content) is not None

    # Scrub summaries in user/history sections
    for section in ("user", "history"):
        section_data = memory_data.get(section)
        if not isinstance(section_data, dict):
            continue
        for entry in section_data.values():
            if not isinstance(entry, dict):
                continue
            summary = entry.get("summary")
            if not isinstance(summary, str):
                continue
            cleaned = _UPLOAD_SENTENCE_RE.sub("", summary).strip()
            # Removing a sentence can leave runs of spaces behind.
            entry["summary"] = re.sub(r" +", " ", cleaned)

    # Also remove any facts that describe upload events
    facts = memory_data.get("facts", [])
    if facts:
        memory_data["facts"] = [f for f in facts if not mentions_upload(f)]
    return memory_data
def _fact_content_key(content: Any) -> str | None:
if not isinstance(content, str):
return None
stripped = content.strip()
if not stripped:
return None
return stripped.casefold()
class MemoryUpdater:
    """Updates memory using LLM based on conversation context."""

    def __init__(self, model_name: str | None = None):
        """Initialize the memory updater.

        Args:
            model_name: Optional model name to use. If None, uses config or default.
        """
        self._model_name = model_name

    def _get_model(self):
        """Get the model for memory updates (explicit name first, then config)."""
        config = get_memory_config()
        model_name = self._model_name or config.model_name
        return create_chat_model(name=model_name, thinking_enabled=False)

    @staticmethod
    def _build_signal_hint(correction_detected: bool, reinforcement_detected: bool) -> str:
        """Build the extra prompt hint for detected correction/reinforcement signals."""
        hints: list[str] = []
        if correction_detected:
            hints.append(
                "IMPORTANT: Explicit correction signals were detected in this conversation. "
                "Pay special attention to what the agent got wrong, what the user corrected, "
                "and record the correct approach as a fact with category "
                '"correction" and confidence >= 0.95 when appropriate.'
            )
        if reinforcement_detected:
            hints.append(
                "IMPORTANT: Positive reinforcement signals were detected in this conversation. "
                "The user explicitly confirmed the agent's approach was correct or helpful. "
                "Record the confirmed approach, style, or preference as a fact with category "
                '"preference" or "behavior" and confidence >= 0.9 when appropriate.'
            )
        return "\n".join(hints)

    @staticmethod
    def _strip_code_fence(response_text: str) -> str:
        """Remove a surrounding markdown code fence from the response, if present."""
        if not response_text.startswith("```"):
            return response_text
        lines = response_text.split("\n")
        # Drop the opening fence line, and the closing one when it is there.
        return "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])

    def update_memory(
        self,
        messages: list[Any],
        thread_id: str | None = None,
        agent_name: str | None = None,
        correction_detected: bool = False,
        reinforcement_detected: bool = False,
    ) -> bool:
        """Update memory based on conversation messages.

        Args:
            messages: List of conversation messages.
            thread_id: Optional thread ID for tracking source.
            agent_name: If provided, updates per-agent memory. If None, updates global memory.
            correction_detected: Whether recent turns include an explicit correction signal.
            reinforcement_detected: Whether recent turns include a positive reinforcement signal.

        Returns:
            True if update was successful, False otherwise. Errors are logged,
            not raised.
        """
        import copy

        config = get_memory_config()
        if not config.enabled:
            return False
        if not messages:
            return False

        try:
            # Get current memory
            current_memory = get_memory_data(agent_name)

            # Format conversation for prompt
            conversation_text = format_conversation_for_update(messages)
            if not conversation_text.strip():
                return False

            # Build prompt
            prompt = MEMORY_UPDATE_PROMPT.format(
                current_memory=json.dumps(current_memory, indent=2),
                conversation=conversation_text,
                correction_hint=self._build_signal_hint(correction_detected, reinforcement_detected),
            )

            # Call LLM
            model = self._get_model()
            response = model.invoke(prompt)
            response_text = self._strip_code_fence(_extract_text(response.content).strip())

            # Parse response
            update_data = json.loads(response_text)
            if not isinstance(update_data, dict):
                logger.warning(
                    "Failed to parse LLM response for memory update: expected a JSON object, got %s",
                    type(update_data).__name__,
                )
                return False

            # Apply updates to a private copy. The dict returned by
            # get_memory_data() is not ours to modify: if applying or saving
            # fails part-way, the loaded data must not carry unsaved changes.
            updated_memory = self._apply_updates(copy.deepcopy(current_memory), update_data, thread_id)

            # Strip file-upload mentions from all summaries before saving.
            # Uploaded files are session-scoped and won't exist in future sessions,
            # so recording upload events in long-term memory causes the agent to
            # try (and fail) to locate those files in subsequent conversations.
            updated_memory = _strip_upload_mentions_from_memory(updated_memory)

            # Save
            return get_memory_storage().save(updated_memory, agent_name)
        except json.JSONDecodeError as e:
            logger.warning("Failed to parse LLM response for memory update: %s", e)
            return False
        except Exception as e:
            logger.exception("Memory update failed: %s", e)
            return False

    def _apply_updates(
        self,
        current_memory: dict[str, Any],
        update_data: dict[str, Any],
        thread_id: str | None = None,
    ) -> dict[str, Any]:
        """Apply LLM-generated updates to memory.

        ``current_memory`` is modified in place and returned. Malformed parts
        of ``update_data`` (non-dict sections or facts, non-numeric or
        non-finite confidence, non-string or blank content) are skipped.

        Args:
            current_memory: Current memory data.
            update_data: Updates from LLM.
            thread_id: Optional thread ID for tracking.

        Returns:
            Updated memory data.
        """
        config = get_memory_config()
        now = utc_now_iso_z()

        # Update user and history summary sections
        section_groups = (
            ("user", ("workContext", "personalContext", "topOfMind")),
            ("history", ("recentMonths", "earlierContext", "longTermBackground")),
        )
        for group, sections in section_groups:
            group_updates = update_data.get(group)
            if not isinstance(group_updates, dict):
                continue
            for section in sections:
                section_data = group_updates.get(section)
                if not isinstance(section_data, dict):
                    continue
                if section_data.get("shouldUpdate") and section_data.get("summary"):
                    current_memory.setdefault(group, {})[section] = {
                        "summary": section_data["summary"],
                        "updatedAt": now,
                    }

        # Remove facts
        facts_to_remove = set(update_data.get("factsToRemove") or [])
        if facts_to_remove:
            current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]

        # Add new facts
        facts = current_memory.setdefault("facts", [])
        existing_fact_keys = {fact_key for fact_key in (_fact_content_key(fact.get("content")) for fact in facts) if fact_key is not None}
        for fact in update_data.get("newFacts") or []:
            if not isinstance(fact, dict):
                continue
            confidence = fact.get("confidence", 0.5)
            # NaN/inf would make the persisted JSON non-standard; non-numbers cannot be compared.
            if not isinstance(confidence, (int, float)) or not math.isfinite(confidence):
                continue
            if confidence < config.fact_confidence_threshold:
                continue
            raw_content = fact.get("content", "")
            if not isinstance(raw_content, str):
                continue
            normalized_content = raw_content.strip()
            fact_key = _fact_content_key(normalized_content)
            # A None key means blank content; such a fact carries no information.
            if fact_key is None or fact_key in existing_fact_keys:
                continue

            fact_entry = {
                "id": f"fact_{uuid.uuid4().hex[:8]}",
                "content": normalized_content,
                "category": fact.get("category", "context"),
                "confidence": confidence,
                "createdAt": now,
                "source": thread_id or "unknown",
            }
            source_error = fact.get("sourceError")
            if isinstance(source_error, str):
                normalized_source_error = source_error.strip()
                if normalized_source_error:
                    fact_entry["sourceError"] = normalized_source_error
            facts.append(fact_entry)
            existing_fact_keys.add(fact_key)

        # Enforce max facts limit
        if len(facts) > config.max_facts:
            # Sort by confidence and keep top ones
            current_memory["facts"] = sorted(
                facts,
                key=lambda f: f.get("confidence", 0),
                reverse=True,
            )[: config.max_facts]

        return current_memory
def update_memory_from_conversation(
    messages: list[Any],
    thread_id: str | None = None,
    agent_name: str | None = None,
    correction_detected: bool = False,
    reinforcement_detected: bool = False,
) -> bool:
    """Run a one-off memory update using a default-configured ``MemoryUpdater``.

    Args:
        messages: List of conversation messages.
        thread_id: Optional thread ID.
        agent_name: If provided, updates per-agent memory. If None, updates global memory.
        correction_detected: Whether recent turns include an explicit correction signal.
        reinforcement_detected: Whether recent turns include a positive reinforcement signal.

    Returns:
        True if successful, False otherwise.
    """
    return MemoryUpdater().update_memory(
        messages,
        thread_id,
        agent_name,
        correction_detected,
        reinforcement_detected,
    )

View File

@@ -0,0 +1,191 @@
"""Middleware for intercepting clarification requests and presenting them to the user."""
import json
import logging
from collections.abc import Callable
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import ToolMessage
from langgraph.graph import END
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
logger = logging.getLogger(__name__)
class ClarificationMiddlewareState(AgentState):
    """State schema for the clarification middleware; declares no extra fields.

    Compatible with the `ThreadState` schema.
    """
class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
    """Intercepts clarification tool calls and interrupts execution to present questions to the user.

    When the model calls the `ask_clarification` tool, this middleware:
    1. Intercepts the tool call before execution
    2. Extracts the clarification question and metadata
    3. Formats a user-friendly message
    4. Returns a Command that interrupts execution and presents the question
    5. Waits for user response before continuing

    This replaces the tool-based approach where clarification continued the conversation flow.
    """

    state_schema = ClarificationMiddlewareState

    def _is_chinese(self, text: str) -> bool:
        """Check if text contains Chinese characters.

        Args:
            text: Text to check

        Returns:
            True if any character lies in the range U+4E00..U+9FFF
        """
        return any("\u4e00" <= char <= "\u9fff" for char in text)

    def _format_clarification_message(self, args: dict) -> str:
        """Format the clarification arguments into a user-friendly message.

        Args:
            args: The tool call arguments containing clarification details
                (``question``, ``clarification_type``, ``context``, ``options``)

        Returns:
            Formatted message string
        """
        question = args.get("question", "")
        clarification_type = args.get("clarification_type", "missing_info")
        context = args.get("context")
        options = args.get("options", [])

        # Some models (e.g. Qwen3-Max) serialize array parameters as JSON strings
        # instead of native arrays. Deserialize and normalize so `options`
        # is always a list for the rendering logic below.
        if isinstance(options, str):
            try:
                options = json.loads(options)
            except (json.JSONDecodeError, TypeError):
                options = [options]
        if options is None:
            options = []
        elif not isinstance(options, list):
            options = [options]

        # Type-specific icons. Every type, including the default one and the
        # fallback for unknown types, has a non-empty icon so the message
        # never starts with a stray space.
        type_icons = {
            "missing_info": "❓",
            "ambiguous_requirement": "🤔",
            "approach_choice": "🔀",
            "risk_confirmation": "⚠️",
            "suggestion": "💡",
        }
        icon = type_icons.get(clarification_type, "❓")

        # Build the message naturally
        message_parts = []

        # Add icon and question together for a more natural flow
        if context:
            # If there's context, present it first as background
            message_parts.append(f"{icon} {context}")
            message_parts.append(f"\n{question}")
        else:
            # Just the question with icon
            message_parts.append(f"{icon} {question}")

        # Add options in a cleaner format
        if options:
            message_parts.append("")  # blank line for spacing
            for i, option in enumerate(options, 1):
                message_parts.append(f" {i}. {option}")

        return "\n".join(message_parts)

    def _handle_clarification(self, request: ToolCallRequest) -> Command:
        """Handle clarification request and return command to interrupt execution.

        Args:
            request: Tool call request

        Returns:
            Command that interrupts execution with the formatted clarification message
        """
        # Extract clarification arguments
        args = request.tool_call.get("args", {})
        question = args.get("question", "")
        logger.info("Intercepted clarification request")
        logger.debug("Clarification question: %s", question)

        # Format the clarification message
        formatted_message = self._format_clarification_message(args)

        # Get the tool call ID
        tool_call_id = request.tool_call.get("id", "")

        # Create a ToolMessage with the formatted question
        # This will be added to the message history
        tool_message = ToolMessage(
            content=formatted_message,
            tool_call_id=tool_call_id,
            name="ask_clarification",
        )

        # Return a Command that:
        # 1. Adds the formatted tool message
        # 2. Interrupts execution by going to __end__
        # Note: We don't add an extra AIMessage here - the frontend will detect
        # and display ask_clarification tool messages directly
        return Command(
            update={"messages": [tool_message]},
            goto=END,
        )

    @override
    def wrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (sync version).

        Args:
            request: Tool call request
            handler: Original tool execution handler

        Returns:
            The handler's result for other tools; for ``ask_clarification``, a
            Command that interrupts execution with the formatted clarification message
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return handler(request)
        return self._handle_clarification(request)

    @override
    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (async version).

        Args:
            request: Tool call request
            handler: Original tool execution handler (async; its result is awaited)

        Returns:
            The handler's result for other tools; for ``ask_clarification``, a
            Command that interrupts execution with the formatted clarification message
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return await handler(request)
        return self._handle_clarification(request)

View File

@@ -0,0 +1,110 @@
"""Middleware to fix dangling tool calls in message history.
A dangling tool call occurs when an AIMessage contains tool_calls but there are
no corresponding ToolMessages in the history (e.g., due to user interruption or
request cancellation). This causes LLM errors due to incomplete message format.
This middleware intercepts the model call to detect and patch such gaps by
inserting synthetic ToolMessages with an error indicator immediately after the
AIMessage that made the tool calls, ensuring correct message ordering.
Note: Uses wrap_model_call instead of before_model to ensure patches are inserted
at the correct positions (immediately after each dangling AIMessage), not appended
to the end of the message list as before_model + add_messages reducer would do.
"""
import logging
from collections.abc import Awaitable, Callable
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain.agents.middleware.types import ModelCallResult, ModelRequest, ModelResponse
from langchain_core.messages import ToolMessage
logger = logging.getLogger(__name__)
class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
    """Supplies placeholder ToolMessages for tool calls that never got a result.

    Before each model invocation the message history is scanned for AI
    messages whose ``tool_calls`` have no ToolMessage with a matching
    ``tool_call_id``. A synthetic error ToolMessage is placed directly after
    each such AI message so the LLM receives a well-formed conversation.
    """

    def _build_patched_messages(self, messages: list) -> list | None:
        """Return a copy of ``messages`` with placeholders inserted, or None.

        For every tool call id that appears on an AI message but on no
        ToolMessage, one placeholder ToolMessage is inserted right after the
        first AI message carrying that id. Tool calls without an id are
        ignored.

        Returns None if no patches are needed.
        """
        # IDs that already have a ToolMessage somewhere in the history
        answered_ids: set[str] = {m.tool_call_id for m in messages if isinstance(m, ToolMessage)}

        result: list = []
        injected_ids: set[str] = set()
        for message in messages:
            result.append(message)
            if getattr(message, "type", None) != "ai":
                continue
            for call in getattr(message, "tool_calls", None) or []:
                call_id = call.get("id")
                if not call_id or call_id in answered_ids or call_id in injected_ids:
                    continue
                result.append(
                    ToolMessage(
                        content="[Tool call was interrupted and did not return a result.]",
                        tool_call_id=call_id,
                        name=call.get("name", "unknown"),
                        status="error",
                    )
                )
                injected_ids.add(call_id)

        # Nothing was dangling: signal "no change" to the caller.
        if not injected_ids:
            return None

        patch_count = len(injected_ids)
        logger.warning(f"Injecting {patch_count} placeholder ToolMessage(s) for dangling tool calls")
        return result

    @override
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelCallResult:
        """Patch dangling tool calls in the request, then call the handler (sync)."""
        replacement = self._build_patched_messages(request.messages)
        return handler(request if replacement is None else request.override(messages=replacement))

    @override
    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelCallResult:
        """Patch dangling tool calls in the request, then await the handler (async)."""
        replacement = self._build_patched_messages(request.messages)
        return await handler(request if replacement is None else request.override(messages=replacement))

View File

@@ -0,0 +1,60 @@
"""Middleware to filter deferred tool schemas from model binding.
When tool_search is enabled, MCP tools are registered in the DeferredToolRegistry
and passed to ToolNode for execution, but their schemas should NOT be sent to the
LLM via bind_tools (that's the whole point of deferral — saving context tokens).
This middleware intercepts wrap_model_call and removes deferred tools from
request.tools so that model.bind_tools only receives active tool schemas.
The agent discovers deferred tools at runtime via the tool_search tool.
"""
import logging
from collections.abc import Awaitable, Callable
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain.agents.middleware.types import ModelCallResult, ModelRequest, ModelResponse
logger = logging.getLogger(__name__)
class DeferredToolFilterMiddleware(AgentMiddleware[AgentState]):
    """Hide deferred tools from the model while leaving them executable.

    ToolNode still holds all tools (including deferred) for execution routing,
    but the LLM only sees active tool schemas — deferred tools are discoverable
    via tool_search at runtime.
    """

    def _filter_tools(self, request: ModelRequest) -> ModelRequest:
        """Return a request whose ``tools`` exclude every deferred tool.

        The request is returned unchanged when the deferred registry is falsy
        (e.g. missing or empty); otherwise an overridden request is returned.
        """
        # Imported here rather than at module level, as in the original code.
        from deerflow.tools.builtins.tool_search import get_deferred_registry

        registry = get_deferred_registry()
        if registry:
            deferred_names = set()
            for entry in registry.entries:
                deferred_names.add(entry.name)

            active_tools = []
            for tool in request.tools:
                # Tools without a `name` attribute are never considered deferred.
                if getattr(tool, "name", None) not in deferred_names:
                    active_tools.append(tool)

            if len(active_tools) < len(request.tools):
                logger.debug(f"Filtered {len(request.tools) - len(active_tools)} deferred tool schema(s) from model binding")
            return request.override(tools=active_tools)
        return request

    @override
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelCallResult:
        """Filter deferred tools out of the request, then call the handler (sync)."""
        filtered_request = self._filter_tools(request)
        return handler(filtered_request)

    @override
    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelCallResult:
        """Filter deferred tools out of the request, then await the handler (async)."""
        filtered_request = self._filter_tools(request)
        return await handler(filtered_request)

View File

@@ -0,0 +1,275 @@
"""LLM error handling middleware with retry/backoff and user-facing fallbacks."""
from __future__ import annotations
import asyncio
import logging
import time
from collections.abc import Awaitable, Callable
from email.utils import parsedate_to_datetime
from typing import Any, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain.agents.middleware.types import (
ModelCallResult,
ModelRequest,
ModelResponse,
)
from langchain_core.messages import AIMessage
from langgraph.errors import GraphBubbleUp
logger = logging.getLogger(__name__)
# Status codes that LLMErrorHandlingMiddleware._classify_error treats as a
# retriable "transient" failure (presumably HTTP status codes — confirm
# against _extract_status_code).
_RETRIABLE_STATUS_CODES = {408, 409, 425, 429, 500, 502, 503, 504}
# Phrases looked for in the lower-cased error detail; a hit classifies the
# error as retriable with reason "busy". Keep entries lower-case.
# NOTE(review): presumably a substring match — confirm in _matches_any.
_BUSY_PATTERNS = (
"server busy",
"temporarily unavailable",
"try again later",
"please retry",
"please try again",
"overloaded",
"high demand",
"rate limit",
"负载较高",  # "load is high"
"服务繁忙",  # "service is busy"
"稍后重试",  # "retry later"
"请稍后重试",  # "please retry later"
)
# Phrases looked for in the lower-cased error detail and error code; a hit
# classifies the error as non-retriable with reason "quota". This check runs
# before all others in _classify_error.
_QUOTA_PATTERNS = (
"insufficient_quota",
"quota",
"billing",
"credit",
"payment",
"余额不足",  # "insufficient balance"
"超出限额",  # "limit exceeded"
"额度不足",  # "insufficient quota"
"欠费",  # "payment overdue"
)
# Phrases looked for in the lower-cased error detail; a hit classifies the
# error as non-retriable with reason "auth".
_AUTH_PATTERNS = (
"authentication",
"unauthorized",
"invalid api key",
"invalid_api_key",
"permission",
"forbidden",
"access denied",
"无权",  # "no permission"
"未授权",  # "unauthorized"
)
class LLMErrorHandlingMiddleware(AgentMiddleware[AgentState]):
"""Retry transient LLM errors and surface graceful assistant messages."""
retry_max_attempts: int = 3
retry_base_delay_ms: int = 1000
retry_cap_delay_ms: int = 8000
def _classify_error(self, exc: BaseException) -> tuple[bool, str]:
detail = _extract_error_detail(exc)
lowered = detail.lower()
error_code = _extract_error_code(exc)
status_code = _extract_status_code(exc)
if _matches_any(lowered, _QUOTA_PATTERNS) or _matches_any(str(error_code).lower(), _QUOTA_PATTERNS):
return False, "quota"
if _matches_any(lowered, _AUTH_PATTERNS):
return False, "auth"
exc_name = exc.__class__.__name__
if exc_name in {
"APITimeoutError",
"APIConnectionError",
"InternalServerError",
}:
return True, "transient"
if status_code in _RETRIABLE_STATUS_CODES:
return True, "transient"
if _matches_any(lowered, _BUSY_PATTERNS):
return True, "busy"
return False, "generic"
def _build_retry_delay_ms(self, attempt: int, exc: BaseException) -> int:
retry_after = _extract_retry_after_ms(exc)
if retry_after is not None:
return retry_after
backoff = self.retry_base_delay_ms * (2 ** max(0, attempt - 1))
return min(backoff, self.retry_cap_delay_ms)
def _build_retry_message(self, attempt: int, wait_ms: int, reason: str) -> str:
seconds = max(1, round(wait_ms / 1000))
reason_text = "provider is busy" if reason == "busy" else "provider request failed temporarily"
return f"LLM request retry {attempt}/{self.retry_max_attempts}: {reason_text}. Retrying in {seconds}s."
def _build_user_message(self, exc: BaseException, reason: str) -> str:
detail = _extract_error_detail(exc)
if reason == "quota":
return "The configured LLM provider rejected the request because the account is out of quota, billing is unavailable, or usage is restricted. Please fix the provider account and try again."
if reason == "auth":
return "The configured LLM provider rejected the request because authentication or access is invalid. Please check the provider credentials and try again."
if reason in {"busy", "transient"}:
return "The configured LLM provider is temporarily unavailable after multiple retries. Please wait a moment and continue the conversation."
return f"LLM request failed: {detail}"
def _emit_retry_event(self, attempt: int, wait_ms: int, reason: str) -> None:
    """Publish an ``llm_retry`` event on the LangGraph stream (best effort).

    Any failure, including a failed import or a missing stream writer, is
    logged at debug level and otherwise ignored so that it cannot break the
    retry loop.
    """
    try:
        from langgraph.config import get_stream_writer

        emit = get_stream_writer()
        event = {
            "type": "llm_retry",
            "attempt": attempt,
            "max_attempts": self.retry_max_attempts,
            "wait_ms": wait_ms,
            "reason": reason,
            "message": self._build_retry_message(attempt, wait_ms, reason),
        }
        emit(event)
    except Exception:
        logger.debug("Failed to emit llm_retry event", exc_info=True)
@override
def wrap_model_call(
    self,
    request: ModelRequest,
    handler: Callable[[ModelRequest], ModelResponse],
) -> ModelCallResult:
    """Run *handler*, retrying retriable failures (synchronous path).

    Retriable errors are retried until ``retry_max_attempts`` is reached,
    sleeping between attempts. When the error is not retriable or the budget
    is exhausted, the failure is logged and an ``AIMessage`` carrying a
    user-facing explanation is returned instead of raising.
    """
    attempt = 1
    while True:
        try:
            return handler(request)
        except GraphBubbleUp:
            # Preserve LangGraph control-flow signals (interrupt/pause/resume).
            raise
        except Exception as exc:
            retriable, reason = self._classify_error(exc)
            if not retriable or attempt >= self.retry_max_attempts:
                # Terminal failure: report it to the user as a normal reply.
                logger.warning(
                    "LLM call failed after %d attempt(s): %s",
                    attempt,
                    _extract_error_detail(exc),
                    exc_info=exc,
                )
                return AIMessage(content=self._build_user_message(exc, reason))

            wait_ms = self._build_retry_delay_ms(attempt, exc)
            logger.warning(
                "Transient LLM error on attempt %d/%d; retrying in %dms: %s",
                attempt,
                self.retry_max_attempts,
                wait_ms,
                _extract_error_detail(exc),
            )
            self._emit_retry_event(attempt, wait_ms, reason)
            time.sleep(wait_ms / 1000)
            attempt += 1
@override
async def awrap_model_call(
    self,
    request: ModelRequest,
    handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
) -> ModelCallResult:
    """Await *handler*, retrying retriable failures (asynchronous path).

    Same policy as the synchronous variant, but the wait between attempts
    uses ``asyncio.sleep`` so the event loop is not blocked.
    """
    attempt = 1
    while True:
        try:
            return await handler(request)
        except GraphBubbleUp:
            # Preserve LangGraph control-flow signals (interrupt/pause/resume).
            raise
        except Exception as exc:
            retriable, reason = self._classify_error(exc)
            if not retriable or attempt >= self.retry_max_attempts:
                # Terminal failure: report it to the user as a normal reply.
                logger.warning(
                    "LLM call failed after %d attempt(s): %s",
                    attempt,
                    _extract_error_detail(exc),
                    exc_info=exc,
                )
                return AIMessage(content=self._build_user_message(exc, reason))

            wait_ms = self._build_retry_delay_ms(attempt, exc)
            logger.warning(
                "Transient LLM error on attempt %d/%d; retrying in %dms: %s",
                attempt,
                self.retry_max_attempts,
                wait_ms,
                _extract_error_detail(exc),
            )
            self._emit_retry_event(attempt, wait_ms, reason)
            await asyncio.sleep(wait_ms / 1000)
            attempt += 1
def _matches_any(detail: str, patterns: tuple[str, ...]) -> bool:
return any(pattern in detail for pattern in patterns)
def _extract_error_code(exc: BaseException) -> Any:
for attr in ("code", "error_code"):
value = getattr(exc, attr, None)
if value not in (None, ""):
return value
body = getattr(exc, "body", None)
if isinstance(body, dict):
error = body.get("error")
if isinstance(error, dict):
for key in ("code", "type"):
value = error.get(key)
if value not in (None, ""):
return value
return None
def _extract_status_code(exc: BaseException) -> int | None:
for attr in ("status_code", "status"):
value = getattr(exc, attr, None)
if isinstance(value, int):
return value
response = getattr(exc, "response", None)
status = getattr(response, "status_code", None)
return status if isinstance(status, int) else None
def _extract_retry_after_ms(exc: BaseException) -> int | None:
response = getattr(exc, "response", None)
headers = getattr(response, "headers", None)
if headers is None:
return None
raw = None
header_name = ""
for key in ("retry-after-ms", "Retry-After-Ms", "retry-after", "Retry-After"):
header_name = key
if hasattr(headers, "get"):
raw = headers.get(key)
if raw:
break
if not raw:
return None
try:
multiplier = 1 if "ms" in header_name.lower() else 1000
return max(0, int(float(raw) * multiplier))
except (TypeError, ValueError):
try:
target = parsedate_to_datetime(str(raw))
delta = target.timestamp() - time.time()
return max(0, int(delta * 1000))
except (TypeError, ValueError, OverflowError):
return None
def _extract_error_detail(exc: BaseException) -> str:
detail = str(exc).strip()
if detail:
return detail
message = getattr(exc, "message", None)
if isinstance(message, str) and message.strip():
return message.strip()
return exc.__class__.__name__

View File

@@ -0,0 +1,372 @@
"""Middleware to detect and break repetitive tool call loops.
P0 safety: prevents the agent from calling the same tool with the same
arguments indefinitely until the recursion limit kills the run.
Detection strategy:
1. After each model response, hash the tool calls (name + args).
2. Track recent hashes in a sliding window.
3. If the same hash appears >= warn_threshold times, inject a
"you are repeating yourself — wrap up" system message (once per hash).
4. If it appears >= hard_limit times, strip all tool_calls from the
response so the agent is forced to produce a final text answer.
"""
import hashlib
import json
import logging
import threading
from collections import OrderedDict, defaultdict
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langgraph.runtime import Runtime
logger = logging.getLogger(__name__)
# Defaults — can be overridden via constructor.
# The first four configure hash-based detection (identical call sets within a
# sliding window); the last two configure the per-tool-name frequency counters.
_DEFAULT_WARN_THRESHOLD = 3  # inject warning after 3 identical calls
_DEFAULT_HARD_LIMIT = 5  # force-stop after 5 identical calls
_DEFAULT_WINDOW_SIZE = 20  # track last N tool calls
_DEFAULT_MAX_TRACKED_THREADS = 100  # LRU eviction limit
_DEFAULT_TOOL_FREQ_WARN = 30  # warn after 30 calls to the same tool type
_DEFAULT_TOOL_FREQ_HARD_LIMIT = 50  # force-stop after 50 calls to the same tool type
def _normalize_tool_call_args(raw_args: object) -> tuple[dict, str | None]:
"""Normalize tool call args to a dict plus an optional fallback key.
Some providers serialize ``args`` as a JSON string instead of a dict.
We defensively parse those cases so loop detection does not crash while
still preserving a stable fallback key for non-dict payloads.
"""
if isinstance(raw_args, dict):
return raw_args, None
if isinstance(raw_args, str):
try:
parsed = json.loads(raw_args)
except (TypeError, ValueError, json.JSONDecodeError):
return {}, raw_args
if isinstance(parsed, dict):
return parsed, None
return {}, json.dumps(parsed, sort_keys=True, default=str)
if raw_args is None:
return {}, None
return {}, json.dumps(raw_args, sort_keys=True, default=str)
def _stable_tool_key(name: str, args: dict, fallback_key: str | None) -> str:
"""Derive a stable key from salient args without overfitting to noise."""
if name == "read_file" and fallback_key is None:
path = args.get("path") or ""
start_line = args.get("start_line")
end_line = args.get("end_line")
bucket_size = 200
try:
start_line = int(start_line) if start_line is not None else 1
except (TypeError, ValueError):
start_line = 1
try:
end_line = int(end_line) if end_line is not None else start_line
except (TypeError, ValueError):
end_line = start_line
start_line, end_line = sorted((start_line, end_line))
bucket_start = max(start_line, 1)
bucket_end = max(end_line, 1)
bucket_start = (bucket_start - 1) // bucket_size
bucket_end = (bucket_end - 1) // bucket_size
return f"{path}:{bucket_start}-{bucket_end}"
# write_file / str_replace are content-sensitive: same path may be updated
# with different payloads during iteration. Using only salient fields (path)
# can collapse distinct calls, so we hash full args to reduce false positives.
if name in {"write_file", "str_replace"}:
if fallback_key is not None:
return fallback_key
return json.dumps(args, sort_keys=True, default=str)
salient_fields = ("path", "url", "query", "command", "pattern", "glob", "cmd")
stable_args = {field: args[field] for field in salient_fields if args.get(field) is not None}
if stable_args:
return json.dumps(stable_args, sort_keys=True, default=str)
if fallback_key is not None:
return fallback_key
return json.dumps(args, sort_keys=True, default=str)
def _hash_tool_calls(tool_calls: list[dict]) -> str:
"""Deterministic hash of a set of tool calls (name + stable key).
This is intended to be order-independent: the same multiset of tool calls
should always produce the same hash, regardless of their input order.
"""
# Normalize each tool call to a stable (name, key) structure.
normalized: list[str] = []
for tc in tool_calls:
name = tc.get("name", "")
args, fallback_key = _normalize_tool_call_args(tc.get("args", {}))
key = _stable_tool_key(name, args, fallback_key)
normalized.append(f"{name}:{key}")
# Sort so permutations of the same multiset of calls yield the same ordering.
normalized.sort()
blob = json.dumps(normalized, sort_keys=True, default=str)
return hashlib.md5(blob.encode()).hexdigest()[:12]
# Text injected into the conversation by LoopDetectionMiddleware.
# The *_WARNING_* messages are returned when a warn threshold is crossed; the
# *_HARD_STOP_* messages are returned together with a hard-stop verdict.
# The two TOOL_FREQ templates are filled via str.format(tool_name=..., count=...).
_WARNING_MSG = "[LOOP DETECTED] You are repeating the same tool calls. Stop calling tools and produce your final answer now. If you cannot complete the task, summarize what you accomplished so far."
_TOOL_FREQ_WARNING_MSG = (
    "[LOOP DETECTED] You have called {tool_name} {count} times without producing a final answer. Stop calling tools and produce your final answer now. If you cannot complete the task, summarize what you accomplished so far."
)
_HARD_STOP_MSG = "[FORCED STOP] Repeated tool calls exceeded the safety limit. Producing final answer with results collected so far."
_TOOL_FREQ_HARD_STOP_MSG = "[FORCED STOP] Tool {tool_name} called {count} times — exceeded the per-tool safety limit. Producing final answer with results collected so far."
class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
    """Detects and breaks repetitive tool call loops.

    Tracking state lives on the middleware instance, keyed by thread id, and
    is only read or mutated while holding ``self._lock``.

    Args:
        warn_threshold: Number of identical tool call sets before injecting
            a warning message. Default: 3.
        hard_limit: Number of identical tool call sets before stripping
            tool_calls entirely. Default: 5.
        window_size: Size of the sliding window for tracking calls.
            Default: 20.
        max_tracked_threads: Maximum number of threads to track before
            evicting the least recently used. Default: 100.
        tool_freq_warn: Number of calls to the same tool *type* (regardless
            of arguments) before injecting a frequency warning. Catches
            cross-file read loops that hash-based detection misses.
            Default: 30.
        tool_freq_hard_limit: Number of calls to the same tool type before
            forcing a stop. Default: 50.
    """

    def __init__(
        self,
        warn_threshold: int = _DEFAULT_WARN_THRESHOLD,
        hard_limit: int = _DEFAULT_HARD_LIMIT,
        window_size: int = _DEFAULT_WINDOW_SIZE,
        max_tracked_threads: int = _DEFAULT_MAX_TRACKED_THREADS,
        tool_freq_warn: int = _DEFAULT_TOOL_FREQ_WARN,
        tool_freq_hard_limit: int = _DEFAULT_TOOL_FREQ_HARD_LIMIT,
    ):
        super().__init__()
        self.warn_threshold = warn_threshold
        self.hard_limit = hard_limit
        self.window_size = window_size
        self.max_tracked_threads = max_tracked_threads
        self.tool_freq_warn = tool_freq_warn
        self.tool_freq_hard_limit = tool_freq_hard_limit
        # Guards all four tracking structures below.
        self._lock = threading.Lock()
        # Per-thread tracking using OrderedDict for LRU eviction.
        # thread_id -> recent call hashes (oldest first, at most window_size).
        self._history: OrderedDict[str, list[str]] = OrderedDict()
        # thread_id -> call hashes that already triggered a warning.
        self._warned: dict[str, set[str]] = defaultdict(set)
        # Per-thread, per-tool-type cumulative call counts
        self._tool_freq: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        # thread_id -> tool names that already triggered a frequency warning.
        self._tool_freq_warned: dict[str, set[str]] = defaultdict(set)

    def _get_thread_id(self, runtime: Runtime) -> str:
        """Extract thread_id from runtime context for per-thread tracking.

        Falls back to the shared key ``"default"`` when the context is empty
        or carries no truthy ``thread_id``.
        """
        thread_id = runtime.context.get("thread_id") if runtime.context else None
        if thread_id:
            return thread_id
        return "default"

    def _evict_if_needed(self) -> None:
        """Evict least recently used threads if over the limit.

        Must be called while holding self._lock.
        """
        while len(self._history) > self.max_tracked_threads:
            # popitem(last=False) removes the oldest (least recently touched) entry.
            evicted_id, _ = self._history.popitem(last=False)
            self._warned.pop(evicted_id, None)
            self._tool_freq.pop(evicted_id, None)
            self._tool_freq_warned.pop(evicted_id, None)
            logger.debug("Evicted loop tracking for thread %s (LRU)", evicted_id)

    def _track_and_check(self, state: AgentState, runtime: Runtime) -> tuple[str | None, bool]:
        """Track tool calls and check for loops.

        Only acts when the last message in state is an AI message that carries
        tool calls; otherwise nothing is recorded.

        Two detection layers:
        1. **Hash-based**: catches identical tool call sets.
        2. **Frequency-based**: catches the same *tool type* being
           called many times with varying arguments (e.g. ``read_file``
           on 40 different files).

        Layer 1 returns as soon as it produces a verdict, in which case the
        per-tool counters of layer 2 are not updated for this response.

        Returns:
            (warning_message_or_none, should_hard_stop)
        """
        messages = state.get("messages", [])
        if not messages:
            return None, False
        last_msg = messages[-1]
        if getattr(last_msg, "type", None) != "ai":
            return None, False
        tool_calls = getattr(last_msg, "tool_calls", None)
        if not tool_calls:
            return None, False
        thread_id = self._get_thread_id(runtime)
        call_hash = _hash_tool_calls(tool_calls)
        with self._lock:
            # Touch / create entry (move to end for LRU)
            if thread_id in self._history:
                self._history.move_to_end(thread_id)
            else:
                self._history[thread_id] = []
                self._evict_if_needed()
            history = self._history[thread_id]
            history.append(call_hash)
            # Keep only the newest window_size hashes (in-place trim).
            if len(history) > self.window_size:
                history[:] = history[-self.window_size :]
            # Occurrences of this exact call set inside the window, including
            # the one just appended.
            count = history.count(call_hash)
            tool_names = [tc.get("name", "?") for tc in tool_calls]
            # --- Layer 1: hash-based (identical call sets) ---
            if count >= self.hard_limit:
                logger.error(
                    "Loop hard limit reached — forcing stop",
                    extra={
                        "thread_id": thread_id,
                        "call_hash": call_hash,
                        "count": count,
                        "tools": tool_names,
                    },
                )
                return _HARD_STOP_MSG, True
            if count >= self.warn_threshold:
                warned = self._warned[thread_id]
                # Warn at most once per distinct hash; later repeats fall
                # through to the frequency layer.
                if call_hash not in warned:
                    warned.add(call_hash)
                    logger.warning(
                        "Repetitive tool calls detected — injecting warning",
                        extra={
                            "thread_id": thread_id,
                            "call_hash": call_hash,
                            "count": count,
                            "tools": tool_names,
                        },
                    )
                    return _WARNING_MSG, False
            # --- Layer 2: per-tool-type frequency ---
            freq = self._tool_freq[thread_id]
            for tc in tool_calls:
                name = tc.get("name", "")
                if not name:
                    continue
                # Cumulative count; it is not windowed like the hash history.
                freq[name] += 1
                tc_count = freq[name]
                if tc_count >= self.tool_freq_hard_limit:
                    logger.error(
                        "Tool frequency hard limit reached — forcing stop",
                        extra={
                            "thread_id": thread_id,
                            "tool_name": name,
                            "count": tc_count,
                        },
                    )
                    return _TOOL_FREQ_HARD_STOP_MSG.format(tool_name=name, count=tc_count), True
                if tc_count >= self.tool_freq_warn:
                    warned = self._tool_freq_warned[thread_id]
                    # Warn at most once per tool name.
                    if name not in warned:
                        warned.add(name)
                        logger.warning(
                            "Tool frequency warning — too many calls to same tool type",
                            extra={
                                "thread_id": thread_id,
                                "tool_name": name,
                                "count": tc_count,
                            },
                        )
                        return _TOOL_FREQ_WARNING_MSG.format(tool_name=name, count=tc_count), False
        return None, False

    @staticmethod
    def _append_text(content: str | list | None, text: str) -> str | list:
        """Append *text* to AIMessage content, handling str, list, and None.

        When content is a list of content blocks (e.g. Anthropic thinking mode),
        we append a new ``{"type": "text", ...}`` block instead of concatenating
        a string to a list, which would raise ``TypeError``.
        """
        if content is None:
            return text
        if isinstance(content, list):
            return [*content, {"type": "text", "text": f"\n\n{text}"}]
        if isinstance(content, str):
            return content + f"\n\n{text}"
        # Fallback: coerce unexpected types to str to avoid TypeError
        return str(content) + f"\n\n{text}"

    def _apply(self, state: AgentState, runtime: Runtime) -> dict | None:
        """Turn the detection verdict into a state update.

        Returns:
            On a hard stop, a ``{"messages": [...]}`` update with a copy of the
            last message whose tool calls are removed and whose content has
            the stop notice appended; on a warning, an update with a new
            ``HumanMessage``; otherwise ``None``.
        """
        warning, hard_stop = self._track_and_check(state, runtime)
        if hard_stop:
            # Strip tool_calls from the last AIMessage to force text output
            messages = state.get("messages", [])
            last_msg = messages[-1]
            stripped_msg = last_msg.model_copy(
                update={
                    "tool_calls": [],
                    "content": self._append_text(last_msg.content, warning),
                }
            )
            return {"messages": [stripped_msg]}
        if warning:
            # Inject as HumanMessage instead of SystemMessage to avoid
            # Anthropic's "multiple non-consecutive system messages" error.
            # Anthropic models require system messages only at the start of
            # the conversation; injecting one mid-conversation crashes
            # langchain_anthropic's _format_messages(). HumanMessage works
            # with all providers. See #1299.
            return {"messages": [HumanMessage(content=warning)]}
        return None

    @override
    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        """Run loop detection after each model response (sync hook)."""
        return self._apply(state, runtime)

    @override
    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        """Run loop detection after each model response (async hook).

        Delegates to the same synchronous ``_apply`` as ``after_model``.
        """
        return self._apply(state, runtime)

    def reset(self, thread_id: str | None = None) -> None:
        """Clear tracking state. If thread_id given, clear only that thread.

        A falsy *thread_id* (``None`` or ``""``) clears every thread.
        """
        with self._lock:
            if thread_id:
                self._history.pop(thread_id, None)
                self._warned.pop(thread_id, None)
                self._tool_freq.pop(thread_id, None)
                self._tool_freq_warned.pop(thread_id, None)
            else:
                self._history.clear()
                self._warned.clear()
                self._tool_freq.clear()
                self._tool_freq_warned.clear()

View File

@@ -0,0 +1,248 @@
"""Middleware for memory mechanism."""
import logging
import re
from typing import Any, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.config import get_config
from langgraph.runtime import Runtime
from deerflow.agents.memory.queue import get_memory_queue
from deerflow.config.memory_config import get_memory_config
logger = logging.getLogger(__name__)
# Matches one <uploaded_files>...</uploaded_files> block (non-greedy, spanning
# newlines) together with any newlines that directly follow it.
_UPLOAD_BLOCK_RE = re.compile(r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", re.IGNORECASE)
# Phrases treated as an explicit user correction. English patterns are
# case-insensitive and word-bounded; the Chinese patterns are plain substring
# matches.
_CORRECTION_PATTERNS = (
    re.compile(r"\bthat(?:'s| is) (?:wrong|incorrect)\b", re.IGNORECASE),
    re.compile(r"\byou misunderstood\b", re.IGNORECASE),
    re.compile(r"\btry again\b", re.IGNORECASE),
    re.compile(r"\bredo\b", re.IGNORECASE),
    re.compile(r"不对"),
    re.compile(r"你理解错了"),
    re.compile(r"你理解有误"),
    re.compile(r"重试"),
    re.compile(r"重新来"),
    re.compile(r"换一种"),
    re.compile(r"改用"),
)
# Phrases treated as explicit positive reinforcement. Several patterns require
# the phrase to be followed by sentence-ending punctuation or end of text.
# NOTE(review): the Chinese character classes below contain duplicates
# ("[,]", "!?!?"), which suggests full-width punctuation may have been
# normalised to ASCII somewhere upstream — confirm against the original file.
_REINFORCEMENT_PATTERNS = (
    re.compile(r"\byes[,.]?\s+(?:exactly|perfect|that(?:'s| is) (?:right|correct|it))\b", re.IGNORECASE),
    re.compile(r"\bperfect(?:[.!?]|$)", re.IGNORECASE),
    re.compile(r"\bexactly\s+(?:right|correct)\b", re.IGNORECASE),
    re.compile(r"\bthat(?:'s| is)\s+(?:exactly\s+)?(?:right|correct|what i (?:wanted|needed|meant))\b", re.IGNORECASE),
    re.compile(r"\bkeep\s+(?:doing\s+)?that\b", re.IGNORECASE),
    re.compile(r"\bjust\s+(?:like\s+)?(?:that|this)\b", re.IGNORECASE),
    re.compile(r"\bthis is (?:great|helpful)\b(?:[.!?]|$)", re.IGNORECASE),
    re.compile(r"\bthis is what i wanted\b(?:[.!?]|$)", re.IGNORECASE),
    re.compile(r"对[,]?\s*就是这样(?:[。!?!?.]|$)"),
    re.compile(r"完全正确(?:[。!?!?.]|$)"),
    re.compile(r"(?:对[,]?\s*)?就是这个意思(?:[。!?!?.]|$)"),
    re.compile(r"正是我想要的(?:[。!?!?.]|$)"),
    re.compile(r"继续保持(?:[。!?!?.]|$)"),
)
class MemoryMiddlewareState(AgentState):
    """State schema for the memory middleware; adds no fields to ``AgentState``.

    Compatible with the `ThreadState` schema.
    """
def _extract_message_text(message: Any) -> str:
"""Extract plain text from message content for filtering and signal detection."""
content = getattr(message, "content", "")
if isinstance(content, list):
text_parts: list[str] = []
for part in content:
if isinstance(part, str):
text_parts.append(part)
elif isinstance(part, dict):
text_val = part.get("text")
if isinstance(text_val, str):
text_parts.append(text_val)
return " ".join(text_parts)
return str(content)
def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
    """Reduce a conversation to user inputs and final assistant replies.

    Dropped:
    - tool messages and any other non-human, non-AI message;
    - AI messages that carry tool calls (intermediate steps);
    - the ``<uploaded_files>`` block inside human messages, because upload
      paths are session-scoped and must not persist in long-term memory;
    - human turns that are empty once that block is removed, together with
      the next final assistant reply that answers them.

    Kept:
    - human messages (a copy with cleaned string content when an upload block
      was removed, otherwise the original object);
    - AI messages without tool calls.

    Args:
        messages: List of all conversation messages.

    Returns:
        Filtered list containing only user inputs and final assistant responses.
    """
    from copy import copy

    kept = []
    drop_next_reply = False
    for msg in messages:
        kind = getattr(msg, "type", None)

        if kind == "ai":
            if getattr(msg, "tool_calls", None):
                # Intermediate step, not a final response.
                continue
            if drop_next_reply:
                # Reply to an upload-only turn: skip it, then resume normally.
                drop_next_reply = False
                continue
            kept.append(msg)
            continue

        if kind != "human":
            # Tool messages and anything else are ignored.
            continue

        text = _extract_message_text(msg)
        if "<uploaded_files>" not in text:
            kept.append(msg)
            drop_next_reply = False
            continue

        # Strip the ephemeral upload block; keep the user's real question.
        remainder = _UPLOAD_BLOCK_RE.sub("", text).strip()
        if not remainder:
            # The whole turn was upload bookkeeping.
            drop_next_reply = True
            continue

        # Store a copy so the message held in agent state is left untouched.
        cleaned = copy(msg)
        cleaned.content = remainder
        kept.append(cleaned)
        drop_next_reply = False
    return kept
def detect_correction(messages: list[Any]) -> bool:
    """Report whether a recent user turn contains an explicit correction.

    Only human messages among the last six entries of *messages* are
    examined, which keeps the signal conservative and ignores stale
    corrections from long histories. Callers pass the latest filtered message
    list because the queue keeps a single pending context per thread.
    """
    for msg in messages[-6:]:
        if getattr(msg, "type", None) != "human":
            continue
        text = _extract_message_text(msg).strip()
        if text and any(pattern.search(text) for pattern in _CORRECTION_PATTERNS):
            return True
    return False
def detect_reinforcement(messages: list[Any]) -> bool:
    """Report whether a recent user turn contains explicit positive feedback.

    This is the counterpart of ``detect_correction()``: it lets the memory
    system record what worked well, not only what went wrong. Only human
    messages among the last six entries of *messages* are examined, so stale
    signals from long histories are ignored.
    """
    recent_texts = (
        _extract_message_text(msg).strip()
        for msg in messages[-6:]
        if getattr(msg, "type", None) == "human"
    )
    for text in recent_texts:
        if not text:
            continue
        for pattern in _REINFORCEMENT_PATTERNS:
            if pattern.search(text):
                return True
    return False
class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Hand each finished conversation to the memory update queue.

    After the agent completes, the conversation is reduced to user inputs and
    final assistant responses (tool traffic is ignored) and added to the
    memory queue together with correction / reinforcement flags. The queue
    debounces updates and memory is updated asynchronously via LLM
    summarization.
    """

    state_schema = MemoryMiddlewareState

    def __init__(self, agent_name: str | None = None):
        """Create the middleware.

        Args:
            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
        """
        super().__init__()
        self._agent_name = agent_name

    @override
    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict | None:
        """Queue the conversation for a memory update once the agent is done.

        Nothing is queued when memory is disabled, no thread id can be found,
        the state holds no messages, or the filtered conversation lacks either
        a user message or an assistant response.

        Args:
            state: The current agent state.
            runtime: The runtime context.

        Returns:
            Always ``None``; this middleware never changes state.
        """
        if not get_memory_config().enabled:
            return None

        # Prefer the runtime context; fall back to LangGraph's configurable metadata.
        thread_id = runtime.context.get("thread_id") if runtime.context else None
        if thread_id is None:
            thread_id = get_config().get("configurable", {}).get("thread_id")
        if not thread_id:
            logger.debug("No thread_id in context, skipping memory update")
            return None

        messages = state.get("messages", [])
        if not messages:
            logger.debug("No messages in state, skipping memory update")
            return None

        conversation = _filter_messages_for_memory(messages)

        # A meaningful exchange needs at least one user turn and one reply.
        has_user_turn = any(getattr(m, "type", None) == "human" for m in conversation)
        has_reply = any(getattr(m, "type", None) == "ai" for m in conversation)
        if not (has_user_turn and has_reply):
            return None

        # A correction takes precedence: reinforcement is only evaluated when
        # no correction was found.
        corrected = detect_correction(conversation)
        reinforced = False if corrected else detect_reinforcement(conversation)

        get_memory_queue().add(
            thread_id=thread_id,
            messages=conversation,
            agent_name=self._agent_name,
            correction_detected=corrected,
            reinforcement_detected=reinforced,
        )
        return None

View File

@@ -0,0 +1,363 @@
"""SandboxAuditMiddleware - bash command security auditing."""
import json
import logging
import re
import shlex
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import override
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import ToolMessage
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
from deerflow.agents.thread_state import ThreadState
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Command classification rules
# ---------------------------------------------------------------------------
# Each pattern is compiled once at import time.
# A match on any high-risk pattern classifies the command as "block".
# All patterns are applied with .search(), so they match anywhere in the text
# unless they carry their own anchor.
_HIGH_RISK_PATTERNS: list[re.Pattern[str]] = [
    # --- original rules (retained) ---
    # Recursive rm whose target is /, ~, /home or /root (optionally /* or ~/*),
    # anchored to the END of the scanned text.
    re.compile(r"rm\s+-[^\s]*r[^\s]*\s+(/\*?|~/?\*?|/home\b|/root\b)\s*$"),
    re.compile(r"dd\s+if="),
    re.compile(r"mkfs"),
    re.compile(r"cat\s+/etc/shadow"),
    # Redirect (> or >>) into a path under /etc/.
    re.compile(r">+\s*/etc/"),
    # --- pipe to sh/bash (generalised, replaces old curl|sh rule) ---
    re.compile(r"\|\s*(ba)?sh\b"),
    # --- command substitution (targeted only dangerous executables) ---
    re.compile(r"[`$]\(?\s*(curl|wget|bash|sh|python|ruby|perl|base64)"),
    # --- base64 decode piped to execution ---
    re.compile(r"base64\s+.*-d.*\|"),
    # --- overwrite system binaries ---
    re.compile(r">+\s*(/usr/bin/|/bin/|/sbin/)"),
    # --- overwrite shell startup files ---
    re.compile(r">+\s*~/?\.(bashrc|profile|zshrc|bash_profile)"),
    # --- process environment leakage ---
    re.compile(r"/proc/[^/]+/environ"),
    # --- dynamic linker hijack (one-step escalation) ---
    re.compile(r"\b(LD_PRELOAD|LD_LIBRARY_PATH)\s*="),
    # --- bash built-in networking (bypasses tool allowlists) ---
    re.compile(r"/dev/tcp/"),
    # --- fork bomb ---
    re.compile(r"\S+\(\)\s*\{[^}]*\|\s*\S+\s*&"),  # :(){ :|:& };:
    re.compile(r"while\s+true.*&\s*done"),  # while true; do bash & done
]
# A match on any medium-risk pattern classifies the command as "warn".
_MEDIUM_RISK_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"chmod\s+777"),
    re.compile(r"pip3?\s+install"),
    re.compile(r"apt(-get)?\s+install"),
    # sudo/su: no-op under Docker root; warn so LLM is aware
    re.compile(r"\b(sudo|su)\b"),
    # PATH modification: long attack chain, warn rather than block
    re.compile(r"\bPATH\s*="),
]
def _split_compound_command(command: str) -> list[str]:
"""Split a compound command into sub-commands (quote-aware).
Scans the raw command string so unquoted shell control operators are
recognised even when they are not surrounded by whitespace
(e.g. ``safe;rm -rf /`` or ``rm -rf /&&echo ok``). Operators inside
quotes are ignored. If the command ends with an unclosed quote or a
dangling escape, return the whole command unchanged (fail-closed —
safer to classify the unsplit string than silently drop parts).
"""
parts: list[str] = []
current: list[str] = []
in_single_quote = False
in_double_quote = False
escaping = False
index = 0
while index < len(command):
char = command[index]
if escaping:
current.append(char)
escaping = False
index += 1
continue
if char == "\\" and not in_single_quote:
current.append(char)
escaping = True
index += 1
continue
if char == "'" and not in_double_quote:
in_single_quote = not in_single_quote
current.append(char)
index += 1
continue
if char == '"' and not in_single_quote:
in_double_quote = not in_double_quote
current.append(char)
index += 1
continue
if not in_single_quote and not in_double_quote:
if command.startswith("&&", index) or command.startswith("||", index):
part = "".join(current).strip()
if part:
parts.append(part)
current = []
index += 2
continue
if char == ";":
part = "".join(current).strip()
if part:
parts.append(part)
current = []
index += 1
continue
current.append(char)
index += 1
# Unclosed quote or dangling escape → fail-closed, return whole command
if in_single_quote or in_double_quote or escaping:
return [command]
part = "".join(current).strip()
if part:
parts.append(part)
return parts if parts else [command]
def _classify_single_command(command: str) -> str:
    """Classify a single (non-compound) command as 'block', 'warn', or 'pass'.

    High-risk patterns are tested against two renderings of the command: the
    whitespace-collapsed text and the shlex tokens re-joined with spaces. A
    command that shlex cannot tokenise is blocked. Medium-risk patterns are
    tested against the whitespace-collapsed text only.
    """
    collapsed = " ".join(command.split())
    if any(rule.search(collapsed) for rule in _HIGH_RISK_PATTERNS):
        return "block"

    try:
        retokenised = " ".join(shlex.split(command))
    except ValueError:
        # shlex.split fails on unclosed quotes — treat as suspicious
        return "block"
    if any(rule.search(retokenised) for rule in _HIGH_RISK_PATTERNS):
        return "block"

    if any(rule.search(collapsed) for rule in _MEDIUM_RISK_PATTERNS):
        return "warn"
    return "pass"
def _classify_command(command: str) -> str:
    """Return 'block', 'warn', or 'pass' for a possibly compound command.

    Two passes are made:
    1. The *whole* whitespace-collapsed command is scanned against the
       high-risk patterns. This catches structural attacks such as
       ``while true; do bash & done`` or ``:(){ :|:& };:`` that span several
       shell statements and would lose their context if split on ``;``.
    2. The command is split into sub-commands (e.g. ``cmd1 && cmd2 ; cmd3``)
       and each is classified on its own. The most severe verdict wins.
    """
    # Pass 1: whole-command high-risk scan (catches multi-statement patterns)
    flattened = " ".join(command.split())
    if any(rule.search(flattened) for rule in _HIGH_RISK_PATTERNS):
        return "block"

    # Pass 2: per-sub-command classification
    saw_warning = False
    for sub_command in _split_compound_command(command):
        verdict = _classify_single_command(sub_command)
        if verdict == "block":
            # Nothing is more severe, so stop at the first blocked part.
            return "block"
        if verdict == "warn":
            saw_warning = True
    return "warn" if saw_warning else "pass"
# ---------------------------------------------------------------------------
# Middleware
# ---------------------------------------------------------------------------
class SandboxAuditMiddleware(AgentMiddleware[ThreadState]):
"""Bash command security auditing middleware.
For every ``bash`` tool call:
1. **Command classification**: regex + shlex analysis grades commands as
high-risk (block), medium-risk (warn), or safe (pass).
2. **Audit log**: every bash call is recorded as a structured JSON entry
via the standard logger (visible in langgraph.log).
High-risk commands (e.g. ``rm -rf /``, ``curl url | bash``) are blocked:
the handler is not called and an error ``ToolMessage`` is returned so the
agent loop can continue gracefully.
Medium-risk commands (e.g. ``pip install``, ``chmod 777``) are executed
normally; a warning is appended to the tool result so the LLM is aware.
"""
state_schema = ThreadState
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
def _get_thread_id(self, request: ToolCallRequest) -> str | None:
    """Return the thread id for *request*, or ``None`` when unavailable.

    The runtime's ``context`` dict is consulted first; if it yields nothing,
    ``runtime.config["configurable"]["thread_id"]`` is used.
    """
    runtime = request.runtime  # ToolRuntime; may be None-like in tests
    if runtime is None:
        return None

    context = getattr(runtime, "context", None) or {}
    from_context = context.get("thread_id") if isinstance(context, dict) else None
    if from_context is not None:
        return from_context

    config = getattr(runtime, "config", None) or {}
    return config.get("configurable", {}).get("thread_id")
_AUDIT_COMMAND_LIMIT = 200
def _write_audit(self, thread_id: str | None, command: str, verdict: str, *, truncate: bool = False) -> None:
    """Log one structured audit record for a bash command.

    Args:
        thread_id: Conversation thread; logged as ``"unknown"`` when falsy.
        command: The command text to record.
        verdict: Classification result to record.
        truncate: When True, commands longer than ``_AUDIT_COMMAND_LIMIT``
            are cut to that length and suffixed with the original length.
    """
    if truncate and len(command) > self._AUDIT_COMMAND_LIMIT:
        audited_command = f"{command[: self._AUDIT_COMMAND_LIMIT]}... ({len(command)} chars)"
    else:
        audited_command = command

    record = {
        "timestamp": datetime.now(UTC).isoformat(),
        "thread_id": thread_id or "unknown",
        "command": audited_command,
        "verdict": verdict,
    }
    payload = json.dumps(record, ensure_ascii=False)
    logger.info("[SandboxAudit] %s", payload)
def _build_block_message(self, request: ToolCallRequest, reason: str) -> ToolMessage:
tool_call_id = str(request.tool_call.get("id") or "missing_id")
return ToolMessage(
content=f"Command blocked: {reason}. Please use a safer alternative approach.",
tool_call_id=tool_call_id,
name="bash",
status="error",
)
def _append_warn_to_result(self, result: ToolMessage | Command, command: str) -> ToolMessage | Command:
"""Append a warning note to the tool result for medium-risk commands."""
if not isinstance(result, ToolMessage):
return result
warning = f"\n\n⚠️ Warning: `{command}` is a medium-risk command that may modify the runtime environment."
if isinstance(result.content, list):
new_content = list(result.content) + [{"type": "text", "text": warning}]
else:
new_content = str(result.content) + warning
return ToolMessage(
content=new_content,
tool_call_id=result.tool_call_id,
name=result.name,
status=result.status,
)
# ------------------------------------------------------------------
# Input sanitisation
# ------------------------------------------------------------------
# Normal bash commands rarely exceed a few hundred characters. 10 000 is
# well above any legitimate use case yet a tiny fraction of Linux ARG_MAX.
# Anything longer is almost certainly a payload injection or base64-encoded
# attack string.
_MAX_COMMAND_LENGTH = 10_000
def _validate_input(self, command: str) -> str | None:
"""Return ``None`` if *command* is acceptable, else a rejection reason."""
if not command.strip():
return "empty command"
if len(command) > self._MAX_COMMAND_LENGTH:
return "command too long"
if "\x00" in command:
return "null byte detected"
return None
# ------------------------------------------------------------------
# Core logic (shared between sync and async paths)
# ------------------------------------------------------------------
def _pre_process(self, request: ToolCallRequest) -> tuple[str, str | None, str, str | None]:
"""
Returns (command, thread_id, verdict, reject_reason).
verdict is 'block', 'warn', or 'pass'.
reject_reason is non-None only for input sanitisation rejections.
"""
args = request.tool_call.get("args", {})
raw_command = args.get("command")
command = raw_command if isinstance(raw_command, str) else ""
thread_id = self._get_thread_id(request)
# ① input sanitisation — reject malformed input before regex analysis
reject_reason = self._validate_input(command)
if reject_reason:
self._write_audit(thread_id, command, "block", truncate=True)
logger.warning("[SandboxAudit] INVALID INPUT thread=%s reason=%s", thread_id, reject_reason)
return command, thread_id, "block", reject_reason
# ② classify command
verdict = _classify_command(command)
# ③ audit log
self._write_audit(thread_id, command, verdict)
if verdict == "block":
logger.warning("[SandboxAudit] BLOCKED thread=%s cmd=%r", thread_id, command)
elif verdict == "warn":
logger.warning("[SandboxAudit] WARN (medium-risk) thread=%s cmd=%r", thread_id, command)
return command, thread_id, verdict, None
# ------------------------------------------------------------------
# wrap_tool_call hooks
# ------------------------------------------------------------------
@override
def wrap_tool_call(
self,
request: ToolCallRequest,
handler: Callable[[ToolCallRequest], ToolMessage | Command],
) -> ToolMessage | Command:
if request.tool_call.get("name") != "bash":
return handler(request)
command, _, verdict, reject_reason = self._pre_process(request)
if verdict == "block":
reason = reject_reason or "security violation detected"
return self._build_block_message(request, reason)
result = handler(request)
if verdict == "warn":
result = self._append_warn_to_result(result, command)
return result
@override
async def awrap_tool_call(
self,
request: ToolCallRequest,
handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command]],
) -> ToolMessage | Command:
if request.tool_call.get("name") != "bash":
return await handler(request)
command, _, verdict, reject_reason = self._pre_process(request)
if verdict == "block":
reason = reject_reason or "security violation detected"
return self._build_block_message(request, reason)
result = await handler(request)
if verdict == "warn":
result = self._append_warn_to_result(result, command)
return result

View File

@@ -0,0 +1,75 @@
"""Middleware to enforce maximum concurrent subagent tool calls per model response."""
import logging
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime
from deerflow.subagents.executor import MAX_CONCURRENT_SUBAGENTS
logger = logging.getLogger(__name__)
# Valid range for max_concurrent_subagents
MIN_SUBAGENT_LIMIT = 2
MAX_SUBAGENT_LIMIT = 4
def _clamp_subagent_limit(value: int) -> int:
"""Clamp subagent limit to valid range [2, 4]."""
return max(MIN_SUBAGENT_LIMIT, min(MAX_SUBAGENT_LIMIT, value))
class SubagentLimitMiddleware(AgentMiddleware[AgentState]):
    """Cap the number of parallel ``task`` tool calls in one model response.

    If the latest AI message carries more ``task`` tool calls than
    ``max_concurrent``, only the first ``max_concurrent`` of them are kept;
    tool calls with any other name are never removed.

    Args:
        max_concurrent: Requested cap on concurrent subagent calls. Defaults to
            MAX_CONCURRENT_SUBAGENTS and is clamped by ``_clamp_subagent_limit``.
    """

    def __init__(self, max_concurrent: int = MAX_CONCURRENT_SUBAGENTS):
        super().__init__()
        self.max_concurrent = _clamp_subagent_limit(max_concurrent)

    def _truncate_task_calls(self, state: AgentState) -> dict | None:
        """Return a state update dropping excess ``task`` calls, or ``None``."""
        history = state.get("messages", [])
        if not history:
            return None

        latest = history[-1]
        if getattr(latest, "type", None) != "ai":
            return None

        calls = getattr(latest, "tool_calls", None)
        if not calls:
            return None

        # Positions of every task call, in the order the model emitted them.
        task_positions = [pos for pos, call in enumerate(calls) if call.get("name") == "task"]
        if len(task_positions) <= self.max_concurrent:
            return None

        # Everything past the first max_concurrent task calls is discarded.
        surplus = set(task_positions[self.max_concurrent :])
        kept_calls = [call for pos, call in enumerate(calls) if pos not in surplus]
        dropped_count = len(surplus)
        logger.warning(f"Truncated {dropped_count} excess task tool call(s) from model response (limit: {self.max_concurrent})")

        # The copy keeps the message id, so the state reducer replaces the original.
        return {"messages": [latest.model_copy(update={"tool_calls": kept_calls})]}

    @override
    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._truncate_task_calls(state)

    @override
    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._truncate_task_calls(state)

View File

@@ -0,0 +1,99 @@
import logging
from typing import NotRequired, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.config import get_config
from langgraph.runtime import Runtime
from deerflow.agents.thread_state import ThreadDataState
from deerflow.config.paths import Paths, get_paths
logger = logging.getLogger(__name__)
class ThreadDataMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    # Optional per-thread path mapping; ThreadDataMiddleware.before_agent fills
    # it with workspace_path / uploads_path / outputs_path.
    thread_data: NotRequired[ThreadDataState | None]
class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
    """Create thread data directories for each thread execution.

    Creates the following directory structure:
    - {base_dir}/threads/{thread_id}/user-data/workspace
    - {base_dir}/threads/{thread_id}/user-data/uploads
    - {base_dir}/threads/{thread_id}/user-data/outputs

    Lifecycle Management:
    - With lazy_init=True (default): Only compute paths, directories created on-demand
    - With lazy_init=False: Eagerly create directories in before_agent()
    """

    state_schema = ThreadDataMiddlewareState

    def __init__(self, base_dir: str | None = None, lazy_init: bool = True):
        """Initialize the middleware.

        Args:
            base_dir: Base directory for thread data. Defaults to Paths resolution.
            lazy_init: If True, defer directory creation until needed.
                If False, create directories eagerly in before_agent().
                Default is True for optimal performance.
        """
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()
        self._lazy_init = lazy_init

    def _get_thread_paths(self, thread_id: str) -> dict[str, str]:
        """Get the paths for a thread's data directories.

        Args:
            thread_id: The thread ID.

        Returns:
            Dictionary with workspace_path, uploads_path, and outputs_path.
        """
        return {
            "workspace_path": str(self._paths.sandbox_work_dir(thread_id)),
            "uploads_path": str(self._paths.sandbox_uploads_dir(thread_id)),
            "outputs_path": str(self._paths.sandbox_outputs_dir(thread_id)),
        }

    def _create_thread_directories(self, thread_id: str) -> dict[str, str]:
        """Create the thread data directories.

        Args:
            thread_id: The thread ID.

        Returns:
            Dictionary with the created directory paths.
        """
        self._paths.ensure_thread_dirs(thread_id)
        return self._get_thread_paths(thread_id)

    @override
    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime) -> dict | None:
        """Resolve the thread id and publish its data paths into the state.

        The thread id is read from ``runtime.context`` first and from
        ``config.configurable`` second.

        Returns:
            ``{"thread_data": {...}}`` holding the three path entries.

        Raises:
            ValueError: If no thread id is available from either source.
        """
        context = runtime.context or {}
        thread_id = context.get("thread_id")
        if thread_id is None:
            try:
                thread_id = get_config().get("configurable", {}).get("thread_id")
            except RuntimeError:
                # get_config() raises outside a runnable context (e.g. unit
                # tests); fall through so the ValueError below reports the
                # actual problem, a missing thread id.
                thread_id = None
        if thread_id is None:
            raise ValueError("Thread ID is required in runtime context or config.configurable")

        if self._lazy_init:
            # Lazy initialization: only compute paths, don't create directories
            paths = self._get_thread_paths(thread_id)
        else:
            # Eager initialization: create directories immediately
            paths = self._create_thread_directories(thread_id)
            logger.debug("Created thread data directories for thread %s", thread_id)

        return {
            "thread_data": {
                **paths,
            }
        }

View File

@@ -0,0 +1,138 @@
"""Middleware for automatic thread title generation."""
import logging
from typing import NotRequired, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime
from deerflow.config.title_config import get_title_config
from deerflow.models import create_chat_model
logger = logging.getLogger(__name__)
class TitleMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    # Thread title; TitleMiddleware skips generation once this is truthy.
    title: NotRequired[str | None]
class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
    """Give a thread its title once the first user/assistant exchange exists.

    The sync hook only derives a local title from the user message; the async
    hook asks a chat model and falls back to the local title on failure.
    """

    state_schema = TitleMiddlewareState

    def _normalize_content(self, content: object) -> str:
        """Flatten message content (str, list of blocks, or block dict) to text."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            flattened = (self._normalize_content(item) for item in content)
            # Blocks that normalise to an empty string are left out.
            return "\n".join(filter(None, flattened))
        if isinstance(content, dict):
            text_value = content.get("text")
            if isinstance(text_value, str):
                return text_value
            nested = content.get("content")
            return "" if nested is None else self._normalize_content(nested)
        return ""

    def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
        """Return True only for an untitled thread right after its first exchange."""
        if not get_title_config().enabled:
            return False
        if state.get("title"):
            # A title already exists in state.
            return False
        messages = state.get("messages", [])
        if len(messages) < 2:
            return False
        # Exactly one human message plus at least one AI reply marks the first turn.
        human_total = sum(1 for message in messages if message.type == "human")
        ai_total = sum(1 for message in messages if message.type == "ai")
        return human_total == 1 and ai_total >= 1

    def _build_title_prompt(self, state: TitleMiddlewareState) -> tuple[str, str]:
        """Build the title prompt from the first human and first AI message.

        Returns (prompt_string, user_msg); callers reuse user_msg as fallback text.
        """
        config = get_title_config()
        messages = state.get("messages", [])

        def first_content(kind: str) -> object:
            # Content of the earliest message of the given type, "" when absent.
            for message in messages:
                if message.type == kind:
                    return message.content
            return ""

        user_msg = self._normalize_content(first_content("human"))
        assistant_msg = self._normalize_content(first_content("ai"))
        # Only the first 500 characters of each side go into the prompt.
        prompt = config.prompt_template.format(
            max_words=config.max_words,
            user_msg=user_msg[:500],
            assistant_msg=assistant_msg[:500],
        )
        return prompt, user_msg

    def _parse_title(self, content: object) -> str:
        """Turn raw model output into a trimmed, unquoted, length-capped title."""
        limit = get_title_config().max_chars
        cleaned = self._normalize_content(content).strip().strip('"').strip("'")
        if len(cleaned) > limit:
            return cleaned[:limit]
        return cleaned

    def _fallback_title(self, user_msg: str) -> str:
        """Derive a title locally from the user message text."""
        limit = min(get_title_config().max_chars, 50)
        if len(user_msg) > limit:
            return user_msg[:limit].rstrip() + "..."
        return user_msg or "New Conversation"

    def _generate_title_result(self, state: TitleMiddlewareState) -> dict | None:
        """Produce a local fallback title; this path never calls an LLM."""
        if not self._should_generate_title(state):
            return None
        user_msg = self._build_title_prompt(state)[1]
        return {"title": self._fallback_title(user_msg)}

    async def _agenerate_title_result(self, state: TitleMiddlewareState) -> dict | None:
        """Ask the model for a title; use the local fallback if that fails or is empty."""
        if not self._should_generate_title(state):
            return None
        config = get_title_config()
        prompt, user_msg = self._build_title_prompt(state)
        try:
            model_kwargs: dict = {"thinking_enabled": False}
            if config.model_name:
                model_kwargs["name"] = config.model_name
            model = create_chat_model(**model_kwargs)
            response = await model.ainvoke(prompt)
            title = self._parse_title(response.content)
            if title:
                return {"title": title}
        except Exception:
            logger.debug("Failed to generate async title; falling back to local title", exc_info=True)
        return {"title": self._fallback_title(user_msg)}

    @override
    def after_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
        return self._generate_title_result(state)

    @override
    async def aafter_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
        return await self._agenerate_title_result(state)

View File

@@ -0,0 +1,100 @@
"""Middleware that extends TodoListMiddleware with context-loss detection.
When the message history is truncated (e.g., by SummarizationMiddleware), the
original `write_todos` tool call and its ToolMessage can be scrolled out of the
active context window. This middleware detects that situation and injects a
reminder message so the model still knows about the outstanding todo list.
"""
from __future__ import annotations
from typing import Any, override
from langchain.agents.middleware import TodoListMiddleware
from langchain.agents.middleware.todo import PlanningState, Todo
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.runtime import Runtime
def _todos_in_messages(messages: list[Any]) -> bool:
"""Return True if any AIMessage in *messages* contains a write_todos tool call."""
for msg in messages:
if isinstance(msg, AIMessage) and msg.tool_calls:
for tc in msg.tool_calls:
if tc.get("name") == "write_todos":
return True
return False
def _reminder_in_messages(messages: list[Any]) -> bool:
"""Return True if a todo_reminder HumanMessage is already present in *messages*."""
for msg in messages:
if isinstance(msg, HumanMessage) and getattr(msg, "name", None) == "todo_reminder":
return True
return False
def _format_todos(todos: list[Todo]) -> str:
"""Format a list of Todo items into a human-readable string."""
lines: list[str] = []
for todo in todos:
status = todo.get("status", "pending")
content = todo.get("content", "")
lines.append(f"- [{status}] {content}")
return "\n".join(lines)
class TodoMiddleware(TodoListMiddleware):
    """TodoListMiddleware that re-surfaces the todo list after context loss.

    If the state still holds todos but no ``write_todos`` tool call is left in
    the message history (e.g., after summarization), `before_model` /
    `abefore_model` add a reminder message listing the current todos.
    """

    @override
    def before_model(
        self,
        state: PlanningState,
        runtime: Runtime,  # noqa: ARG002
    ) -> dict[str, Any] | None:
        """Return a todo reminder update when write_todos is no longer in context."""
        todos: list[Todo] = state.get("todos") or []  # type: ignore[assignment]
        messages = state.get("messages") or []

        # Nothing to add when there are no todos, when write_todos is still
        # visible, or when an earlier reminder is still in the history.
        reminder_needed = bool(todos) and not _todos_in_messages(messages) and not _reminder_in_messages(messages)
        if not reminder_needed:
            return None

        # Sent as a HumanMessage named "todo_reminder" so later calls can detect it.
        reminder_text = (
            "<system_reminder>\n"
            "Your todo list from earlier is no longer visible in the current context window, "
            "but it is still active. Here is the current state:\n\n"
            f"{_format_todos(todos)}\n\n"
            "Continue tracking and updating this todo list as you work. "
            "Call `write_todos` whenever the status of any item changes.\n"
            "</system_reminder>"
        )
        return {"messages": [HumanMessage(name="todo_reminder", content=reminder_text)]}

    @override
    async def abefore_model(
        self,
        state: PlanningState,
        runtime: Runtime,
    ) -> dict[str, Any] | None:
        """Async entry point; delegates to the synchronous before_model."""
        return self.before_model(state, runtime)

View File

@@ -0,0 +1,37 @@
"""Middleware for logging LLM token usage."""
import logging
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime
logger = logging.getLogger(__name__)
class TokenUsageMiddleware(AgentMiddleware):
    """Write the last message's ``usage_metadata`` token counts to the log."""

    @override
    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._log_usage(state)

    @override
    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._log_usage(state)

    def _log_usage(self, state: AgentState) -> None:
        """Log input/output/total tokens of the newest message; always returns None."""
        messages = state.get("messages", [])
        usage = getattr(messages[-1], "usage_metadata", None) if messages else None
        if not usage:
            return None
        # Counters missing from the metadata are logged as "?".
        input_tokens, output_tokens, total_tokens = (
            usage.get(key, "?") for key in ("input_tokens", "output_tokens", "total_tokens")
        )
        logger.info(
            "LLM token usage: input=%s output=%s total=%s",
            input_tokens,
            output_tokens,
            total_tokens,
        )
        return None

View File

@@ -0,0 +1,143 @@
"""Tool error handling middleware and shared runtime middleware builders."""
import logging
from collections.abc import Awaitable, Callable
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import ToolMessage
from langgraph.errors import GraphBubbleUp
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
logger = logging.getLogger(__name__)
_MISSING_TOOL_CALL_ID = "missing_tool_call_id"
class ToolErrorHandlingMiddleware(AgentMiddleware[AgentState]):
    """Turn exceptions raised by tools into error ``ToolMessage`` results.

    ``GraphBubbleUp`` is re-raised untouched; any other ``Exception`` is logged
    and replaced by an error message so the run can continue.
    """

    def _build_error_message(self, request: ToolCallRequest, exc: Exception) -> ToolMessage:
        """Describe *exc* in an error ToolMessage addressed to the failing tool call."""
        call = request.tool_call
        tool_name = str(call.get("name") or "unknown_tool")
        tool_call_id = str(call.get("id") or _MISSING_TOOL_CALL_ID)
        error_type = exc.__class__.__name__
        # Exceptions with a blank message are described by their class name.
        detail = str(exc).strip() or error_type
        if len(detail) > 500:
            # 497 characters plus the ellipsis keeps the detail at 500.
            detail = detail[:497] + "..."
        return ToolMessage(
            content=f"Error: Tool '{tool_name}' failed with {error_type}: {detail}. Continue with available context, or choose an alternative tool.",
            tool_call_id=tool_call_id,
            name=tool_name,
            status="error",
        )

    @override
    def wrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        try:
            outcome = handler(request)
        except GraphBubbleUp:
            # LangGraph control-flow signals (interrupt/pause/resume) must pass through.
            raise
        except Exception as exc:
            logger.exception("Tool execution failed (sync): name=%s id=%s", request.tool_call.get("name"), request.tool_call.get("id"))
            return self._build_error_message(request, exc)
        return outcome

    @override
    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command]],
    ) -> ToolMessage | Command:
        try:
            outcome = await handler(request)
        except GraphBubbleUp:
            # LangGraph control-flow signals (interrupt/pause/resume) must pass through.
            raise
        except Exception as exc:
            logger.exception("Tool execution failed (async): name=%s id=%s", request.tool_call.get("name"), request.tool_call.get("id"))
            return self._build_error_message(request, exc)
        return outcome
def _build_runtime_middlewares(
    *,
    include_uploads: bool,
    include_dangling_tool_call_patch: bool,
    lazy_init: bool = True,
) -> list[AgentMiddleware]:
    """Assemble the base middleware list shared by agent runtimes.

    Args:
        include_uploads: Insert ``UploadsMiddleware`` right after the thread-data middleware.
        include_dangling_tool_call_patch: Append ``DanglingToolCallMiddleware``.
        lazy_init: Forwarded to ``ThreadDataMiddleware`` and ``SandboxMiddleware``.

    Returns:
        The middlewares in registration order; the sandbox audit and tool
        error handling middlewares always come last.
    """
    from deerflow.agents.middlewares.llm_error_handling_middleware import LLMErrorHandlingMiddleware
    from deerflow.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
    from deerflow.sandbox.middleware import SandboxMiddleware

    chain: list[AgentMiddleware] = [
        ThreadDataMiddleware(lazy_init=lazy_init),
        SandboxMiddleware(lazy_init=lazy_init),
    ]

    if include_uploads:
        from deerflow.agents.middlewares.uploads_middleware import UploadsMiddleware

        # Index 1 places uploads between the thread-data and sandbox middlewares.
        chain.insert(1, UploadsMiddleware())

    if include_dangling_tool_call_patch:
        from deerflow.agents.middlewares.dangling_tool_call_middleware import DanglingToolCallMiddleware

        chain.append(DanglingToolCallMiddleware())

    chain.append(LLMErrorHandlingMiddleware())

    # Optional guardrail middleware, driven by configuration.
    from deerflow.config.guardrails_config import get_guardrails_config

    guardrails = get_guardrails_config()
    if guardrails.enabled and guardrails.provider:
        import inspect

        from deerflow.guardrails.middleware import GuardrailMiddleware
        from deerflow.reflection import resolve_variable

        provider_cls = resolve_variable(guardrails.provider.use)
        provider_kwargs = dict(guardrails.provider.config) if guardrails.provider.config else {}

        # Offer the "framework" hint only to constructors that can take it,
        # either as a named parameter or through **kwargs, and never override
        # a value the configuration already supplies.
        if "framework" not in provider_kwargs:
            try:
                init_params = inspect.signature(provider_cls.__init__).parameters
                takes_var_kwargs = any(param.kind == inspect.Parameter.VAR_KEYWORD for param in init_params.values())
                if "framework" in init_params or takes_var_kwargs:
                    provider_kwargs["framework"] = "deerflow"
            except (ValueError, TypeError):
                pass

        provider = provider_cls(**provider_kwargs)
        chain.append(GuardrailMiddleware(provider, fail_closed=guardrails.fail_closed, passport=guardrails.passport))

    from deerflow.agents.middlewares.sandbox_audit_middleware import SandboxAuditMiddleware

    chain.append(SandboxAuditMiddleware())
    chain.append(ToolErrorHandlingMiddleware())
    return chain
def build_lead_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
    """Return the shared base middlewares for the lead agent (uploads included)."""
    lead_chain = _build_runtime_middlewares(
        lazy_init=lazy_init,
        include_dangling_tool_call_patch=True,
        include_uploads=True,
    )
    return lead_chain
def build_subagent_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
    """Return the shared base middlewares for subagents (no uploads middleware)."""
    subagent_chain = _build_runtime_middlewares(
        lazy_init=lazy_init,
        include_dangling_tool_call_patch=True,
        include_uploads=False,
    )
    return subagent_chain

View File

@@ -0,0 +1,293 @@
"""Middleware to inject uploaded files information into agent context."""
import logging
from pathlib import Path
from typing import NotRequired, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langgraph.runtime import Runtime
from deerflow.config.paths import Paths, get_paths
from deerflow.utils.file_conversion import extract_outline
logger = logging.getLogger(__name__)
_OUTLINE_PREVIEW_LINES = 5
def _extract_outline_for_file(file_path: Path) -> tuple[list[dict], list[str]]:
"""Return the document outline and fallback preview for *file_path*.
Looks for a sibling ``<stem>.md`` file produced by the upload conversion
pipeline.
Returns:
(outline, preview) where:
- outline: list of ``{title, line}`` dicts (plus optional sentinel).
Empty when no headings are found or no .md exists.
- preview: first few non-empty lines of the .md, used as a content
anchor when outline is empty so the agent has some context.
Empty when outline is non-empty (no fallback needed).
"""
md_path = file_path.with_suffix(".md")
if not md_path.is_file():
return [], []
outline = extract_outline(md_path)
if outline:
logger.debug("Extracted %d outline entries from %s", len(outline), file_path.name)
return outline, []
# outline is empty — read the first few non-empty lines as a content preview
preview: list[str] = []
try:
with md_path.open(encoding="utf-8") as f:
for line in f:
stripped = line.strip()
if stripped:
preview.append(stripped)
if len(preview) >= _OUTLINE_PREVIEW_LINES:
break
except Exception:
logger.debug("Failed to read preview lines from %s", md_path, exc_info=True)
return [], preview
class UploadsMiddlewareState(AgentState):
    """State schema for uploads middleware."""

    # Optional list of uploaded-file dicts; the key may be absent or None.
    uploaded_files: NotRequired[list[dict] | None]
class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
"""Middleware to inject uploaded files information into the agent context.
Reads file metadata from the current message's additional_kwargs.files
(set by the frontend after upload) and prepends an <uploaded_files> block
to the last human message so the model knows which files are available.
"""
state_schema = UploadsMiddlewareState
def __init__(self, base_dir: str | None = None):
    """Set up path resolution for the uploads middleware.

    Args:
        base_dir: Base directory for thread data; a falsy value selects the
            shared ``get_paths()`` result instead.
    """
    super().__init__()
    if base_dir:
        self._paths = Paths(base_dir)
    else:
        self._paths = get_paths()
def _format_file_entry(self, file: dict, lines: list[str]) -> None:
"""Append a single file entry (name, size, path, optional outline) to lines."""
size_kb = file["size"] / 1024
size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
lines.append(f"- {file['filename']} ({size_str})")
lines.append(f" Path: {file['path']}")
outline = file.get("outline") or []
if outline:
truncated = outline[-1].get("truncated", False)
visible = [e for e in outline if not e.get("truncated")]
lines.append(" Document outline (use `read_file` with line ranges to read sections):")
for entry in visible:
lines.append(f" L{entry['line']}: {entry['title']}")
if truncated:
lines.append(f" ... (showing first {len(visible)} headings; use `read_file` to explore further)")
else:
preview = file.get("outline_preview") or []
if preview:
lines.append(" No structural headings detected. Document begins with:")
for text in preview:
lines.append(f" > {text}")
lines.append(" Use `grep` to search for keywords (e.g. `grep(pattern='keyword', path='/mnt/user-data/uploads/')`).")
lines.append("")
def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
"""Create a formatted message listing uploaded files.
Args:
new_files: Files uploaded in the current message.
historical_files: Files uploaded in previous messages.
Each file dict may contain an optional ``outline`` key — a list of
``{title, line}`` dicts extracted from the converted Markdown file.
Returns:
Formatted string inside <uploaded_files> tags.
"""
lines = ["<uploaded_files>"]
lines.append("The following files were uploaded in this message:")
lines.append("")
if new_files:
for file in new_files:
self._format_file_entry(file, lines)
else:
lines.append("(empty)")
lines.append("")
if historical_files:
lines.append("The following files were uploaded in previous messages and are still available:")
lines.append("")
for file in historical_files:
self._format_file_entry(file, lines)
lines.append("To work with these files:")
lines.append("- Read from the file first — use the outline line numbers and `read_file` to locate relevant sections.")
lines.append("- Use `grep` to search for keywords when you are not sure which section to look at")
lines.append(" (e.g. `grep(pattern='revenue', path='/mnt/user-data/uploads/')`).")
lines.append("- Use `glob` to find files by name pattern")
lines.append(" (e.g. `glob(pattern='**/*.md', path='/mnt/user-data/uploads/')`).")
lines.append("- Only fall back to web search if the file content is clearly insufficient to answer the question.")
lines.append("</uploaded_files>")
return "\n".join(lines)
def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None:
"""Extract file info from message additional_kwargs.files.
The frontend sends uploaded file metadata in additional_kwargs.files
after a successful upload. Each entry has: filename, size (bytes),
path (virtual path), status.
Args:
message: The human message to inspect.
uploads_dir: Physical uploads directory used to verify file existence.
When provided, entries whose files no longer exist are skipped.
Returns:
List of file dicts with virtual paths, or None if the field is absent or empty.
"""
kwargs_files = (message.additional_kwargs or {}).get("files")
if not isinstance(kwargs_files, list) or not kwargs_files:
return None
files = []
for f in kwargs_files:
if not isinstance(f, dict):
continue
filename = f.get("filename") or ""
if not filename or Path(filename).name != filename:
continue
if uploads_dir is not None and not (uploads_dir / filename).is_file():
continue
files.append(
{
"filename": filename,
"size": int(f.get("size") or 0),
"path": f"/mnt/user-data/uploads/{filename}",
"extension": Path(filename).suffix,
}
)
return files if files else None
@override
def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
"""Inject uploaded files information before agent execution.
New files come from the current message's additional_kwargs.files.
Historical files are scanned from the thread's uploads directory,
excluding the new ones.
Prepends <uploaded_files> context to the last human message content.
The original additional_kwargs (including files metadata) is preserved
on the updated message so the frontend can read it from the stream.
Args:
state: Current agent state.
runtime: Runtime context containing thread_id.
Returns:
State updates including uploaded files list.
"""
messages = list(state.get("messages", []))
if not messages:
return None
last_message_index = len(messages) - 1
last_message = messages[last_message_index]
if not isinstance(last_message, HumanMessage):
return None
# Resolve uploads directory for existence checks
thread_id = (runtime.context or {}).get("thread_id")
if thread_id is None:
try:
from langgraph.config import get_config
thread_id = get_config().get("configurable", {}).get("thread_id")
except RuntimeError:
pass # get_config() raises outside a runnable context (e.g. unit tests)
uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None
# Get newly uploaded files from the current message's additional_kwargs.files
new_files = self._files_from_kwargs(last_message, uploads_dir) or []
# Collect historical files from the uploads directory (all except the new ones)
new_filenames = {f["filename"] for f in new_files}
historical_files: list[dict] = []
if uploads_dir and uploads_dir.exists():
for file_path in sorted(uploads_dir.iterdir()):
if file_path.is_file() and file_path.name not in new_filenames:
stat = file_path.stat()
outline, preview = _extract_outline_for_file(file_path)
historical_files.append(
{
"filename": file_path.name,
"size": stat.st_size,
"path": f"/mnt/user-data/uploads/{file_path.name}",
"extension": file_path.suffix,
"outline": outline,
"outline_preview": preview,
}
)
# Attach outlines to new files as well
if uploads_dir:
for file in new_files:
phys_path = uploads_dir / file["filename"]
outline, preview = _extract_outline_for_file(phys_path)
file["outline"] = outline
file["outline_preview"] = preview
if not new_files and not historical_files:
return None
logger.debug(f"New files: {[f['filename'] for f in new_files]}, historical: {[f['filename'] for f in historical_files]}")
# Create files message and prepend to the last human message content
files_message = self._create_files_message(new_files, historical_files)
# Extract original content - handle both string and list formats
original_content = last_message.content
if isinstance(original_content, str):
# Simple case: string content, just prepend files message
updated_content = f"{files_message}\n\n{original_content}"
elif isinstance(original_content, list):
# Complex case: list content (multimodal), preserve all blocks
# Prepend files message as the first text block
files_block = {"type": "text", "text": f"{files_message}\n\n"}
# Keep all original blocks (including images)
updated_content = [files_block, *original_content]
else:
# Other types, preserve as-is
updated_content = original_content
# Create new message with combined content.
# Preserve additional_kwargs (including files metadata) so the frontend
# can read structured file info from the streamed message.
updated_message = HumanMessage(
content=updated_content,
id=last_message.id,
additional_kwargs=last_message.additional_kwargs,
)
messages[last_message_index] = updated_message
return {
"uploaded_files": new_files,
"messages": messages,
}

View File

@@ -0,0 +1,222 @@
"""Middleware for injecting image details into conversation before LLM call."""
import logging
from typing import override
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langgraph.runtime import Runtime
from deerflow.agents.thread_state import ThreadState
logger = logging.getLogger(__name__)
class ViewImageMiddlewareState(ThreadState):
    """State schema used by ViewImageMiddleware.

    Subclasses ThreadState without adding any fields: reusing the thread
    state means its reducer-backed keys keep their annotations.
    """
class ViewImageMiddleware(AgentMiddleware[ViewImageMiddlewareState]):
    """Feed viewed images back to the model once view_image tool calls finish.

    Before each LLM call the middleware:

    1. Finds the most recent assistant message.
    2. Checks that it issued at least one ``view_image`` tool call.
    3. Checks that every tool call of that message has a ToolMessage after it.
    4. Checks that no image-details human message was already added after it.
    5. If all checks pass, appends a human message built from
       ``state["viewed_images"]`` (text plus base64 ``image_url`` blocks).

    The model therefore receives the loaded images without the user having
    to ask for them explicitly.
    """

    state_schema = ViewImageMiddlewareState

    def _get_last_assistant_message(self, messages: list) -> AIMessage | None:
        """Return the most recent AIMessage in the list.

        Args:
            messages: List of messages

        Returns:
            The last AIMessage, or None when the list contains none
        """
        return next((entry for entry in reversed(messages) if isinstance(entry, AIMessage)), None)

    def _has_view_image_tool(self, message: AIMessage) -> bool:
        """Tell whether the assistant message requested a view_image call.

        Args:
            message: Assistant message to check

        Returns:
            True if any of its tool calls is named "view_image"
        """
        calls = getattr(message, "tool_calls", None)
        if not calls:
            return False
        for call in calls:
            if call.get("name") == "view_image":
                return True
        return False

    def _all_tools_completed(self, messages: list, assistant_msg: AIMessage) -> bool:
        """Tell whether every tool call of the assistant message was answered.

        Args:
            messages: List of all messages
            assistant_msg: The assistant message containing tool calls

        Returns:
            True if each tool call id has a ToolMessage after the assistant
            message; False when there are no tool calls or the message is
            not in the list
        """
        calls = getattr(assistant_msg, "tool_calls", None)
        if not calls:
            return False

        # Tool calls without an id are ignored.
        expected_ids = {call.get("id") for call in calls if call.get("id")}

        try:
            first_after = messages.index(assistant_msg) + 1
        except ValueError:
            return False

        # Only ToolMessages located after the assistant message count.
        answered_ids = {entry.tool_call_id for entry in messages[first_after:] if isinstance(entry, ToolMessage) and entry.tool_call_id}
        return expected_ids <= answered_ids

    def _create_image_details_message(self, state: ViewImageMiddlewareState) -> list[str | dict]:
        """Build the content blocks describing every viewed image.

        Args:
            state: Current state containing viewed_images

        Returns:
            List of content blocks (text and images) for the HumanMessage
        """
        images = state.get("viewed_images", {})
        if not images:
            # A single well-formed text block rather than a bare string.
            return [{"type": "text", "text": "No images have been viewed."}]

        blocks: list[str | dict] = [{"type": "text", "text": "Here are the images you've viewed:"}]
        for path, details in images.items():
            mime = details.get("mime_type", "unknown")
            payload = details.get("base64", "")

            # One text line per image ...
            blocks.append({"type": "text", "text": f"\n- **{path}** ({mime})"})
            if not payload:
                continue
            # ... followed by the image itself as a data URL when data is present.
            blocks.append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime};base64,{payload}"},
                }
            )
        return blocks

    def _should_inject_image_message(self, state: ViewImageMiddlewareState) -> bool:
        """Decide whether an image-details message has to be added now.

        Args:
            state: Current state

        Returns:
            True if the message should be injected
        """
        messages = state.get("messages", [])
        if not messages:
            return False

        assistant = self._get_last_assistant_message(messages)
        if not assistant or not self._has_view_image_tool(assistant) or not self._all_tools_completed(messages, assistant):
            return False

        # A human message carrying one of these markers after the assistant
        # message means the details were already added.
        markers = (
            "Here are the images you've viewed",
            "Here are the details of the images you've viewed",
        )
        first_after = messages.index(assistant) + 1
        for entry in messages[first_after:]:
            if not isinstance(entry, HumanMessage):
                continue
            rendered = str(entry.content)
            if any(marker in rendered for marker in markers):
                return False
        return True

    def _inject_image_message(self, state: ViewImageMiddlewareState) -> dict | None:
        """Produce the state update shared by the sync and async hooks.

        Args:
            state: Current state

        Returns:
            State update with additional human message, or None if no update needed
        """
        if not self._should_inject_image_message(state):
            return None

        # Mixed content: text blocks plus image_url blocks.
        injected = HumanMessage(content=self._create_image_details_message(state))
        logger.debug("Injecting image details message with images before LLM call")
        return {"messages": [injected]}

    @override
    def before_model(self, state: ViewImageMiddlewareState, runtime: Runtime) -> dict | None:
        """Synchronous hook: add the image-details message when it is due.

        Runs before each LLM call and delegates to ``_inject_image_message``.

        Args:
            state: Current state
            runtime: Runtime context (unused but required by interface)

        Returns:
            State update with additional human message, or None if no update needed
        """
        return self._inject_image_message(state)

    @override
    async def abefore_model(self, state: ViewImageMiddlewareState, runtime: Runtime) -> dict | None:
        """Asynchronous hook: add the image-details message when it is due.

        Runs before each LLM call and delegates to ``_inject_image_message``.

        Args:
            state: Current state
            runtime: Runtime context (unused but required by interface)

        Returns:
            State update with additional human message, or None if no update needed
        """
        return self._inject_image_message(state)

View File

@@ -0,0 +1,55 @@
from typing import Annotated, NotRequired, TypedDict
from langchain.agents import AgentState
class SandboxState(TypedDict):
    """Typed dict used as the type of the ``sandbox`` key of ThreadState."""

    # The key may be missing entirely or set to None.
    # Presumably the identifier of the sandbox bound to the thread -- confirm with the code that writes it.
    sandbox_id: NotRequired[str | None]
class ThreadDataState(TypedDict):
    """Typed dict used as the type of the ``thread_data`` key of ThreadState.

    Each key may be missing entirely or set to None.
    """

    # NOTE(review): presumably the thread's workspace, uploads and outputs
    # directories as path strings -- confirm against the code that fills them.
    workspace_path: NotRequired[str | None]
    uploads_path: NotRequired[str | None]
    outputs_path: NotRequired[str | None]
class ViewedImageData(TypedDict):
    """Payload stored per image in ``ThreadState.viewed_images``.

    ViewImageMiddleware combines the two fields into a
    ``data:{mime_type};base64,{base64}`` URL.
    """

    # Base64 text of the image; both keys are required.
    base64: str
    # MIME type placed in the data URL.
    mime_type: str
def merge_artifacts(existing: list[str] | None, new: list[str] | None) -> list[str]:
"""Reducer for artifacts list - merges and deduplicates artifacts."""
if existing is None:
return new or []
if new is None:
return existing
# Use dict.fromkeys to deduplicate while preserving order
return list(dict.fromkeys(existing + new))
def merge_viewed_images(existing: dict[str, ViewedImageData] | None, new: dict[str, ViewedImageData] | None) -> dict[str, ViewedImageData]:
    """Reducer for the viewed_images mapping.

    An update that is an empty dict wipes the stored images, which lets a
    middleware clear the state once the images have been processed. A None
    update leaves the stored images as they are. Otherwise the update is
    layered over the stored mapping, with the update winning on shared keys.
    """
    if existing is None:
        # Nothing stored yet: take the update, normalising None/empty to {}.
        return new if new else {}
    if new is None:
        return existing
    if not new:
        # Empty update is the "clear everything" signal.
        return {}

    combined = dict(existing)
    combined.update(new)
    return combined
class ThreadState(AgentState):
    """Agent state extended with the per-thread keys declared below.

    ``artifacts`` and ``viewed_images`` are annotated with reducer functions;
    the remaining keys are plain optional values that may be absent or None.
    """

    # Sandbox info, shaped as SandboxState.
    sandbox: NotRequired[SandboxState | None]
    # Thread path info, shaped as ThreadDataState.
    thread_data: NotRequired[ThreadDataState | None]
    # Presumably the conversation title -- TODO confirm against the code that sets it.
    title: NotRequired[str | None]
    # Updates are combined through the merge_artifacts reducer.
    artifacts: Annotated[list[str], merge_artifacts]
    # Element type is not constrained here; presumably to-do items -- confirm with the producer.
    todos: NotRequired[list | None]
    # List of file dicts; the uploads middleware's before_agent returns its new files under this key.
    uploaded_files: NotRequired[list[dict] | None]
    # Updates are combined through merge_viewed_images; an empty-dict update clears the mapping.
    viewed_images: Annotated[dict[str, ViewedImageData], merge_viewed_images] # image_path -> {base64, mime_type}