Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
354 lines
12 KiB
Python
354 lines
12 KiB
Python
"""Memory API router for retrieving and managing global memory data."""
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
from pydantic import BaseModel, Field
|
|
|
|
from deerflow.agents.memory.updater import (
|
|
clear_memory_data,
|
|
create_memory_fact,
|
|
delete_memory_fact,
|
|
get_memory_data,
|
|
import_memory_data,
|
|
reload_memory_data,
|
|
update_memory_fact,
|
|
)
|
|
from deerflow.config.memory_config import get_memory_config
|
|
|
|
# Router that all memory endpoints in this module are registered on; it is
# created with the "/api" path prefix and the "memory" tag.
router = APIRouter(prefix="/api", tags=["memory"])
|
|
|
|
|
|
class ContextSection(BaseModel):
    """Model for context sections (user and history)."""

    # Text summary of the section; defaults to an empty string.
    summary: str = Field(
        default="",
        description="Summary content",
    )
    # Last-update timestamp carried as a plain string; defaults to an empty string.
    updatedAt: str = Field(
        default="",
        description="Last update timestamp",
    )
|
|
|
|
|
|
class UserContext(BaseModel):
    """Model for user context."""

    # Each section is optional in the payload; a missing one is filled with a
    # default-constructed ContextSection.
    workContext: ContextSection = Field(
        default_factory=ContextSection,
    )
    personalContext: ContextSection = Field(
        default_factory=ContextSection,
    )
    topOfMind: ContextSection = Field(
        default_factory=ContextSection,
    )
|
|
|
|
|
|
class HistoryContext(BaseModel):
    """Model for history context."""

    # Each section is optional in the payload; a missing one is filled with a
    # default-constructed ContextSection.
    recentMonths: ContextSection = Field(
        default_factory=ContextSection,
    )
    earlierContext: ContextSection = Field(
        default_factory=ContextSection,
    )
    longTermBackground: ContextSection = Field(
        default_factory=ContextSection,
    )
|
|
|
|
|
|
class Fact(BaseModel):
    """Model for a memory fact."""

    # --- required fields ---
    id: str = Field(
        ...,
        description="Unique identifier for the fact",
    )
    content: str = Field(
        ...,
        description="Fact content",
    )

    # --- optional fields with defaults ---
    category: str = Field(
        default="context",
        description="Fact category",
    )
    # No range constraint is declared here; only the request models bound
    # confidence to [0, 1].
    confidence: float = Field(
        default=0.5,
        description="Confidence score (0-1)",
    )
    createdAt: str = Field(
        default="",
        description="Creation timestamp",
    )
    source: str = Field(
        default="unknown",
        description="Source thread ID",
    )
    # The only nullable field on this model; defaults to None.
    sourceError: str | None = Field(
        default=None,
        description="Optional description of the prior mistake or wrong approach",
    )
|
|
|
|
|
|
class MemoryResponse(BaseModel):
    """Response model for memory data."""

    # Every field has a default, so an empty mapping still yields a complete
    # response object.
    version: str = Field(
        default="1.0",
        description="Memory schema version",
    )
    lastUpdated: str = Field(
        default="",
        description="Last update timestamp",
    )
    user: UserContext = Field(
        default_factory=UserContext,
    )
    history: HistoryContext = Field(
        default_factory=HistoryContext,
    )
    facts: list[Fact] = Field(
        default_factory=list,
    )
|
|
|
|
|
|
def _map_memory_fact_value_error(exc: ValueError) -> HTTPException:
    """Translate a ``ValueError`` from the memory updater into a 400 response.

    Args:
        exc: The validation error to translate. Only its first positional
            argument is inspected.

    Returns:
        An ``HTTPException`` with status 400. The detail is the
        confidence-range message when ``exc.args[0] == "confidence"``, and the
        empty-content message for every other ``ValueError``. The exception is
        returned rather than raised so callers can chain it with ``raise ... from``.
    """
    # Guard clause: the updater marks confidence problems with this sentinel argument.
    if exc.args and exc.args[0] == "confidence":
        return HTTPException(
            status_code=400,
            detail="Invalid confidence value; must be between 0 and 1.",
        )

    # Anything else is reported as an empty-content error.
    return HTTPException(
        status_code=400,
        detail="Memory fact content cannot be empty.",
    )
|
|
|
|
|
|
class FactCreateRequest(BaseModel):
    """Request model for creating a memory fact."""

    # Required, and must contain at least one character.
    content: str = Field(
        ...,
        min_length=1,
        description="Fact content",
    )
    category: str = Field(
        default="context",
        description="Fact category",
    )
    # Bounded to the closed interval [0.0, 1.0].
    confidence: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Confidence score (0-1)",
    )
|
|
|
|
|
|
class FactPatchRequest(BaseModel):
    """PATCH request model that preserves existing values for omitted fields."""

    # Every field defaults to None, so a client may send any subset of them.
    # When provided, content must contain at least one character.
    content: str | None = Field(
        default=None,
        min_length=1,
        description="Fact content",
    )
    category: str | None = Field(
        default=None,
        description="Fact category",
    )
    # When provided, bounded to the closed interval [0.0, 1.0].
    confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Confidence score (0-1)",
    )
|
|
|
|
|
|
class MemoryConfigResponse(BaseModel):
    """Response model for memory configuration."""

    # All fields are required; none declares a default.
    enabled: bool = Field(
        ...,
        description="Whether memory is enabled",
    )
    storage_path: str = Field(
        ...,
        description="Path to memory storage file",
    )
    debounce_seconds: int = Field(
        ...,
        description="Debounce time for memory updates",
    )
    max_facts: int = Field(
        ...,
        description="Maximum number of facts to store",
    )
    fact_confidence_threshold: float = Field(
        ...,
        description="Minimum confidence threshold for facts",
    )
    injection_enabled: bool = Field(
        ...,
        description="Whether memory injection is enabled",
    )
    max_injection_tokens: int = Field(
        ...,
        description="Maximum tokens for memory injection",
    )
|
|
|
|
|
|
class MemoryStatusResponse(BaseModel):
    """Response model for memory status."""

    # Memory configuration settings (required).
    config: MemoryConfigResponse

    # Memory data payload (required).
    data: MemoryResponse
|
|
|
|
|
|
@router.get(
    "/memory",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Get Memory Data",
    description="Retrieve the current global memory data including user context, history, and facts.",
)
async def get_memory() -> MemoryResponse:
    """Return the current global memory data.

    The mapping returned by ``get_memory_data()`` is unpacked into a
    ``MemoryResponse``; the route excludes ``None``-valued fields from the output.

    Returns:
        The memory data: schema version, last-update timestamp, user context,
        history context and the list of facts.

    Example Response:
        ```json
        {
            "version": "1.0",
            "lastUpdated": "2024-01-15T10:30:00Z",
            "user": {
                "workContext": {"summary": "Working on DeerFlow project", "updatedAt": "..."},
                "personalContext": {"summary": "Prefers concise responses", "updatedAt": "..."},
                "topOfMind": {"summary": "Building memory API", "updatedAt": "..."}
            },
            "history": {
                "recentMonths": {"summary": "Recent development activities", "updatedAt": "..."},
                "earlierContext": {"summary": "", "updatedAt": ""},
                "longTermBackground": {"summary": "", "updatedAt": ""}
            },
            "facts": [
                {
                    "id": "fact_abc123",
                    "content": "User prefers TypeScript over JavaScript",
                    "category": "preference",
                    "confidence": 0.9,
                    "createdAt": "2024-01-15T10:30:00Z",
                    "source": "thread_xyz"
                }
            ]
        }
        ```
    """
    return MemoryResponse(**get_memory_data())
|
|
|
|
|
|
@router.post(
    "/memory/reload",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Reload Memory Data",
    description="Reload memory data from the storage file, refreshing the in-memory cache.",
)
async def reload_memory() -> MemoryResponse:
    """Force a reload of the memory data and return the result.

    Delegates to ``reload_memory_data()``. Intended for the case where the
    storage file has been modified outside of this service.

    Returns:
        The reloaded memory data.
    """
    return MemoryResponse(**reload_memory_data())
|
|
|
|
|
|
@router.delete(
    "/memory",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Clear All Memory Data",
    description="Delete all saved memory data and reset the memory structure to an empty state.",
)
async def clear_memory() -> MemoryResponse:
    """Clear all persisted memory data and return what the updater reports afterwards.

    Returns:
        The memory data returned by ``clear_memory_data()``.

    Raises:
        HTTPException: 500 when ``clear_memory_data()`` raises ``OSError``.
    """
    try:
        cleared = clear_memory_data()
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc
    else:
        # Built outside the guarded call so only the updater's OSError is mapped to a 500.
        return MemoryResponse(**cleared)
|
|
|
|
|
|
@router.post(
    "/memory/facts",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Create Memory Fact",
    description="Create a single saved memory fact manually.",
)
async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryResponse:
    """Create one memory fact from the request body.

    Args:
        request: Validated payload carrying the content, category and confidence.

    Returns:
        The memory data returned by ``create_memory_fact()``.

    Raises:
        HTTPException: 400 when the updater raises ``ValueError`` (message chosen
            by ``_map_memory_fact_value_error``); 500 when it raises ``OSError``.
    """
    new_fact = {
        "content": request.content,
        "category": request.category,
        "confidence": request.confidence,
    }

    try:
        updated = create_memory_fact(**new_fact)
    except ValueError as exc:
        raise _map_memory_fact_value_error(exc) from exc
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to create memory fact.") from exc
    else:
        return MemoryResponse(**updated)
|
|
|
|
|
|
@router.delete(
    "/memory/facts/{fact_id}",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Delete Memory Fact",
    description="Delete a single saved memory fact by its fact id.",
)
async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
    """Remove the fact identified by ``fact_id``.

    Args:
        fact_id: Identifier of the fact to delete, taken from the URL path.

    Returns:
        The memory data returned by ``delete_memory_fact()``.

    Raises:
        HTTPException: 404 when the updater raises ``KeyError``; 500 when it
            raises ``OSError``.
    """
    try:
        remaining = delete_memory_fact(fact_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to delete memory fact.") from exc
    else:
        return MemoryResponse(**remaining)
|
|
|
|
|
|
@router.patch(
    "/memory/facts/{fact_id}",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Patch Memory Fact",
    description="Partially update a single saved memory fact by its fact id while preserving omitted fields.",
)
async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -> MemoryResponse:
    """Apply a partial update to the fact identified by ``fact_id``.

    Args:
        fact_id: Identifier of the fact to update, taken from the URL path.
        request: Validated patch payload; fields the client omitted are ``None``
            and are forwarded to the updater as such.

    Returns:
        The memory data returned by ``update_memory_fact()``.

    Raises:
        HTTPException: 400 when the updater raises ``ValueError`` (message chosen
            by ``_map_memory_fact_value_error``); 404 when it raises ``KeyError``;
            500 when it raises ``OSError``.
    """
    changes = {
        "content": request.content,
        "category": request.category,
        "confidence": request.confidence,
    }

    try:
        updated = update_memory_fact(fact_id=fact_id, **changes)
    except ValueError as exc:
        raise _map_memory_fact_value_error(exc) from exc
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to update memory fact.") from exc
    else:
        return MemoryResponse(**updated)
|
|
|
|
|
|
@router.get(
    "/memory/export",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Export Memory Data",
    description="Export the current global memory data as JSON for backup or transfer.",
)
async def export_memory() -> MemoryResponse:
    """Return the current memory data for export.

    Returns:
        The mapping from ``get_memory_data()`` wrapped in a ``MemoryResponse``.
    """
    return MemoryResponse(**get_memory_data())
|
|
|
|
|
|
@router.post(
    "/memory/import",
    response_model=MemoryResponse,
    response_model_exclude_none=True,
    summary="Import Memory Data",
    description="Import and overwrite the current global memory data from a JSON payload.",
)
async def import_memory(request: MemoryResponse) -> MemoryResponse:
    """Hand the submitted memory data to the updater for import.

    Args:
        request: Memory data to import, validated against ``MemoryResponse``.

    Returns:
        The memory data returned by ``import_memory_data()``.

    Raises:
        HTTPException: 500 when ``import_memory_data()`` raises ``OSError``.
    """
    # The updater receives a plain dict dump of the validated model.
    payload = request.model_dump()

    try:
        stored = import_memory_data(payload)
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc
    else:
        return MemoryResponse(**stored)
|
|
|
|
|
|
@router.get(
    "/memory/config",
    response_model=MemoryConfigResponse,
    summary="Get Memory Configuration",
    description="Retrieve the current memory system configuration.",
)
async def get_memory_config_endpoint() -> MemoryConfigResponse:
    """Return the memory system configuration.

    Copies the seven exposed settings from the object returned by
    ``get_memory_config()`` into a ``MemoryConfigResponse``.

    Returns:
        The current memory configuration settings.

    Example Response:
        ```json
        {
            "enabled": true,
            "storage_path": ".deer-flow/memory.json",
            "debounce_seconds": 30,
            "max_facts": 100,
            "fact_confidence_threshold": 0.7,
            "injection_enabled": true,
            "max_injection_tokens": 2000
        }
        ```
    """
    cfg = get_memory_config()
    settings = {
        "enabled": cfg.enabled,
        "storage_path": cfg.storage_path,
        "debounce_seconds": cfg.debounce_seconds,
        "max_facts": cfg.max_facts,
        "fact_confidence_threshold": cfg.fact_confidence_threshold,
        "injection_enabled": cfg.injection_enabled,
        "max_injection_tokens": cfg.max_injection_tokens,
    }
    return MemoryConfigResponse(**settings)
|
|
|
|
|
|
@router.get(
    "/memory/status",
    response_model=MemoryStatusResponse,
    response_model_exclude_none=True,
    summary="Get Memory Status",
    description="Retrieve both memory configuration and current data in a single request.",
)
async def get_memory_status() -> MemoryStatusResponse:
    """Return the memory configuration and the current memory data together.

    Returns:
        A ``MemoryStatusResponse`` whose ``config`` is built from
        ``get_memory_config()`` and whose ``data`` is built from
        ``get_memory_data()``.
    """
    # Fetch both sources first, in the same order as before, then assemble.
    cfg = get_memory_config()
    snapshot = get_memory_data()

    config_view = MemoryConfigResponse(
        enabled=cfg.enabled,
        storage_path=cfg.storage_path,
        debounce_seconds=cfg.debounce_seconds,
        max_facts=cfg.max_facts,
        fact_confidence_threshold=cfg.fact_confidence_threshold,
        injection_enabled=cfg.injection_enabled,
        max_injection_tokens=cfg.max_injection_tokens,
    )
    data_view = MemoryResponse(**snapshot)

    return MemoryStatusResponse(config=config_view, data=data_view)
|