Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
353
deer-flow/backend/app/gateway/routers/memory.py
Normal file
353
deer-flow/backend/app/gateway/routers/memory.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""Memory API router for retrieving and managing global memory data."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from deerflow.agents.memory.updater import (
|
||||
clear_memory_data,
|
||||
create_memory_fact,
|
||||
delete_memory_fact,
|
||||
get_memory_data,
|
||||
import_memory_data,
|
||||
reload_memory_data,
|
||||
update_memory_fact,
|
||||
)
|
||||
from deerflow.config.memory_config import get_memory_config
|
||||
|
||||
router = APIRouter(prefix="/api", tags=["memory"])
|
||||
|
||||
|
||||
class ContextSection(BaseModel):
|
||||
"""Model for context sections (user and history)."""
|
||||
|
||||
summary: str = Field(default="", description="Summary content")
|
||||
updatedAt: str = Field(default="", description="Last update timestamp")
|
||||
|
||||
|
||||
class UserContext(BaseModel):
|
||||
"""Model for user context."""
|
||||
|
||||
workContext: ContextSection = Field(default_factory=ContextSection)
|
||||
personalContext: ContextSection = Field(default_factory=ContextSection)
|
||||
topOfMind: ContextSection = Field(default_factory=ContextSection)
|
||||
|
||||
|
||||
class HistoryContext(BaseModel):
|
||||
"""Model for history context."""
|
||||
|
||||
recentMonths: ContextSection = Field(default_factory=ContextSection)
|
||||
earlierContext: ContextSection = Field(default_factory=ContextSection)
|
||||
longTermBackground: ContextSection = Field(default_factory=ContextSection)
|
||||
|
||||
|
||||
class Fact(BaseModel):
|
||||
"""Model for a memory fact."""
|
||||
|
||||
id: str = Field(..., description="Unique identifier for the fact")
|
||||
content: str = Field(..., description="Fact content")
|
||||
category: str = Field(default="context", description="Fact category")
|
||||
confidence: float = Field(default=0.5, description="Confidence score (0-1)")
|
||||
createdAt: str = Field(default="", description="Creation timestamp")
|
||||
source: str = Field(default="unknown", description="Source thread ID")
|
||||
sourceError: str | None = Field(default=None, description="Optional description of the prior mistake or wrong approach")
|
||||
|
||||
|
||||
class MemoryResponse(BaseModel):
|
||||
"""Response model for memory data."""
|
||||
|
||||
version: str = Field(default="1.0", description="Memory schema version")
|
||||
lastUpdated: str = Field(default="", description="Last update timestamp")
|
||||
user: UserContext = Field(default_factory=UserContext)
|
||||
history: HistoryContext = Field(default_factory=HistoryContext)
|
||||
facts: list[Fact] = Field(default_factory=list)
|
||||
|
||||
|
||||
def _map_memory_fact_value_error(exc: ValueError) -> HTTPException:
|
||||
"""Convert updater validation errors into stable API responses."""
|
||||
if exc.args and exc.args[0] == "confidence":
|
||||
detail = "Invalid confidence value; must be between 0 and 1."
|
||||
else:
|
||||
detail = "Memory fact content cannot be empty."
|
||||
return HTTPException(status_code=400, detail=detail)
|
||||
|
||||
|
||||
class FactCreateRequest(BaseModel):
|
||||
"""Request model for creating a memory fact."""
|
||||
|
||||
content: str = Field(..., min_length=1, description="Fact content")
|
||||
category: str = Field(default="context", description="Fact category")
|
||||
confidence: float = Field(default=0.5, ge=0.0, le=1.0, description="Confidence score (0-1)")
|
||||
|
||||
|
||||
class FactPatchRequest(BaseModel):
|
||||
"""PATCH request model that preserves existing values for omitted fields."""
|
||||
|
||||
content: str | None = Field(default=None, min_length=1, description="Fact content")
|
||||
category: str | None = Field(default=None, description="Fact category")
|
||||
confidence: float | None = Field(default=None, ge=0.0, le=1.0, description="Confidence score (0-1)")
|
||||
|
||||
|
||||
class MemoryConfigResponse(BaseModel):
|
||||
"""Response model for memory configuration."""
|
||||
|
||||
enabled: bool = Field(..., description="Whether memory is enabled")
|
||||
storage_path: str = Field(..., description="Path to memory storage file")
|
||||
debounce_seconds: int = Field(..., description="Debounce time for memory updates")
|
||||
max_facts: int = Field(..., description="Maximum number of facts to store")
|
||||
fact_confidence_threshold: float = Field(..., description="Minimum confidence threshold for facts")
|
||||
injection_enabled: bool = Field(..., description="Whether memory injection is enabled")
|
||||
max_injection_tokens: int = Field(..., description="Maximum tokens for memory injection")
|
||||
|
||||
|
||||
class MemoryStatusResponse(BaseModel):
|
||||
"""Response model for memory status."""
|
||||
|
||||
config: MemoryConfigResponse
|
||||
data: MemoryResponse
|
||||
|
||||
|
||||
@router.get(
|
||||
"/memory",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Get Memory Data",
|
||||
description="Retrieve the current global memory data including user context, history, and facts.",
|
||||
)
|
||||
async def get_memory() -> MemoryResponse:
|
||||
"""Get the current global memory data.
|
||||
|
||||
Returns:
|
||||
The current memory data with user context, history, and facts.
|
||||
|
||||
Example Response:
|
||||
```json
|
||||
{
|
||||
"version": "1.0",
|
||||
"lastUpdated": "2024-01-15T10:30:00Z",
|
||||
"user": {
|
||||
"workContext": {"summary": "Working on DeerFlow project", "updatedAt": "..."},
|
||||
"personalContext": {"summary": "Prefers concise responses", "updatedAt": "..."},
|
||||
"topOfMind": {"summary": "Building memory API", "updatedAt": "..."}
|
||||
},
|
||||
"history": {
|
||||
"recentMonths": {"summary": "Recent development activities", "updatedAt": "..."},
|
||||
"earlierContext": {"summary": "", "updatedAt": ""},
|
||||
"longTermBackground": {"summary": "", "updatedAt": ""}
|
||||
},
|
||||
"facts": [
|
||||
{
|
||||
"id": "fact_abc123",
|
||||
"content": "User prefers TypeScript over JavaScript",
|
||||
"category": "preference",
|
||||
"confidence": 0.9,
|
||||
"createdAt": "2024-01-15T10:30:00Z",
|
||||
"source": "thread_xyz"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
"""
|
||||
memory_data = get_memory_data()
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/memory/reload",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Reload Memory Data",
|
||||
description="Reload memory data from the storage file, refreshing the in-memory cache.",
|
||||
)
|
||||
async def reload_memory() -> MemoryResponse:
|
||||
"""Reload memory data from file.
|
||||
|
||||
This forces a reload of the memory data from the storage file,
|
||||
useful when the file has been modified externally.
|
||||
|
||||
Returns:
|
||||
The reloaded memory data.
|
||||
"""
|
||||
memory_data = reload_memory_data()
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.delete(
|
||||
"/memory",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Clear All Memory Data",
|
||||
description="Delete all saved memory data and reset the memory structure to an empty state.",
|
||||
)
|
||||
async def clear_memory() -> MemoryResponse:
|
||||
"""Clear all persisted memory data."""
|
||||
try:
|
||||
memory_data = clear_memory_data()
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc
|
||||
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/memory/facts",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Create Memory Fact",
|
||||
description="Create a single saved memory fact manually.",
|
||||
)
|
||||
async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryResponse:
|
||||
"""Create a single fact manually."""
|
||||
try:
|
||||
memory_data = create_memory_fact(
|
||||
content=request.content,
|
||||
category=request.category,
|
||||
confidence=request.confidence,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise _map_memory_fact_value_error(exc) from exc
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to create memory fact.") from exc
|
||||
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.delete(
|
||||
"/memory/facts/{fact_id}",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Delete Memory Fact",
|
||||
description="Delete a single saved memory fact by its fact id.",
|
||||
)
|
||||
async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
|
||||
"""Delete a single fact from memory by fact id."""
|
||||
try:
|
||||
memory_data = delete_memory_fact(fact_id)
|
||||
except KeyError as exc:
|
||||
raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to delete memory fact.") from exc
|
||||
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.patch(
|
||||
"/memory/facts/{fact_id}",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Patch Memory Fact",
|
||||
description="Partially update a single saved memory fact by its fact id while preserving omitted fields.",
|
||||
)
|
||||
async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -> MemoryResponse:
|
||||
"""Partially update a single fact manually."""
|
||||
try:
|
||||
memory_data = update_memory_fact(
|
||||
fact_id=fact_id,
|
||||
content=request.content,
|
||||
category=request.category,
|
||||
confidence=request.confidence,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise _map_memory_fact_value_error(exc) from exc
|
||||
except KeyError as exc:
|
||||
raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to update memory fact.") from exc
|
||||
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/memory/export",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Export Memory Data",
|
||||
description="Export the current global memory data as JSON for backup or transfer.",
|
||||
)
|
||||
async def export_memory() -> MemoryResponse:
|
||||
"""Export the current memory data."""
|
||||
memory_data = get_memory_data()
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/memory/import",
|
||||
response_model=MemoryResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Import Memory Data",
|
||||
description="Import and overwrite the current global memory data from a JSON payload.",
|
||||
)
|
||||
async def import_memory(request: MemoryResponse) -> MemoryResponse:
|
||||
"""Import and persist memory data."""
|
||||
try:
|
||||
memory_data = import_memory_data(request.model_dump())
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc
|
||||
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/memory/config",
|
||||
response_model=MemoryConfigResponse,
|
||||
summary="Get Memory Configuration",
|
||||
description="Retrieve the current memory system configuration.",
|
||||
)
|
||||
async def get_memory_config_endpoint() -> MemoryConfigResponse:
|
||||
"""Get the memory system configuration.
|
||||
|
||||
Returns:
|
||||
The current memory configuration settings.
|
||||
|
||||
Example Response:
|
||||
```json
|
||||
{
|
||||
"enabled": true,
|
||||
"storage_path": ".deer-flow/memory.json",
|
||||
"debounce_seconds": 30,
|
||||
"max_facts": 100,
|
||||
"fact_confidence_threshold": 0.7,
|
||||
"injection_enabled": true,
|
||||
"max_injection_tokens": 2000
|
||||
}
|
||||
```
|
||||
"""
|
||||
config = get_memory_config()
|
||||
return MemoryConfigResponse(
|
||||
enabled=config.enabled,
|
||||
storage_path=config.storage_path,
|
||||
debounce_seconds=config.debounce_seconds,
|
||||
max_facts=config.max_facts,
|
||||
fact_confidence_threshold=config.fact_confidence_threshold,
|
||||
injection_enabled=config.injection_enabled,
|
||||
max_injection_tokens=config.max_injection_tokens,
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/memory/status",
|
||||
response_model=MemoryStatusResponse,
|
||||
response_model_exclude_none=True,
|
||||
summary="Get Memory Status",
|
||||
description="Retrieve both memory configuration and current data in a single request.",
|
||||
)
|
||||
async def get_memory_status() -> MemoryStatusResponse:
|
||||
"""Get the memory system status including configuration and data.
|
||||
|
||||
Returns:
|
||||
Combined memory configuration and current data.
|
||||
"""
|
||||
config = get_memory_config()
|
||||
memory_data = get_memory_data()
|
||||
|
||||
return MemoryStatusResponse(
|
||||
config=MemoryConfigResponse(
|
||||
enabled=config.enabled,
|
||||
storage_path=config.storage_path,
|
||||
debounce_seconds=config.debounce_seconds,
|
||||
max_facts=config.max_facts,
|
||||
fact_confidence_threshold=config.fact_confidence_threshold,
|
||||
injection_enabled=config.injection_enabled,
|
||||
max_injection_tokens=config.max_injection_tokens,
|
||||
),
|
||||
data=MemoryResponse(**memory_data),
|
||||
)
|
||||
Reference in New Issue
Block a user