Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
"""Store provider for the DeerFlow runtime.
|
||||
|
||||
Re-exports the public API of both the async provider (for long-running
|
||||
servers) and the sync provider (for CLI tools and the embedded client).
|
||||
|
||||
Async usage (FastAPI lifespan)::
|
||||
|
||||
from deerflow.runtime.store import make_store
|
||||
|
||||
async with make_store() as store:
|
||||
app.state.store = store
|
||||
|
||||
Sync usage (CLI / DeerFlowClient)::
|
||||
|
||||
from deerflow.runtime.store import get_store, store_context
|
||||
|
||||
store = get_store() # singleton
|
||||
with store_context() as store: ... # one-shot
|
||||
"""
|
||||
|
||||
from .async_provider import make_store
|
||||
from .provider import get_store, reset_store, store_context
|
||||
|
||||
# Names re-exported by this package: the async factory from
# ``.async_provider`` and the sync helpers from ``.provider``.
__all__ = [
    # async
    "make_store",
    # sync
    "get_store",
    "reset_store",
    "store_context",
]
|
||||
@@ -0,0 +1,28 @@
|
||||
"""Shared SQLite connection utilities for store and checkpointer providers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
|
||||
from deerflow.config.paths import resolve_path
|
||||
|
||||
|
||||
def resolve_sqlite_conn_str(raw: str) -> str:
    """Normalise a configured SQLite target into a usable connection string.

    The in-memory marker ``":memory:"`` and any value beginning with
    ``file:`` (SQLite URI form) pass through untouched.  Every other value is
    treated as a plain filesystem path: it is handed to :func:`resolve_path`
    and the result is returned as a string.
    """
    is_special = raw == ":memory:" or raw.startswith("file:")
    return raw if is_special else str(resolve_path(raw))
|
||||
|
||||
|
||||
def ensure_sqlite_parent_dir(conn_str: str) -> None:
    """Make sure the directory that will hold a SQLite database file exists.

    Connection strings that do not name a plain filesystem path —
    ``":memory:"`` and ``file:`` URIs — are skipped without touching the
    filesystem.  For anything else the parent directory of the path is
    created, including missing intermediate directories; an already
    existing directory is not an error.
    """
    if conn_str == ":memory:" or conn_str.startswith("file:"):
        return

    db_dir = pathlib.Path(conn_str).parent
    db_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -0,0 +1,113 @@
|
||||
"""Async Store factory — backend mirrors the configured checkpointer.
|
||||
|
||||
The store and checkpointer share the same ``checkpointer`` section in
|
||||
*config.yaml* so they always use the same persistence backend:
|
||||
|
||||
- ``type: memory`` → :class:`langgraph.store.memory.InMemoryStore`
|
||||
- ``type: sqlite`` → :class:`langgraph.store.sqlite.aio.AsyncSqliteStore`
|
||||
- ``type: postgres`` → :class:`langgraph.store.postgres.aio.AsyncPostgresStore`
|
||||
|
||||
Usage (e.g. FastAPI lifespan)::
|
||||
|
||||
from deerflow.runtime.store import make_store
|
||||
|
||||
async with make_store() as store:
|
||||
app.state.store = store
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
from langgraph.store.base import BaseStore
|
||||
|
||||
from deerflow.config.app_config import get_app_config
|
||||
from deerflow.runtime.store.provider import POSTGRES_CONN_REQUIRED, POSTGRES_STORE_INSTALL, SQLITE_STORE_INSTALL, ensure_sqlite_parent_dir, resolve_sqlite_conn_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal backend factory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
async def _async_store(config) -> AsyncIterator[BaseStore]:
    """Open the async Store backend selected by ``config.type``.

    ``config`` is a
    :class:`deerflow.config.checkpointer_config.CheckpointerConfig` instance —
    the same object the checkpointer factory consumes.

    Backends:

    - ``"memory"``   → a fresh ``InMemoryStore``.
    - ``"sqlite"``   → ``AsyncSqliteStore``; ``connection_string`` falls back
      to ``"store.db"`` when unset, and the parent directory is created first.
    - ``"postgres"`` → ``AsyncPostgresStore``; ``connection_string`` is
      mandatory.

    For the database backends ``setup()`` is awaited before the store is
    yielded, and the connection is released when the context exits.

    Raises:
        ImportError: If the package backing the selected backend is missing.
        ValueError: If the postgres backend has no ``connection_string``, or
            ``config.type`` is not one of the supported values.
    """
    backend = config.type

    if backend == "memory":
        from langgraph.store.memory import InMemoryStore

        logger.info("Store: using InMemoryStore (in-process, not persistent)")
        yield InMemoryStore()

    elif backend == "sqlite":
        try:
            from langgraph.store.sqlite.aio import AsyncSqliteStore
        except ImportError as exc:
            raise ImportError(SQLITE_STORE_INSTALL) from exc

        db_target = resolve_sqlite_conn_str(config.connection_string or "store.db")
        ensure_sqlite_parent_dir(db_target)

        async with AsyncSqliteStore.from_conn_string(db_target) as sqlite_store:
            await sqlite_store.setup()
            logger.info("Store: using AsyncSqliteStore (%s)", db_target)
            yield sqlite_store

    elif backend == "postgres":
        # The import check deliberately precedes the connection-string check
        # so a missing package is reported first.
        try:
            from langgraph.store.postgres.aio import AsyncPostgresStore  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(POSTGRES_STORE_INSTALL) from exc

        dsn = config.connection_string
        if not dsn:
            raise ValueError(POSTGRES_CONN_REQUIRED)

        async with AsyncPostgresStore.from_conn_string(dsn) as pg_store:
            await pg_store.setup()
            logger.info("Store: using AsyncPostgresStore")
            yield pg_store

    else:
        raise ValueError(f"Unknown store backend type: {backend!r}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public async context manager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
async def make_store() -> AsyncIterator[BaseStore]:
    """Yield an async Store built from the app's ``checkpointer`` settings.

    The ``checkpointer`` section of *config.yaml* is the one also read by
    :func:`deerflow.agents.checkpointer.async_provider.make_checkpointer`, so
    store and checkpointer end up on the same persistence technology::

        async with make_store() as store:
            app.state.store = store

    When no ``checkpointer`` section is configured, a WARNING is logged and
    an :class:`~langgraph.store.memory.InMemoryStore` is yielded instead.
    """
    checkpointer_cfg = get_app_config().checkpointer

    if checkpointer_cfg is not None:
        # Configured backend: delegate construction and teardown.
        async with _async_store(checkpointer_cfg) as backend_store:
            yield backend_store
        return

    from langgraph.store.memory import InMemoryStore

    logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
    yield InMemoryStore()
|
||||
@@ -0,0 +1,188 @@
|
||||
"""Sync Store factory.
|
||||
|
||||
Provides a **sync singleton** and a **sync context manager** for CLI tools
|
||||
and the embedded :class:`~deerflow.client.DeerFlowClient`.
|
||||
|
||||
The backend mirrors the configured checkpointer so that both always use the
|
||||
same persistence technology. Supported backends: memory, sqlite, postgres.
|
||||
|
||||
Usage::
|
||||
|
||||
from deerflow.runtime.store.provider import get_store, store_context
|
||||
|
||||
# Singleton — reused across calls; stays open until reset_store() is called
|
||||
store = get_store()
|
||||
|
||||
# One-shot — fresh connection, closed on block exit
|
||||
with store_context() as store:
|
||||
store.put(("ns",), "key", {"value": 1})
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
from collections.abc import Iterator
|
||||
|
||||
from langgraph.store.base import BaseStore
|
||||
|
||||
from deerflow.config.app_config import get_app_config
|
||||
from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Error message constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# ImportError text used when the SQLite store classes cannot be imported.
SQLITE_STORE_INSTALL = "langgraph-checkpoint-sqlite is required for the SQLite store. Install it with: uv add langgraph-checkpoint-sqlite"
# ImportError text used when the PostgreSQL store classes cannot be imported.
POSTGRES_STORE_INSTALL = "langgraph-checkpoint-postgres is required for the PostgreSQL store. Install it with: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool"
# ValueError text used when the postgres backend is selected without a
# ``connection_string``.
POSTGRES_CONN_REQUIRED = "checkpointer.connection_string is required for the postgres backend"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sync factory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@contextlib.contextmanager
def _sync_store_cm(config) -> Iterator[BaseStore]:
    """Open the sync Store backend selected by ``config.type``.

    ``config`` is a
    :class:`~deerflow.config.checkpointer_config.CheckpointerConfig` instance —
    the same object the checkpointer factory consumes.

    Backends:

    - ``"memory"``   → a fresh ``InMemoryStore``.
    - ``"sqlite"``   → ``SqliteStore``; ``connection_string`` falls back to
      ``"store.db"`` when unset, and the parent directory is created first.
    - ``"postgres"`` → ``PostgresStore``; ``connection_string`` is mandatory.

    For the database backends ``setup()`` is called before the store is
    yielded, and the connection is released when the context exits.

    Raises:
        ImportError: If the package backing the selected backend is missing.
        ValueError: If the postgres backend has no ``connection_string``, or
            ``config.type`` is not one of the supported values.
    """
    backend = config.type

    if backend == "memory":
        from langgraph.store.memory import InMemoryStore

        logger.info("Store: using InMemoryStore (in-process, not persistent)")
        yield InMemoryStore()

    elif backend == "sqlite":
        try:
            from langgraph.store.sqlite import SqliteStore
        except ImportError as exc:
            raise ImportError(SQLITE_STORE_INSTALL) from exc

        db_target = resolve_sqlite_conn_str(config.connection_string or "store.db")
        ensure_sqlite_parent_dir(db_target)

        with SqliteStore.from_conn_string(db_target) as sqlite_store:
            sqlite_store.setup()
            logger.info("Store: using SqliteStore (%s)", db_target)
            yield sqlite_store

    elif backend == "postgres":
        # The import check deliberately precedes the connection-string check
        # so a missing package is reported first.
        try:
            from langgraph.store.postgres import PostgresStore  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(POSTGRES_STORE_INSTALL) from exc

        dsn = config.connection_string
        if not dsn:
            raise ValueError(POSTGRES_CONN_REQUIRED)

        with PostgresStore.from_conn_string(dsn) as pg_store:
            pg_store.setup()
            logger.info("Store: using PostgresStore")
            yield pg_store

    else:
        raise ValueError(f"Unknown store backend type: {backend!r}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sync singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Store cached by get_store(); None until the first call and again after
# reset_store().
_store: BaseStore | None = None
# Context manager that produced ``_store`` (stays None for the in-memory
# fallback); reset_store() exits it to close the backend.
_store_ctx = None  # open context manager keeping the connection alive
|
||||
|
||||
|
||||
def get_store() -> BaseStore:
    """Return the global sync Store singleton, creating it on first call.

    Later calls return the cached instance until :func:`reset_store` clears
    it.  When no checkpointer is configured in *config.yaml*, a WARNING is
    logged and an :class:`~langgraph.store.memory.InMemoryStore` is cached
    and returned.

    Module state is only updated once the backend has been opened
    successfully, so a failed attempt leaves nothing cached and the next call
    retries from scratch.

    Raises:
        ImportError: If the required package for the configured backend is not installed.
        ValueError: If ``connection_string`` is missing for a backend that requires it.
    """
    global _store, _store_ctx

    if _store is not None:
        return _store

    # Lazily load app config, mirroring the checkpointer singleton pattern so
    # that tests that set the global checkpointer config explicitly remain isolated.
    from deerflow.config.app_config import _app_config
    from deerflow.config.checkpointer_config import get_checkpointer_config

    config = get_checkpointer_config()

    if config is None and _app_config is None:
        # No checkpointer config yet and the app config was never loaded:
        # try loading it, then look again.  NOTE(review): this presumably
        # relies on get_app_config() populating the checkpointer config as a
        # side effect — confirm against deerflow.config.
        try:
            get_app_config()
        except FileNotFoundError:
            # Best effort: a missing config file just means the in-memory
            # fallback below is used.  Leave a trace for debugging.
            logger.debug("App config file not found while resolving the store backend", exc_info=True)
        config = get_checkpointer_config()

    if config is None:
        from langgraph.store.memory import InMemoryStore

        logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
        _store = InMemoryStore()
        return _store

    # Enter the context manager before publishing it: if opening the backend
    # raises, the globals stay untouched instead of holding a context manager
    # that was never successfully entered.
    ctx = _sync_store_cm(config)
    store = ctx.__enter__()
    _store_ctx = ctx
    _store = store
    return _store
|
||||
|
||||
|
||||
def reset_store() -> None:
    """Reset the sync singleton, forcing recreation on the next call.

    Clears the cached store and closes the backend connection that
    :func:`get_store` opened, if any.  Useful in tests or after a
    configuration change.

    The cached references are dropped *before* the backend is closed, so the
    module never keeps pointing at a half-closed store — even if closing is
    interrupted by something that is not an :class:`Exception` (for example
    ``KeyboardInterrupt``).  Ordinary exceptions raised while closing are
    logged as a warning and swallowed; cleanup is best-effort.
    """
    global _store, _store_ctx

    # Detach first (the right-hand side is evaluated before any assignment).
    ctx, _store_ctx, _store = _store_ctx, None, None
    if ctx is None:
        # Nothing to close: never created, or the in-memory fallback.
        return

    try:
        ctx.__exit__(None, None, None)
    except Exception:
        logger.warning("Error during store cleanup", exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sync context manager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@contextlib.contextmanager
def store_context() -> Iterator[BaseStore]:
    """Yield a one-shot sync Store and clean it up when the block exits.

    Nothing is cached here, in contrast to :func:`get_store`: every ``with``
    block opens its own connection and closes it again.  Intended for CLI
    scripts or tests that want deterministic cleanup::

        with store_context() as store:
            store.put(("threads",), thread_id, {...})

    When *config.yaml* has no checkpointer configured, a WARNING is logged
    and an :class:`~langgraph.store.memory.InMemoryStore` is yielded.
    """
    checkpointer_cfg = get_app_config().checkpointer

    if checkpointer_cfg is not None:
        # Configured backend: delegate construction and teardown.
        with _sync_store_cm(checkpointer_cfg) as backend_store:
            yield backend_store
        return

    from langgraph.store.memory import InMemoryStore

    logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
    yield InMemoryStore()
|
||||
Reference in New Issue
Block a user