Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:

- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
150 lines
4.9 KiB
Python
150 lines
4.9 KiB
Python
import os
|
|
import threading
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Held by get_tracing_config() while it builds and stores the cached config.
_config_lock = threading.Lock()
|
|
|
|
|
|
class LangSmithTracingConfig(BaseModel):
    """Configuration for LangSmith tracing."""

    # All four fields are required (``Field(...)`` declares no default).
    enabled: bool = Field(...)
    api_key: str | None = Field(...)
    project: str = Field(...)
    endpoint: str = Field(...)

    @property
    def is_configured(self) -> bool:
        """Tell whether tracing is enabled and an API key is available."""
        if not self.enabled:
            return False
        return bool(self.api_key)

    def validate(self) -> None:
        """Check that an enabled configuration carries an API key.

        Raises:
            ValueError: If ``enabled`` is true but ``api_key`` is missing or empty.
        """
        # Nothing to complain about when tracing is off or the key is present.
        if not self.enabled or self.api_key:
            return
        raise ValueError("LangSmith tracing is enabled but LANGSMITH_API_KEY (or LANGCHAIN_API_KEY) is not set.")
|
|
|
|
|
|
class LangfuseTracingConfig(BaseModel):
    """Configuration for Langfuse tracing."""

    # All four fields are required (``Field(...)`` declares no default).
    enabled: bool = Field(...)
    public_key: str | None = Field(...)
    secret_key: str | None = Field(...)
    host: str = Field(...)

    @property
    def is_configured(self) -> bool:
        """Tell whether tracing is enabled and both keys are available."""
        if not self.enabled:
            return False
        return bool(self.public_key) and bool(self.secret_key)

    def validate(self) -> None:
        """Check that an enabled configuration carries both Langfuse keys.

        Raises:
            ValueError: If ``enabled`` is true and the public key, the secret
                key, or both are missing or empty. The message names every
                missing setting.
        """
        if not self.enabled:
            return

        # Pair each setting's environment name with the value held on the model,
        # keeping only the names whose value is absent.
        required = (
            ("LANGFUSE_PUBLIC_KEY", self.public_key),
            ("LANGFUSE_SECRET_KEY", self.secret_key),
        )
        missing: list[str] = [env_name for env_name, held in required if not held]

        if missing:
            raise ValueError(f"Langfuse tracing is enabled but required settings are missing: {', '.join(missing)}")
|
|
|
|
|
|
class TracingConfig(BaseModel):
    """Tracing configuration for supported providers."""

    # One required sub-configuration per supported provider.
    langsmith: LangSmithTracingConfig = Field(...)
    langfuse: LangfuseTracingConfig = Field(...)

    @property
    def is_configured(self) -> bool:
        """Tell whether at least one provider is enabled and fully configured."""
        return len(self.enabled_providers) > 0

    @property
    def explicitly_enabled_providers(self) -> list[str]:
        """List provider names whose ``enabled`` flag is set, complete or not."""
        providers = (("langsmith", self.langsmith), ("langfuse", self.langfuse))
        return [label for label, settings in providers if settings.enabled]

    @property
    def enabled_providers(self) -> list[str]:
        """List provider names that report themselves as ``is_configured``."""
        providers = (("langsmith", self.langsmith), ("langfuse", self.langfuse))
        return [label for label, settings in providers if settings.is_configured]

    def validate_enabled(self) -> None:
        """Run each provider's ``validate()``, LangSmith first, then Langfuse.

        Any exception raised by a provider's ``validate()`` propagates to the
        caller, so a LangSmith failure is reported before Langfuse is checked.
        """
        for settings in (self.langsmith, self.langfuse):
            settings.validate()
|
|
|
|
|
|
# Cached configuration; remains None until get_tracing_config() first builds it.
_tracing_config: TracingConfig | None = None
|
|
|
|
|
|
# Stripped, lower-cased environment values that _env_flag_preferred() treats as True.
_TRUTHY_VALUES = {"1", "true", "yes", "on"}
|
|
|
|
|
|
def _env_flag_preferred(*names: str) -> bool:
    """Interpret the first set, non-blank environment variable as a boolean.

    The candidate names are checked in the order given. The first one whose
    value is not empty after stripping whitespace decides the result: True
    when its lower-cased value is in ``_TRUTHY_VALUES``, False otherwise.
    Returns False when no candidate is set to a non-blank value.
    """
    for candidate in names:
        raw = os.environ.get(candidate)
        if raw is None:
            continue
        normalized = raw.strip()
        if normalized:
            # The first usable variable wins, even if its value is falsy.
            return normalized.lower() in _TRUTHY_VALUES
    return False
|
|
|
|
|
|
def _first_env_value(*names: str) -> str | None:
|
|
"""Return the first non-empty environment value from candidate names."""
|
|
for name in names:
|
|
value = os.environ.get(name)
|
|
if value and value.strip():
|
|
return value.strip()
|
|
return None
|
|
|
|
|
|
def get_tracing_config() -> TracingConfig:
    """Return the cached tracing configuration, building it on first use.

    The configuration is read from environment variables once and stored in
    the module-level ``_tracing_config``; later calls return that same object.
    Each LangSmith setting also accepts its ``LANGCHAIN_*`` variant as a
    fallback, and unset project/endpoint/host values fall back to built-in
    defaults.
    """
    global _tracing_config

    if _tracing_config is None:
        with _config_lock:
            # Check again while holding the lock: the value may have been set
            # between the unlocked check above and acquiring the lock.
            if _tracing_config is None:
                langsmith_settings = LangSmithTracingConfig(
                    enabled=_env_flag_preferred("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2", "LANGCHAIN_TRACING"),
                    api_key=_first_env_value("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"),
                    project=_first_env_value("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT") or "deer-flow",
                    endpoint=_first_env_value("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT") or "https://api.smith.langchain.com",
                )
                langfuse_settings = LangfuseTracingConfig(
                    enabled=_env_flag_preferred("LANGFUSE_TRACING"),
                    public_key=_first_env_value("LANGFUSE_PUBLIC_KEY"),
                    secret_key=_first_env_value("LANGFUSE_SECRET_KEY"),
                    host=_first_env_value("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com",
                )
                _tracing_config = TracingConfig(
                    langsmith=langsmith_settings,
                    langfuse=langfuse_settings,
                )

    return _tracing_config
|
|
|
|
|
|
def get_enabled_tracing_providers() -> list[str]:
    """List the providers the current config reports as enabled and complete."""
    config = get_tracing_config()
    return config.enabled_providers
|
|
|
|
|
|
def get_explicitly_enabled_tracing_providers() -> list[str]:
    """List the providers whose enable flag is set, whether or not they are complete."""
    config = get_tracing_config()
    return config.explicitly_enabled_providers
|
|
|
|
|
|
def validate_enabled_tracing_providers() -> None:
    """Validate the current config's explicitly enabled providers.

    Delegates to ``validate_enabled()`` on the current tracing config; any
    exception it raises propagates to the caller.
    """
    config = get_tracing_config()
    config.validate_enabled()
|
|
|
|
|
|
def is_tracing_enabled() -> bool:
    """Tell whether the current config has any enabled, fully configured provider."""
    config = get_tracing_config()
    return config.is_configured
|