Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
74 lines
3.2 KiB
Python
74 lines
3.2 KiB
Python
"""Patched ChatDeepSeek that preserves reasoning_content in multi-turn conversations.
|
|
|
|
This module provides a patched version of ChatDeepSeek that properly handles
|
|
reasoning_content when sending messages back to the API. The original implementation
|
|
stores reasoning_content in additional_kwargs but doesn't include it when making
|
|
subsequent API calls, which causes errors with APIs that require reasoning_content
|
|
on all assistant messages when thinking mode is enabled.
|
|
"""
|
|
|
|
from typing import Any
|
|
|
|
from langchain_core.language_models import LanguageModelInput
|
|
from langchain_core.messages import AIMessage
|
|
from langchain_deepseek import ChatDeepSeek
|
|
|
|
|
|
class PatchedChatDeepSeek(ChatDeepSeek):
    """ChatDeepSeek variant that sends ``reasoning_content`` back to the API.

    With thinking/reasoning enabled, the API expects every assistant message
    in a multi-turn conversation to carry its ``reasoning_content``. This
    subclass copies that value from each ``AIMessage.additional_kwargs`` into
    the corresponding assistant entry of the outgoing request payload.
    """

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return ``True`` to mark this subclass as LangChain-serializable."""
        return True

    @property
    def lc_secrets(self) -> dict[str, str]:
        """Map both API-key argument names to the ``DEEPSEEK_API_KEY`` secret id."""
        return dict.fromkeys(("api_key", "openai_api_key"), "DEEPSEEK_API_KEY")

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> dict:
        """Build the request payload and re-attach ``reasoning_content``.

        The parent implementation produces the payload; afterwards every
        assistant entry in ``payload["messages"]`` receives the
        ``reasoning_content`` stored in the ``additional_kwargs`` of the
        ``AIMessage`` it is paired with (when that value is not ``None``).

        Args:
            input_: The model input, converted to messages via
                ``self._convert_input``.
            stop: Optional stop sequences, forwarded to the parent unchanged.
            **kwargs: Extra keyword arguments, forwarded to the parent unchanged.

        Returns:
            The parent's payload dict, with assistant messages updated in place.
        """
        # Capture the source messages so their additional_kwargs stay reachable
        # after the parent has flattened them into plain dicts.
        source_messages = self._convert_input(input_).to_messages()

        payload = super()._get_request_payload(input_, stop=stop, **kwargs)
        outgoing = payload.get("messages", [])

        if len(outgoing) == len(source_messages):
            # Same length: pair strictly by position and keep only the slots
            # that are an assistant entry backed by an AIMessage.
            pairs = [
                (entry, source)
                for entry, source in zip(outgoing, source_messages)
                if entry.get("role") == "assistant" and isinstance(source, AIMessage)
            ]
        else:
            # Lengths diverged: fall back to pairing the n-th AIMessage with
            # the n-th assistant entry.
            ai_sources = [source for source in source_messages if isinstance(source, AIMessage)]
            assistant_entries = [entry for entry in outgoing if entry.get("role") == "assistant"]
            pairs = list(zip(assistant_entries, ai_sources))

        for entry, source in pairs:
            reasoning = source.additional_kwargs.get("reasoning_content")
            if reasoning is not None:
                entry["reasoning_content"] = reasoning

        return payload
|