Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
176
deer-flow/backend/tests/test_patched_openai.py
Normal file
176
deer-flow/backend/tests/test_patched_openai.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Tests for deerflow.models.patched_openai.PatchedChatOpenAI.
|
||||
|
||||
These tests verify that _restore_tool_call_signatures correctly re-injects
|
||||
``thought_signature`` onto tool-call objects stored in
|
||||
``additional_kwargs["tool_calls"]``, covering id-based matching, positional
|
||||
fallback, camelCase keys, and several edge-cases.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
from deerflow.models.patched_openai import _restore_tool_call_signatures
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Raw tool-call entry (as stored on the original message) that carries a
# snake_case ``thought_signature``.
RAW_TC_SIGNED = {
    "id": "call_1",
    "type": "function",
    "function": {
        "name": "web_fetch",
        "arguments": '{"url":"http://example.com"}',
    },
    "thought_signature": "SIG_A==",
}

# Raw tool-call entry with no signature key at all.
RAW_TC_UNSIGNED = {
    "id": "call_2",
    "type": "function",
    "function": {
        "name": "bash",
        "arguments": '{"cmd":"ls"}',
    },
}

# Payload-side tool-call whose id lines up with RAW_TC_SIGNED.
PAYLOAD_TC_1 = {
    "type": "function",
    "id": "call_1",
    "function": {
        "name": "web_fetch",
        "arguments": '{"url":"http://example.com"}',
    },
}

# Payload-side tool-call whose id lines up with RAW_TC_UNSIGNED.
PAYLOAD_TC_2 = {
    "type": "function",
    "id": "call_2",
    "function": {
        "name": "bash",
        "arguments": '{"cmd":"ls"}',
    },
}
|
||||
|
||||
|
||||
def _ai_msg_with_raw_tool_calls(raw_tool_calls: list[dict]) -> AIMessage:
    """Return an empty-content AIMessage holding *raw_tool_calls*.

    The list is stored, uncopied, under ``additional_kwargs["tool_calls"]``.
    """
    extra_kwargs = {"tool_calls": raw_tool_calls}
    return AIMessage(content="", additional_kwargs=extra_kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core: signed tool-call restoration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_tool_call_signature_restored_by_id():
    """A payload tool-call sharing its id with a signed raw entry gains that signature."""
    source_msg = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [PAYLOAD_TC_1.copy()],
    }

    _restore_tool_call_signatures(outgoing, source_msg)

    first_call = outgoing["tool_calls"][0]
    assert first_call["thought_signature"] == "SIG_A=="
|
||||
|
||||
|
||||
def test_tool_call_signature_for_parallel_calls():
    """Parallel calls: the signed first entry is restored, the unsigned second stays bare.

    The fixture follows the convention noted by the test author (per Gemini
    spec): only the first of several parallel function calls is signed.
    """
    source_msg = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED, RAW_TC_UNSIGNED])
    outgoing_calls = [PAYLOAD_TC_1.copy(), PAYLOAD_TC_2.copy()]
    outgoing = {"role": "assistant", "content": None, "tool_calls": outgoing_calls}

    _restore_tool_call_signatures(outgoing, source_msg)

    first_call = outgoing["tool_calls"][0]
    second_call = outgoing["tool_calls"][1]
    assert first_call["thought_signature"] == "SIG_A=="
    assert "thought_signature" not in second_call
|
||||
|
||||
|
||||
def test_tool_call_signature_camel_case():
    """A raw entry keyed ``thoughtSignature`` is restored as ``thought_signature``."""
    camel_cased_raw = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "web_fetch", "arguments": "{}"},
        # camelCase spelling, as emitted by some gateways
        "thoughtSignature": "SIG_CAMEL==",
    }
    source_msg = _ai_msg_with_raw_tool_calls([camel_cased_raw])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [PAYLOAD_TC_1.copy()],
    }

    _restore_tool_call_signatures(outgoing, source_msg)

    first_call = outgoing["tool_calls"][0]
    assert first_call["thought_signature"] == "SIG_CAMEL=="
|
||||
|
||||
|
||||
def test_tool_call_signature_positional_fallback():
    """With no id match available, the signature is applied by list position.

    The raw entry has no ``id`` while the payload entry is ``call_99``, so the
    only way to pair them is by index.
    """
    target_call = {
        "type": "function",
        "id": "call_99",
        "function": {"name": "web_fetch", "arguments": "{}"},
    }
    id_less_raw = {
        "type": "function",
        "function": {"name": "web_fetch", "arguments": "{}"},
        "thought_signature": "SIG_POS==",
    }
    source_msg = _ai_msg_with_raw_tool_calls([id_less_raw])
    outgoing = {"role": "assistant", "content": None, "tool_calls": [target_call]}

    _restore_tool_call_signatures(outgoing, source_msg)

    # Checked on the very dict placed in the payload: restoration is in place.
    assert target_call["thought_signature"] == "SIG_POS=="
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge cases: no-op scenarios for tool-call signatures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_tool_call_no_raw_tool_calls_is_noop():
    """Payload tool-calls stay unsigned when the original message has empty additional_kwargs."""
    source_msg = AIMessage(content="", additional_kwargs={})
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [PAYLOAD_TC_1.copy()],
    }

    _restore_tool_call_signatures(outgoing, source_msg)

    first_call = outgoing["tool_calls"][0]
    assert "thought_signature" not in first_call
|
||||
|
||||
|
||||
def test_tool_call_no_payload_tool_calls_is_noop():
    """A text-only payload does not acquire a ``tool_calls`` key."""
    source_msg = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED])
    outgoing = {"role": "assistant", "content": "just text"}

    _restore_tool_call_signatures(outgoing, source_msg)

    assert "tool_calls" not in outgoing
|
||||
|
||||
|
||||
def test_tool_call_unsigned_raw_entries_is_noop():
    """An unsigned raw entry leaves the matching payload tool-call without a signature."""
    source_msg = _ai_msg_with_raw_tool_calls([RAW_TC_UNSIGNED])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [PAYLOAD_TC_2.copy()],
    }

    _restore_tool_call_signatures(outgoing, source_msg)

    first_call = outgoing["tool_calls"][0]
    assert "thought_signature" not in first_call
|
||||
|
||||
|
||||
def test_tool_call_multiple_sequential_signatures():
    """Two signed raw entries each hand their own signature to the payload entry with the same id."""
    # (call id, function name, signature) for each sequential step.
    steps = [
        ("call_a", "check_flight", "SIG_STEP1=="),
        ("call_b", "book_taxi", "SIG_STEP2=="),
    ]
    raw_calls = [
        {
            "id": call_id,
            "type": "function",
            "function": {"name": fn_name, "arguments": "{}"},
            "thought_signature": signature,
        }
        for call_id, fn_name, signature in steps
    ]
    payload_calls = [
        {
            "type": "function",
            "id": call_id,
            "function": {"name": fn_name, "arguments": "{}"},
        }
        for call_id, fn_name, _ in steps
    ]
    outgoing = {"role": "assistant", "content": None, "tool_calls": payload_calls}
    source_msg = _ai_msg_with_raw_tool_calls(raw_calls)

    _restore_tool_call_signatures(outgoing, source_msg)

    # Checked on the very dicts placed in the payload, in step order.
    for payload_call, (_, _, signature) in zip(payload_calls, steps):
        assert payload_call["thought_signature"] == signature
|
||||
|
||||
|
||||
# Integration behavior for PatchedChatOpenAI is validated indirectly via
|
||||
# _restore_tool_call_signatures unit coverage above.
|
||||
Reference in New Issue
Block a user