Files
deerflow-factory/deer-flow/backend/tests/test_patched_openai.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loudly rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

177 lines
6.2 KiB
Python

"""Tests for deerflow.models.patched_openai.PatchedChatOpenAI.
These tests verify that _restore_tool_call_signatures correctly re-injects
``thought_signature`` onto tool-call objects stored in
``additional_kwargs["tool_calls"]``, covering id-based matching, positional
fallback, camelCase keys, and several edge cases.
"""
from __future__ import annotations
from langchain_core.messages import AIMessage
from deerflow.models.patched_openai import _restore_tool_call_signatures
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Shared fixtures.
#
# RAW_* dicts play the role of the provider's raw tool-call entries that the
# tests place in ``AIMessage.additional_kwargs["tool_calls"]``; only the
# signed one carries a ``thought_signature``.
# PAYLOAD_* dicts play the role of the outgoing request tool-calls, which
# carry the same ids and functions but no signature.
# Tests that put a PAYLOAD_* fixture into a payload copy it first; the dicts
# are module-level and shared between tests.

# Raw tool-call that carries a signature.
RAW_TC_SIGNED = {
    "id": "call_1",
    "type": "function",
    "function": {
        "name": "web_fetch",
        "arguments": '{"url":"http://example.com"}',
    },
    "thought_signature": "SIG_A==",
}

# Raw tool-call without any signature key.
RAW_TC_UNSIGNED = {
    "id": "call_2",
    "type": "function",
    "function": {
        "name": "bash",
        "arguments": '{"cmd":"ls"}',
    },
}

# Outgoing payload counterpart of RAW_TC_SIGNED (same id, no signature).
PAYLOAD_TC_1 = {
    "type": "function",
    "id": "call_1",
    "function": {
        "name": "web_fetch",
        "arguments": '{"url":"http://example.com"}',
    },
}

# Outgoing payload counterpart of RAW_TC_UNSIGNED.
PAYLOAD_TC_2 = {
    "type": "function",
    "id": "call_2",
    "function": {
        "name": "bash",
        "arguments": '{"cmd":"ls"}',
    },
}
def _ai_msg_with_raw_tool_calls(raw_tool_calls: list[dict]) -> AIMessage:
    """Build an empty-content ``AIMessage`` carrying *raw_tool_calls*.

    The list is passed through as-is under
    ``additional_kwargs["tool_calls"]``.
    """
    extra_kwargs = {"tool_calls": raw_tool_calls}
    return AIMessage(content="", additional_kwargs=extra_kwargs)
# ---------------------------------------------------------------------------
# Core: signed tool-call restoration
# ---------------------------------------------------------------------------
def test_tool_call_signature_restored_by_id():
    """A payload tool-call sharing its id with a signed raw call gets the signature."""
    source_message = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED])
    outgoing = {
        "role": "assistant",
        "content": None,
        # Copy so the shared module-level fixture is never mutated.
        "tool_calls": [dict(PAYLOAD_TC_1)],
    }

    _restore_tool_call_signatures(outgoing, source_message)

    restored_call = outgoing["tool_calls"][0]
    assert restored_call["thought_signature"] == "SIG_A=="
def test_tool_call_signature_for_parallel_calls():
    """With one signed and one unsigned raw call, only the signed one is restored.

    The fixture follows the parallel-call shape the original author attributes
    to the Gemini spec: the first call is signed, the second is not.
    """
    source_message = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED, RAW_TC_UNSIGNED])
    # Copies keep the shared module-level fixtures pristine.
    outgoing_calls = [dict(PAYLOAD_TC_1), dict(PAYLOAD_TC_2)]
    outgoing = {"role": "assistant", "content": None, "tool_calls": outgoing_calls}

    _restore_tool_call_signatures(outgoing, source_message)

    restored = outgoing["tool_calls"]
    assert restored[0]["thought_signature"] == "SIG_A=="
    assert "thought_signature" not in restored[1]
def test_tool_call_signature_camel_case():
    """A raw ``thoughtSignature`` (camelCase) lands on the payload as ``thought_signature``.

    The original note says some gateways emit the camelCase spelling.
    """
    camel_signed_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "web_fetch", "arguments": "{}"},
        "thoughtSignature": "SIG_CAMEL==",
    }
    source_message = _ai_msg_with_raw_tool_calls([camel_signed_call])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [dict(PAYLOAD_TC_1)],
    }

    _restore_tool_call_signatures(outgoing, source_message)

    restored_call = outgoing["tool_calls"][0]
    assert restored_call["thought_signature"] == "SIG_CAMEL=="
def test_tool_call_signature_positional_fallback():
    """Without a usable id match, the signature is applied by list position.

    The raw call has no ``id`` at all and the payload call uses an unrelated
    one, so index 0 on both sides is the only thing pairing them.
    """
    signed_without_id = {
        "type": "function",
        "function": {"name": "web_fetch", "arguments": "{}"},
        "thought_signature": "SIG_POS==",
    }
    outgoing_call = {
        "type": "function",
        "id": "call_99",
        "function": {"name": "web_fetch", "arguments": "{}"},
    }
    source_message = _ai_msg_with_raw_tool_calls([signed_without_id])
    outgoing = {"role": "assistant", "content": None, "tool_calls": [outgoing_call]}

    _restore_tool_call_signatures(outgoing, source_message)

    # Checked on the dict object itself: the restoration is expected in place.
    assert outgoing_call["thought_signature"] == "SIG_POS=="
# ---------------------------------------------------------------------------
# Edge cases: no-op scenarios for tool-call signatures
# ---------------------------------------------------------------------------
def test_tool_call_no_raw_tool_calls_is_noop():
    """An original message with empty ``additional_kwargs`` adds no signature."""
    source_message = AIMessage(content="", additional_kwargs={})
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [dict(PAYLOAD_TC_1)],
    }

    _restore_tool_call_signatures(outgoing, source_message)

    untouched_call = outgoing["tool_calls"][0]
    assert "thought_signature" not in untouched_call
def test_tool_call_no_payload_tool_calls_is_noop():
    """A text-only payload does not gain a ``tool_calls`` key."""
    source_message = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED])
    outgoing = {"role": "assistant", "content": "just text"}

    _restore_tool_call_signatures(outgoing, source_message)

    assert "tool_calls" not in outgoing
def test_tool_call_unsigned_raw_entries_is_noop():
    """Raw tool-calls lacking a signature leave the payload tool-call unsigned."""
    source_message = _ai_msg_with_raw_tool_calls([RAW_TC_UNSIGNED])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [dict(PAYLOAD_TC_2)],
    }

    _restore_tool_call_signatures(outgoing, source_message)

    untouched_call = outgoing["tool_calls"][0]
    assert "thought_signature" not in untouched_call
def test_tool_call_multiple_sequential_signatures():
    """Two signed raw calls each restore their own signature to the matching payload call."""
    signed_first = {
        "id": "call_a",
        "type": "function",
        "function": {"name": "check_flight", "arguments": "{}"},
        "thought_signature": "SIG_STEP1==",
    }
    signed_second = {
        "id": "call_b",
        "type": "function",
        "function": {"name": "book_taxi", "arguments": "{}"},
        "thought_signature": "SIG_STEP2==",
    }
    outgoing_first = {
        "type": "function",
        "id": "call_a",
        "function": {"name": "check_flight", "arguments": "{}"},
    }
    outgoing_second = {
        "type": "function",
        "id": "call_b",
        "function": {"name": "book_taxi", "arguments": "{}"},
    }
    source_message = _ai_msg_with_raw_tool_calls([signed_first, signed_second])
    outgoing = {
        "role": "assistant",
        "content": None,
        "tool_calls": [outgoing_first, outgoing_second],
    }

    _restore_tool_call_signatures(outgoing, source_message)

    # Checked on the dict objects themselves: restoration is expected in place.
    assert outgoing_first["thought_signature"] == "SIG_STEP1=="
    assert outgoing_second["thought_signature"] == "SIG_STEP2=="
# Integration behavior for PatchedChatOpenAI is validated indirectly via
# _restore_tool_call_signatures unit coverage above.