Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
160 lines
4.4 KiB
Python
160 lines
4.4 KiB
Python
"""Tests for deerflow.runtime.serialization."""
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
class _FakePydanticV2:
|
|
"""Object with model_dump (Pydantic v2)."""
|
|
|
|
def model_dump(self):
|
|
return {"key": "v2"}
|
|
|
|
|
|
class _FakePydanticV1:
|
|
"""Object with dict (Pydantic v1)."""
|
|
|
|
def dict(self):
|
|
return {"key": "v1"}
|
|
|
|
|
|
class _Unprintable:
|
|
"""Object whose str() raises."""
|
|
|
|
def __str__(self):
|
|
raise RuntimeError("no str")
|
|
|
|
def __repr__(self):
|
|
return "<Unprintable>"
|
|
|
|
|
|
def test_serialize_none():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
assert serialize_lc_object(None) is None
|
|
|
|
|
|
def test_serialize_primitives():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
assert serialize_lc_object("hello") == "hello"
|
|
assert serialize_lc_object(42) == 42
|
|
assert serialize_lc_object(3.14) == 3.14
|
|
assert serialize_lc_object(True) is True
|
|
|
|
|
|
def test_serialize_dict():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
obj = {"a": _FakePydanticV2(), "b": [1, "two"]}
|
|
result = serialize_lc_object(obj)
|
|
assert result == {"a": {"key": "v2"}, "b": [1, "two"]}
|
|
|
|
|
|
def test_serialize_list():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
result = serialize_lc_object([_FakePydanticV1(), 1])
|
|
assert result == [{"key": "v1"}, 1]
|
|
|
|
|
|
def test_serialize_tuple():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
result = serialize_lc_object((_FakePydanticV2(),))
|
|
assert result == [{"key": "v2"}]
|
|
|
|
|
|
def test_serialize_pydantic_v2():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
assert serialize_lc_object(_FakePydanticV2()) == {"key": "v2"}
|
|
|
|
|
|
def test_serialize_pydantic_v1():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
assert serialize_lc_object(_FakePydanticV1()) == {"key": "v1"}
|
|
|
|
|
|
def test_serialize_fallback_str():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
result = serialize_lc_object(object())
|
|
assert isinstance(result, str)
|
|
|
|
|
|
def test_serialize_fallback_repr():
|
|
from deerflow.runtime.serialization import serialize_lc_object
|
|
|
|
assert serialize_lc_object(_Unprintable()) == "<Unprintable>"
|
|
|
|
|
|
def test_serialize_channel_values_strips_pregel_keys():
|
|
from deerflow.runtime.serialization import serialize_channel_values
|
|
|
|
raw = {
|
|
"messages": ["hello"],
|
|
"__pregel_tasks": "internal",
|
|
"__pregel_resuming": True,
|
|
"__interrupt__": "stop",
|
|
"title": "Test",
|
|
}
|
|
result = serialize_channel_values(raw)
|
|
assert "messages" in result
|
|
assert "title" in result
|
|
assert "__pregel_tasks" not in result
|
|
assert "__pregel_resuming" not in result
|
|
assert "__interrupt__" not in result
|
|
|
|
|
|
def test_serialize_channel_values_serializes_objects():
|
|
from deerflow.runtime.serialization import serialize_channel_values
|
|
|
|
result = serialize_channel_values({"obj": _FakePydanticV2()})
|
|
assert result == {"obj": {"key": "v2"}}
|
|
|
|
|
|
def test_serialize_messages_tuple():
|
|
from deerflow.runtime.serialization import serialize_messages_tuple
|
|
|
|
chunk = _FakePydanticV2()
|
|
metadata = {"langgraph_node": "agent"}
|
|
result = serialize_messages_tuple((chunk, metadata))
|
|
assert result == [{"key": "v2"}, {"langgraph_node": "agent"}]
|
|
|
|
|
|
def test_serialize_messages_tuple_non_dict_metadata():
|
|
from deerflow.runtime.serialization import serialize_messages_tuple
|
|
|
|
result = serialize_messages_tuple((_FakePydanticV2(), "not-a-dict"))
|
|
assert result == [{"key": "v2"}, {}]
|
|
|
|
|
|
def test_serialize_messages_tuple_fallback():
|
|
from deerflow.runtime.serialization import serialize_messages_tuple
|
|
|
|
result = serialize_messages_tuple("not-a-tuple")
|
|
assert result == "not-a-tuple"
|
|
|
|
|
|
def test_serialize_dispatcher_messages_mode():
|
|
from deerflow.runtime.serialization import serialize
|
|
|
|
chunk = _FakePydanticV2()
|
|
result = serialize((chunk, {"node": "x"}), mode="messages")
|
|
assert result == [{"key": "v2"}, {"node": "x"}]
|
|
|
|
|
|
def test_serialize_dispatcher_values_mode():
|
|
from deerflow.runtime.serialization import serialize
|
|
|
|
result = serialize({"msg": "hi", "__pregel_tasks": "x"}, mode="values")
|
|
assert result == {"msg": "hi"}
|
|
|
|
|
|
def test_serialize_dispatcher_default_mode():
|
|
from deerflow.runtime.serialization import serialize
|
|
|
|
result = serialize(_FakePydanticV1())
|
|
assert result == {"key": "v1"}
|