Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions
--- a/deer-flow/backend/tests/test_suggestions_router.py
+++ b/deer-flow/backend/tests/test_suggestions_router.py
@@ -0,0 +1,102 @@
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
+from app.gateway.routers import suggestions
+
+
+def test_strip_markdown_code_fence_removes_wrapping():
+    text = '```json\n["a"]\n```'
+    assert suggestions._strip_markdown_code_fence(text) == '["a"]'
+
+
+def test_strip_markdown_code_fence_no_fence_keeps_content():
+    text = '  ["a"]  '
+    assert suggestions._strip_markdown_code_fence(text) == '["a"]'
+
+
+def test_parse_json_string_list_filters_invalid_items():
+    text = '```json\n["a", " ", 1, "b"]\n```'
+    assert suggestions._parse_json_string_list(text) == ["a", "b"]
+
+
+def test_parse_json_string_list_rejects_non_list():
+    text = '{"a": 1}'
+    assert suggestions._parse_json_string_list(text) is None
+
+
+def test_format_conversation_formats_roles():
+    messages = [
+        suggestions.SuggestionMessage(role="User", content="Hi"),
+        suggestions.SuggestionMessage(role="assistant", content="Hello"),
+        suggestions.SuggestionMessage(role="system", content="note"),
+    ]
+    assert suggestions._format_conversation(messages) == "User: Hi\nAssistant: Hello\nsystem: note"
+
+
+def test_generate_suggestions_parses_and_limits(monkeypatch):
+    req = suggestions.SuggestionsRequest(
+        messages=[
+            suggestions.SuggestionMessage(role="user", content="Hi"),
+            suggestions.SuggestionMessage(role="assistant", content="Hello"),
+        ],
+        n=3,
+        model_name=None,
+    )
+    fake_model = MagicMock()
+    fake_model.ainvoke = AsyncMock(return_value=MagicMock(content='```json\n["Q1", "Q2", "Q3", "Q4"]\n```'))
+    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
+
+    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+
+    assert result.suggestions == ["Q1", "Q2", "Q3"]
+
+
+def test_generate_suggestions_parses_list_block_content(monkeypatch):
+    req = suggestions.SuggestionsRequest(
+        messages=[
+            suggestions.SuggestionMessage(role="user", content="Hi"),
+            suggestions.SuggestionMessage(role="assistant", content="Hello"),
+        ],
+        n=2,
+        model_name=None,
+    )
+    fake_model = MagicMock()
+    fake_model.ainvoke = AsyncMock(return_value=MagicMock(content=[{"type": "text", "text": '```json\n["Q1", "Q2"]\n```'}]))
+    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
+
+    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+
+    assert result.suggestions == ["Q1", "Q2"]
+
+
+def test_generate_suggestions_parses_output_text_block_content(monkeypatch):
+    req = suggestions.SuggestionsRequest(
+        messages=[
+            suggestions.SuggestionMessage(role="user", content="Hi"),
+            suggestions.SuggestionMessage(role="assistant", content="Hello"),
+        ],
+        n=2,
+        model_name=None,
+    )
+    fake_model = MagicMock()
+    fake_model.ainvoke = AsyncMock(return_value=MagicMock(content=[{"type": "output_text", "text": '```json\n["Q1", "Q2"]\n```'}]))
+    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
+
+    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+
+    assert result.suggestions == ["Q1", "Q2"]
+
+
+def test_generate_suggestions_returns_empty_on_model_error(monkeypatch):
+    req = suggestions.SuggestionsRequest(
+        messages=[suggestions.SuggestionMessage(role="user", content="Hi")],
+        n=2,
+        model_name=None,
+    )
+    fake_model = MagicMock()
+    fake_model.ainvoke = AsyncMock(side_effect=RuntimeError("boom"))
+    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
+
+    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+
+    assert result.suggestions == []