Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:

- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
7
deer-flow/backend/tests/_disabled_native/conftest.py
Normal file
7
deer-flow/backend/tests/_disabled_native/conftest.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Quarantine: tests for legacy unhardened web providers.
|
||||
|
||||
These tests are kept on disk for reference but excluded from collection
|
||||
because the underlying tools.py modules now raise on import.
|
||||
"""
|
||||
|
||||
# Glob of files pytest must not collect from this directory: every Python
# file here is quarantined (see the module docstring).
collect_ignore_glob = ["*.py"]
|
||||
260
deer-flow/backend/tests/_disabled_native/test_exa_tools.py
Normal file
260
deer-flow/backend/tests/_disabled_native/test_exa_tools.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""Unit tests for the Exa community tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def mock_app_config():
    """Patch ``get_app_config`` in the Exa tools module with canned settings.

    Yields the patched ``get_app_config`` mock. Any ``get_tool_config`` call
    made on its return value hands back the same fake tool config.
    """
    fake_tool_config = MagicMock()
    fake_tool_config.model_extra = dict(
        max_results=5,
        search_type="auto",
        contents_max_characters=1000,
        api_key="test-api-key",
    )
    with patch("deerflow.community.exa.tools.get_app_config") as patched_get_config:
        patched_get_config.return_value.get_tool_config.return_value = fake_tool_config
        yield patched_get_config
|
||||
|
||||
|
||||
@pytest.fixture
def mock_exa_client():
    """Patch ``Exa`` in the Exa tools module and yield the fake client it builds."""
    fake_client = MagicMock()
    # Constructing Exa(...) inside the tools module now returns fake_client.
    with patch("deerflow.community.exa.tools.Exa", return_value=fake_client):
        yield fake_client
|
||||
|
||||
|
||||
class TestWebSearchTool:
    """Checks for the Exa ``web_search_tool`` running against a mocked Exa client."""

    def test_basic_search(self, mock_app_config, mock_exa_client):
        """Hits come back as title/url/snippet entries; highlights are joined with newlines."""
        first_hit = MagicMock(
            title="Test Title 1",
            url="https://example.com/1",
            highlights=["This is a highlight about the topic."],
        )
        second_hit = MagicMock(
            title="Test Title 2",
            url="https://example.com/2",
            highlights=["First highlight.", "Second highlight."],
        )
        mock_exa_client.search.return_value = MagicMock(results=[first_hit, second_hit])

        from deerflow.community.exa.tools import web_search_tool

        entries = json.loads(web_search_tool.invoke({"query": "test query"}))

        assert len(entries) == 2
        assert entries[0]["title"] == "Test Title 1"
        assert entries[0]["url"] == "https://example.com/1"
        assert entries[0]["snippet"] == "This is a highlight about the topic."
        assert entries[1]["snippet"] == "First highlight.\nSecond highlight."

        # The values from the mocked tool config must be forwarded to the client.
        mock_exa_client.search.assert_called_once_with(
            "test query",
            type="auto",
            num_results=5,
            contents={"highlights": {"max_characters": 1000}},
        )

    def test_search_with_custom_config(self, mock_exa_client):
        """Non-default config values are passed through to ``Exa.search``."""
        custom_config = MagicMock()
        custom_config.model_extra = dict(
            max_results=10,
            search_type="neural",
            contents_max_characters=2000,
            api_key="test-key",
        )
        mock_exa_client.search.return_value = MagicMock(results=[])

        with patch("deerflow.community.exa.tools.get_app_config") as patched_get_config:
            patched_get_config.return_value.get_tool_config.return_value = custom_config

            from deerflow.community.exa.tools import web_search_tool

            web_search_tool.invoke({"query": "neural search"})

            mock_exa_client.search.assert_called_once_with(
                "neural search",
                type="neural",
                num_results=10,
                contents={"highlights": {"max_characters": 2000}},
            )

    def test_search_with_no_highlights(self, mock_app_config, mock_exa_client):
        """A hit whose ``highlights`` is ``None`` yields an empty snippet."""
        bare_hit = MagicMock(
            title="No Highlights",
            url="https://example.com/empty",
            highlights=None,
        )
        mock_exa_client.search.return_value = MagicMock(results=[bare_hit])

        from deerflow.community.exa.tools import web_search_tool

        entries = json.loads(web_search_tool.invoke({"query": "test"}))

        assert entries[0]["snippet"] == ""

    def test_search_empty_results(self, mock_app_config, mock_exa_client):
        """No hits from the client serialize to an empty JSON list."""
        mock_exa_client.search.return_value = MagicMock(results=[])

        from deerflow.community.exa.tools import web_search_tool

        entries = json.loads(web_search_tool.invoke({"query": "nothing"}))

        assert entries == []

    def test_search_error_handling(self, mock_app_config, mock_exa_client):
        """An exception raised by the client is reported as an ``Error: ...`` string."""
        mock_exa_client.search.side_effect = Exception("API rate limit exceeded")

        from deerflow.community.exa.tools import web_search_tool

        outcome = web_search_tool.invoke({"query": "error"})

        assert outcome == "Error: API rate limit exceeded"
|
||||
|
||||
|
||||
class TestWebFetchTool:
    """Checks for the Exa ``web_fetch_tool`` running against a mocked Exa client."""

    def test_basic_fetch(self, mock_app_config, mock_exa_client):
        """A fetched page is rendered as a ``# title`` header followed by its text."""
        page = MagicMock(title="Fetched Page", text="This is the page content.")
        mock_exa_client.get_contents.return_value = MagicMock(results=[page])

        from deerflow.community.exa.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com"})

        assert rendered == "# Fetched Page\n\nThis is the page content."
        mock_exa_client.get_contents.assert_called_once_with(
            ["https://example.com"],
            text={"max_characters": 4096},
        )

    def test_fetch_no_title(self, mock_app_config, mock_exa_client):
        """A page whose title is ``None`` is rendered under the header 'Untitled'."""
        page = MagicMock(title=None, text="Content without title.")
        mock_exa_client.get_contents.return_value = MagicMock(results=[page])

        from deerflow.community.exa.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com"})

        assert rendered.startswith("# Untitled\n\n")

    def test_fetch_no_results(self, mock_app_config, mock_exa_client):
        """An empty result list is reported as an error string."""
        mock_exa_client.get_contents.return_value = MagicMock(results=[])

        from deerflow.community.exa.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com/404"})

        assert rendered == "Error: No results found"

    def test_fetch_error_handling(self, mock_app_config, mock_exa_client):
        """An exception raised by the client is reported as an ``Error: ...`` string."""
        mock_exa_client.get_contents.side_effect = Exception("Connection timeout")

        from deerflow.community.exa.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com"})

        assert rendered == "Error: Connection timeout"

    def test_fetch_reads_web_fetch_config(self, mock_exa_client):
        """``web_fetch_tool`` looks up the 'web_fetch' tool config."""
        fetch_config = MagicMock()
        fetch_config.model_extra = {"api_key": "exa-fetch-key"}
        page = MagicMock(title="Page", text="Content.")
        mock_exa_client.get_contents.return_value = MagicMock(results=[page])

        with patch("deerflow.community.exa.tools.get_app_config") as patched_get_config:
            patched_get_config.return_value.get_tool_config.return_value = fetch_config

            from deerflow.community.exa.tools import web_fetch_tool

            web_fetch_tool.invoke({"url": "https://example.com"})

            patched_get_config.return_value.get_tool_config.assert_any_call("web_fetch")

    def test_fetch_uses_independent_api_key(self, mock_exa_client):
        """Mixed-provider config: the client is built with the 'web_fetch' api_key."""
        fetch_config = MagicMock()
        fetch_config.model_extra = {"api_key": "exa-fetch-key"}
        page = MagicMock(title="Page", text="Content.")
        mock_exa_client.get_contents.return_value = MagicMock(results=[page])

        with (
            patch("deerflow.community.exa.tools.get_app_config") as patched_get_config,
            patch("deerflow.community.exa.tools.Exa") as patched_exa_cls,
        ):
            patched_exa_cls.return_value = mock_exa_client
            # Only 'web_fetch' has a config; every other tool name resolves to None.
            patched_get_config.return_value.get_tool_config.side_effect = (
                lambda name: fetch_config if name == "web_fetch" else None
            )

            from deerflow.community.exa.tools import web_fetch_tool

            web_fetch_tool.invoke({"url": "https://example.com"})

            patched_exa_cls.assert_called_once_with(api_key="exa-fetch-key")

    def test_fetch_truncates_long_content(self, mock_app_config, mock_exa_client):
        """Page text longer than 4096 characters is cut down to 4096."""
        page = MagicMock(title="Long Page", text="x" * 5000)
        mock_exa_client.get_contents.return_value = MagicMock(results=[page])

        from deerflow.community.exa.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com"})

        # Drop the "# Long Page\n\n" header (13 characters) and measure the body only.
        _, body = rendered.split("\n\n", 1)
        assert len(body) == 4096
|
||||
@@ -0,0 +1,66 @@
|
||||
"""Unit tests for the Firecrawl community tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
class TestWebSearchTool:
    """Checks for the Firecrawl ``web_search_tool`` with ``FirecrawlApp`` mocked."""

    @patch("deerflow.community.firecrawl.tools.FirecrawlApp")
    @patch("deerflow.community.firecrawl.tools.get_app_config")
    def test_search_uses_web_search_config(self, mock_get_app_config, mock_firecrawl_cls):
        """The 'web_search' config supplies the api_key and result limit."""
        search_config = MagicMock()
        search_config.model_extra = {"api_key": "firecrawl-search-key", "max_results": 7}
        config_lookup = mock_get_app_config.return_value.get_tool_config
        config_lookup.return_value = search_config

        hit = MagicMock(title="Result", url="https://example.com", description="Snippet")
        firecrawl_search = mock_firecrawl_cls.return_value.search
        firecrawl_search.return_value = MagicMock(web=[hit])

        from deerflow.community.firecrawl.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test query"})

        expected = [{"title": "Result", "url": "https://example.com", "snippet": "Snippet"}]
        assert json.loads(raw) == expected
        config_lookup.assert_called_with("web_search")
        mock_firecrawl_cls.assert_called_once_with(api_key="firecrawl-search-key")
        firecrawl_search.assert_called_once_with("test query", limit=7)
|
||||
|
||||
|
||||
class TestWebFetchTool:
    """Checks for the Firecrawl ``web_fetch_tool`` with ``FirecrawlApp`` mocked."""

    @patch("deerflow.community.firecrawl.tools.FirecrawlApp")
    @patch("deerflow.community.firecrawl.tools.get_app_config")
    def test_fetch_uses_web_fetch_config(self, mock_get_app_config, mock_firecrawl_cls):
        """The 'web_fetch' config supplies the api_key; the page is scraped as markdown."""
        fetch_config = MagicMock()
        fetch_config.model_extra = {"api_key": "firecrawl-fetch-key"}
        config_lookup = mock_get_app_config.return_value.get_tool_config
        # Only 'web_fetch' has a config; every other tool name resolves to None.
        config_lookup.side_effect = lambda name: fetch_config if name == "web_fetch" else None

        scraped = MagicMock(markdown="Fetched markdown", metadata=MagicMock(title="Fetched Page"))
        firecrawl_scrape = mock_firecrawl_cls.return_value.scrape
        firecrawl_scrape.return_value = scraped

        from deerflow.community.firecrawl.tools import web_fetch_tool

        rendered = web_fetch_tool.invoke({"url": "https://example.com"})

        assert rendered == "# Fetched Page\n\nFetched markdown"
        config_lookup.assert_any_call("web_fetch")
        mock_firecrawl_cls.assert_called_once_with(api_key="firecrawl-fetch-key")
        firecrawl_scrape.assert_called_once_with(
            "https://example.com",
            formats=["markdown"],
        )
|
||||
@@ -0,0 +1,348 @@
|
||||
"""Tests for InfoQuest client and tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from deerflow.community.infoquest import tools
|
||||
from deerflow.community.infoquest.infoquest_client import InfoQuestClient
|
||||
|
||||
|
||||
class TestInfoQuestClient:
    """Tests for ``InfoQuestClient`` and the InfoQuest tool wrappers, with HTTP and config mocked."""

    @staticmethod
    def _json_response(payload):
        """Build a fake HTTP 200 response whose ``json()`` returns *payload*."""
        response = MagicMock(status_code=200)
        response.json.return_value = payload
        return response

    @staticmethod
    def _organic_payload():
        """Return a fresh search payload holding a single organic result."""
        organic_hit = {"title": "Test Result", "desc": "Test description", "url": "https://example.com"}
        return {
            "search_result": {
                "results": [{"content": {"results": {"organic": [organic_hit]}}}],
                "images_results": [],
            }
        }

    def test_infoquest_client_initialization(self):
        """Unset options default to -1; explicit options are stored as given."""
        option_names = ("fetch_time", "fetch_timeout", "fetch_navigation_timeout", "search_time_range")

        default_client = InfoQuestClient()
        for option in option_names:
            assert getattr(default_client, option) == -1

        overrides = dict(zip(option_names, (10, 30, 60, 24)))
        custom_client = InfoQuestClient(**overrides)
        for option, expected in overrides.items():
            assert getattr(custom_client, option) == expected

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_fetch_success(self, mock_post):
        """``fetch`` returns the ``reader_result`` field and posts to the reader endpoint."""
        html = "<html><body>Test content</body></html>"
        mock_post.return_value = MagicMock(status_code=200, text=json.dumps({"reader_result": html}))

        fetched = InfoQuestClient().fetch("https://example.com")

        assert fetched == html
        mock_post.assert_called_once()
        sent = mock_post.call_args
        assert sent.args[0] == "https://reader.infoquest.bytepluses.com"
        assert sent.kwargs["json"]["url"] == "https://example.com"
        assert sent.kwargs["json"]["format"] == "HTML"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_fetch_non_200_status(self, mock_post):
        """A non-200 reply is reported with its status code and body."""
        mock_post.return_value = MagicMock(status_code=404, text="Not Found")

        fetched = InfoQuestClient().fetch("https://example.com")

        assert fetched == "Error: fetch API returned status 404: Not Found"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_fetch_empty_response(self, mock_post):
        """A 200 reply with an empty body is reported as 'no result found'."""
        mock_post.return_value = MagicMock(status_code=200, text="")

        fetched = InfoQuestClient().fetch("https://example.com")

        assert fetched == "Error: no result found"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_web_search_raw_results_success(self, mock_post):
        """``web_search_raw_results`` returns the payload and posts the query to the search endpoint."""
        mock_post.return_value = self._json_response(self._organic_payload())

        raw = InfoQuestClient().web_search_raw_results("test query", "")

        assert "search_result" in raw
        mock_post.assert_called_once()
        sent = mock_post.call_args
        assert sent.args[0] == "https://search.infoquest.bytepluses.com"
        assert sent.kwargs["json"]["query"] == "test query"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_web_search_success(self, mock_post):
        """``web_search`` returns a JSON string describing the single organic hit."""
        mock_post.return_value = self._json_response(self._organic_payload())

        serialized = InfoQuestClient().web_search("test query")

        # The result must parse as JSON and carry the expected hit.
        hits = json.loads(serialized)
        assert len(hits) == 1
        assert hits[0]["title"] == "Test Result"
        assert hits[0]["url"] == "https://example.com"

    def test_clean_results(self):
        """``clean_results`` emits a 'page' entry for organic hits and a 'news' entry for top stories."""
        organic_hit = {"title": "Test Page", "desc": "Page description", "url": "https://example.com/page1"}
        story = {
            "title": "Test News",
            "source": "Test Source",
            "time_frame": "2 hours ago",
            "url": "https://example.com/news1",
        }
        raw_results = [
            {"content": {"results": {"organic": [organic_hit], "top_stories": {"items": [story]}}}},
        ]

        cleaned = InfoQuestClient.clean_results(raw_results)

        assert len(cleaned) == 2
        assert cleaned[0]["type"] == "page"
        assert cleaned[0]["title"] == "Test Page"
        assert cleaned[1]["type"] == "news"
        assert cleaned[1]["title"] == "Test News"

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_web_search_tool(self, mock_get_client):
        """``web_search_tool`` returns what the client's ``web_search`` returns."""
        empty_json = json.dumps([])
        fake_client = MagicMock()
        fake_client.web_search.return_value = empty_json
        mock_get_client.return_value = fake_client

        outcome = tools.web_search_tool.run("test query")

        assert outcome == json.dumps([])
        mock_get_client.assert_called_once()
        fake_client.web_search.assert_called_once_with("test query")

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_web_fetch_tool(self, mock_get_client):
        """``web_fetch_tool`` turns the fetched HTML into an 'Untitled' document."""
        fake_client = MagicMock()
        fake_client.fetch.return_value = "<html><body>Test content</body></html>"
        mock_get_client.return_value = fake_client

        outcome = tools.web_fetch_tool.run("https://example.com")

        assert outcome == "# Untitled\n\nTest content"
        mock_get_client.assert_called_once()
        fake_client.fetch.assert_called_once_with("https://example.com")

    @patch("deerflow.community.infoquest.tools.get_app_config")
    def test_get_infoquest_client(self, mock_get_app_config):
        """``_get_infoquest_client`` builds a client from the three tool configs."""
        # Configs are handed out in call order: web_search, web_fetch, image_search.
        search_cfg = MagicMock(model_extra={"search_time_range": 24})
        fetch_cfg = MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60})
        image_cfg = MagicMock(model_extra={"image_search_time_range": 7, "image_size": "l"})
        app_config = MagicMock()
        app_config.get_tool_config.side_effect = [search_cfg, fetch_cfg, image_cfg]
        mock_get_app_config.return_value = app_config

        client = tools._get_infoquest_client()

        assert client.search_time_range == 24
        assert client.fetch_time == 10
        assert client.fetch_timeout == 30
        assert client.fetch_navigation_timeout == 60
        assert client.image_search_time_range == 7
        assert client.image_size == "l"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_web_search_api_error(self, mock_post):
        """A failing HTTP call makes ``web_search`` return a string containing 'Error'."""
        mock_post.side_effect = Exception("Connection error")

        outcome = InfoQuestClient().web_search("test query")

        assert "Error" in outcome

    def test_clean_results_with_image_search(self):
        """One raw image result becomes one cleaned entry with ``image_url`` and ``title``."""
        image = {
            "original": "https://example.com/image1.jpg",
            "title": "Test Image 1",
            "url": "https://example.com/page1",
        }
        raw_results = [{"content": {"results": {"images_results": [image]}}}]

        cleaned = InfoQuestClient.clean_results_with_image_search(raw_results)

        assert len(cleaned) == 1
        assert cleaned[0]["image_url"] == "https://example.com/image1.jpg"
        assert cleaned[0]["title"] == "Test Image 1"

    def test_clean_results_with_image_search_empty(self):
        """An empty ``images_results`` list cleans to nothing."""
        raw_results = [{"content": {"results": {"images_results": []}}}]

        cleaned = InfoQuestClient.clean_results_with_image_search(raw_results)

        assert len(cleaned) == 0

    def test_clean_results_with_image_search_no_images(self):
        """Results without an ``images_results`` field clean to nothing."""
        raw_results = [{"content": {"results": {"organic": [{"title": "Test Page"}]}}}]

        cleaned = InfoQuestClient.clean_results_with_image_search(raw_results)

        assert len(cleaned) == 0
|
||||
|
||||
|
||||
class TestImageSearch:
    """Tests for InfoQuest image search: the client methods and the tool wrapper."""

    @staticmethod
    def _image_response(images):
        """Build a fake HTTP 200 response whose ``json()`` wraps *images* as image results."""
        response = MagicMock(status_code=200)
        response.json.return_value = {
            "search_result": {"results": [{"content": {"results": {"images_results": images}}}]}
        }
        return response

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_success(self, mock_post):
        """``image_search_raw_results`` returns the payload and posts the query to the search endpoint."""
        image = {
            "original": "https://example.com/image1.jpg",
            "title": "Test Image",
            "url": "https://example.com/page1",
        }
        mock_post.return_value = self._image_response([image])

        raw = InfoQuestClient().image_search_raw_results("test query")

        assert "search_result" in raw
        mock_post.assert_called_once()
        sent = mock_post.call_args
        assert sent.args[0] == "https://search.infoquest.bytepluses.com"
        assert sent.kwargs["json"]["query"] == "test query"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_with_parameters(self, mock_post):
        """Client options and call arguments all appear in the posted JSON body."""
        mock_post.return_value = self._image_response([{"original": "https://example.com/image1.jpg"}])

        client = InfoQuestClient(image_search_time_range=30, image_size="l")
        client.image_search_raw_results(query="cat", site="unsplash.com", output_format="JSON")

        mock_post.assert_called_once()
        body = mock_post.call_args.kwargs["json"]
        assert body["query"] == "cat"
        assert body["time_range"] == 30
        assert body["site"] == "unsplash.com"
        assert body["image_size"] == "l"
        assert body["format"] == "JSON"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_invalid_time_range(self, mock_post):
        """Option values this test treats as invalid are left out of the posted body."""
        mock_post.return_value = self._image_response([])

        # time_range=400 and image_size="x" are the invalid values under test.
        client = InfoQuestClient(image_search_time_range=400, image_size="x")
        client.image_search_raw_results(
            query="test",
            site="",
        )

        mock_post.assert_called_once()
        body = mock_post.call_args.kwargs["json"]
        assert body["query"] == "test"
        assert "time_range" not in body
        assert "image_size" not in body

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_success(self, mock_post):
        """``image_search`` returns a JSON string describing the single image hit."""
        image = {
            "original": "https://example.com/image1.jpg",
            "title": "Test Image",
            "url": "https://example.com/page1",
        }
        mock_post.return_value = self._image_response([image])

        serialized = InfoQuestClient().image_search("cat")

        # The result must parse as JSON and carry the expected image.
        hits = json.loads(serialized)
        assert len(hits) == 1
        assert hits[0]["image_url"] == "https://example.com/image1.jpg"
        assert hits[0]["title"] == "Test Image"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_with_all_parameters(self, mock_post):
        """``image_search`` forwards client options and the site filter in the posted body."""
        mock_post.return_value = self._image_response([{"original": "https://example.com/image1.jpg"}])

        client = InfoQuestClient(image_search_time_range=7, image_size="m")
        client.image_search(query="dog", site="flickr.com", output_format="JSON")

        mock_post.assert_called_once()
        body = mock_post.call_args.kwargs["json"]
        assert body["query"] == "dog"
        assert body["time_range"] == 7
        assert body["site"] == "flickr.com"
        assert body["image_size"] == "m"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_api_error(self, mock_post):
        """A failing HTTP call makes ``image_search`` return a string containing 'Error'."""
        mock_post.side_effect = Exception("Connection error")

        outcome = InfoQuestClient().image_search("test query")

        assert "Error" in outcome

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool(self, mock_get_client):
        """``image_search_tool`` returns the client's JSON and passes the query through."""
        fake_client = MagicMock()
        fake_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = fake_client

        outcome = tools.image_search_tool.run({"query": "test query"})

        # The tool output must parse as JSON.
        hits = json.loads(outcome)
        assert len(hits) == 1
        assert hits[0]["image_url"] == "https://example.com/image1.jpg"
        mock_get_client.assert_called_once()
        fake_client.image_search.assert_called_once_with("test query")

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool_with_parameters(self, mock_get_client):
        """Extra keys in the tool input are ignored; only the query reaches the client."""
        fake_client = MagicMock()
        fake_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = fake_client

        # Everything besides "query" is an extra key the tool is expected to drop.
        tool_input = {"query": "sunset", "time_range": 30, "site": "unsplash.com", "image_size": "l"}
        tools.image_search_tool.run(tool_input)

        mock_get_client.assert_called_once()
        fake_client.image_search.assert_called_once_with("sunset")
|
||||
177
deer-flow/backend/tests/_disabled_native/test_jina_client.py
Normal file
177
deer-flow/backend/tests/_disabled_native/test_jina_client.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Tests for JinaClient async crawl method."""
|
||||
|
||||
import logging
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
import deerflow.community.jina_ai.jina_client as jina_client_module
|
||||
from deerflow.community.jina_ai.jina_client import JinaClient
|
||||
from deerflow.community.jina_ai.tools import web_fetch_tool
|
||||
|
||||
|
||||
@pytest.fixture
def jina_client():
    """Provide a ``JinaClient`` constructed with no arguments."""
    client = JinaClient()
    return client
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_success(jina_client, monkeypatch):
    """A 200 reply with a body is returned unchanged by ``crawl``."""
    page_html = "<html><body>Hello</body></html>"

    async def fake_post(self, url, **kwargs):
        # Stand-in for httpx.AsyncClient.post: always answers 200 with the page.
        return httpx.Response(200, text=page_html, request=httpx.Request("POST", url))

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)

    crawled = await jina_client.crawl("https://example.com")

    assert crawled == "<html><body>Hello</body></html>"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_non_200_status(jina_client, monkeypatch):
    """A non-200 reply yields an "Error:" string that mentions the status code."""

    async def fake_post(self, url, **kwargs):
        # Simulate the upstream service rate-limiting the request.
        request = httpx.Request("POST", url)
        return httpx.Response(429, text="Rate limited", request=request)

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)

    outcome = await jina_client.crawl("https://example.com")
    assert outcome.startswith("Error:")
    assert "429" in outcome
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_empty_response(jina_client, monkeypatch):
    """A 200 reply with an empty body is reported as an "empty" error."""

    async def fake_post(self, url, **kwargs):
        # Successful status, but nothing in the body.
        request = httpx.Request("POST", url)
        return httpx.Response(200, text="", request=request)

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)

    outcome = await jina_client.crawl("https://example.com")
    assert outcome.startswith("Error:")
    assert "empty" in outcome.lower()
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_whitespace_only_response(jina_client, monkeypatch):
    """A 200 reply whose body is only whitespace is reported as an "empty" error."""

    async def fake_post(self, url, **kwargs):
        # Body consists solely of spaces and a newline.
        request = httpx.Request("POST", url)
        return httpx.Response(200, text=" \n ", request=request)

    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)

    outcome = await jina_client.crawl("https://example.com")
    assert outcome.startswith("Error:")
    assert "empty" in outcome.lower()
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_network_error(jina_client, monkeypatch):
    """A transport-level failure surfaces as an "Error:" string rather than an exception."""

    async def failing_post(self, url, **kwargs):
        # Simulate the connection being refused before any response arrives.
        error = httpx.ConnectError("Connection refused")
        raise error

    monkeypatch.setattr(httpx.AsyncClient, "post", failing_post)

    outcome = await jina_client.crawl("https://example.com")
    assert outcome.startswith("Error:")
    assert "failed" in outcome.lower()
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_passes_headers(jina_client, monkeypatch):
    """crawl() sends the requested return format and timeout as request headers."""
    sent_headers = {}

    async def recording_post(self, url, **kwargs):
        # Record whatever headers the client supplied, then answer successfully.
        supplied = kwargs.get("headers", {})
        sent_headers.update(supplied)
        request = httpx.Request("POST", url)
        return httpx.Response(200, text="ok", request=request)

    monkeypatch.setattr(httpx.AsyncClient, "post", recording_post)

    await jina_client.crawl("https://example.com", return_format="markdown", timeout=30)

    assert sent_headers["X-Return-Format"] == "markdown"
    # The timeout is expected as a string header value, not an int.
    assert sent_headers["X-Timeout"] == "30"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_includes_api_key_when_set(jina_client, monkeypatch):
    """With JINA_API_KEY in the environment, crawl() sends it as a Bearer token."""
    sent_headers = {}

    async def recording_post(self, url, **kwargs):
        # Record whatever headers the client supplied, then answer successfully.
        supplied = kwargs.get("headers", {})
        sent_headers.update(supplied)
        request = httpx.Request("POST", url)
        return httpx.Response(200, text="ok", request=request)

    monkeypatch.setattr(httpx.AsyncClient, "post", recording_post)
    monkeypatch.setenv("JINA_API_KEY", "test-key-123")

    await jina_client.crawl("https://example.com")

    assert sent_headers["Authorization"] == "Bearer test-key-123"
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_warns_once_when_api_key_missing(jina_client, monkeypatch, caplog):
    """Two crawls without JINA_API_KEY log the missing-key warning exactly once."""
    # Reset the module-level flag through monkeypatch rather than by direct
    # assignment, so the previous value is restored on teardown and this test
    # does not leak state into later tests. raising=False keeps the old
    # behaviour of tolerating a module that has not defined the flag yet.
    monkeypatch.setattr(jina_client_module, "_api_key_warned", False, raising=False)

    async def mock_post(self, url, **kwargs):
        return httpx.Response(200, text="ok", request=httpx.Request("POST", url))

    monkeypatch.setattr(httpx.AsyncClient, "post", mock_post)
    monkeypatch.delenv("JINA_API_KEY", raising=False)

    with caplog.at_level(logging.WARNING, logger="deerflow.community.jina_ai.jina_client"):
        # Crawl twice: only the first call should emit the warning.
        await jina_client.crawl("https://example.com")
        await jina_client.crawl("https://example.com")

    warning_count = sum(1 for record in caplog.records if "Jina API key is not set" in record.getMessage())
    assert warning_count == 1
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_crawl_no_auth_header_without_api_key(jina_client, monkeypatch):
    """Without JINA_API_KEY in the environment, no Authorization header is sent."""
    # Reset the module-level flag through monkeypatch rather than by direct
    # assignment, so the previous value is restored on teardown and this test
    # does not leak state into later tests. raising=False keeps the old
    # behaviour of tolerating a module that has not defined the flag yet.
    monkeypatch.setattr(jina_client_module, "_api_key_warned", False, raising=False)
    captured_headers = {}

    async def mock_post(self, url, **kwargs):
        # Record whatever headers the client supplied, then answer successfully.
        captured_headers.update(kwargs.get("headers", {}))
        return httpx.Response(200, text="ok", request=httpx.Request("POST", url))

    monkeypatch.setattr(httpx.AsyncClient, "post", mock_post)
    monkeypatch.delenv("JINA_API_KEY", raising=False)

    await jina_client.crawl("https://example.com")

    assert "Authorization" not in captured_headers
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_web_fetch_tool_returns_error_on_crawl_failure(monkeypatch):
    """web_fetch_tool hands back the crawl error string when the crawl fails."""

    async def failing_crawl(self, url, **kwargs):
        # Replacement for JinaClient.crawl that always reports a rate-limit error.
        return "Error: Jina API returned status 429: Rate limited"

    # App config stub that reports no tool-specific configuration.
    app_config = MagicMock()
    app_config.get_tool_config.return_value = None

    monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: app_config)
    monkeypatch.setattr(JinaClient, "crawl", failing_crawl)

    outcome = await web_fetch_tool.ainvoke("https://example.com")

    assert outcome.startswith("Error:")
    assert "429" in outcome
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_web_fetch_tool_returns_markdown_on_success(monkeypatch):
    """web_fetch_tool returns the page text, not an error, when the crawl succeeds."""

    async def successful_crawl(self, url, **kwargs):
        # Replacement for JinaClient.crawl that returns a minimal HTML page.
        return "<html><body><p>Hello world</p></body></html>"

    # App config stub that reports no tool-specific configuration.
    app_config = MagicMock()
    app_config.get_tool_config.return_value = None

    monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: app_config)
    monkeypatch.setattr(JinaClient, "crawl", successful_crawl)

    outcome = await web_fetch_tool.ainvoke("https://example.com")

    assert "Hello world" in outcome
    assert not outcome.startswith("Error:")
|
||||
Reference in New Issue
Block a user