Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
1043 lines · 37 KiB · Python
"""Tests for subagent executor async/sync execution paths.
|
||
|
||
Covers:
|
||
- SubagentExecutor.execute() synchronous execution path
|
||
- SubagentExecutor._aexecute() asynchronous execution path
|
||
- asyncio.run() properly executes async workflow within thread pool context
|
||
- Error handling in both sync and async paths
|
||
- Async tool support (MCP tools)
|
||
- Cooperative cancellation via cancel_event
|
||
|
||
Note: Due to circular import issues in the main codebase, conftest.py mocks
|
||
deerflow.subagents.executor. This test file uses delayed import via fixture to test
|
||
the real implementation in isolation.
|
||
"""
|
||
|
||
import asyncio
|
||
import sys
|
||
import threading
|
||
from datetime import datetime
|
||
from unittest.mock import MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
# Module names that need to be mocked to break circular imports.
# Each entry is replaced with a MagicMock in sys.modules for the duration
# of the test session (installed/restored by _setup_executor_classes below).
_MOCKED_MODULE_NAMES = [
    "deerflow.agents",
    "deerflow.agents.thread_state",
    "deerflow.agents.middlewares",
    "deerflow.agents.middlewares.thread_data_middleware",
    "deerflow.sandbox",
    "deerflow.sandbox.middleware",
    "deerflow.sandbox.security",
    "deerflow.models",
]
@pytest.fixture(scope="session", autouse=True)
def _setup_executor_classes():
    """Set up mocked modules and import real executor classes.

    This fixture runs once per session and yields the executor classes.
    It handles module cleanup to avoid affecting other test files.

    Yields:
        dict: mapping of class names ("AIMessage", "HumanMessage",
        "SubagentConfig", "SubagentExecutor", "SubagentResult",
        "SubagentStatus") to the real class objects.
    """
    # Save original modules so teardown can restore sys.modules exactly.
    original_modules = {name: sys.modules.get(name) for name in _MOCKED_MODULE_NAMES}
    original_executor = sys.modules.get("deerflow.subagents.executor")

    # Remove mocked executor if it exists (installed by conftest.py) so the
    # import below loads the real implementation instead of the mock.
    if "deerflow.subagents.executor" in sys.modules:
        del sys.modules["deerflow.subagents.executor"]

    # Install mocks for the circular-import-prone modules.
    for name in _MOCKED_MODULE_NAMES:
        sys.modules[name] = MagicMock()

    # Import real classes inside the fixture, after the mocks are in place.
    from langchain_core.messages import AIMessage, HumanMessage

    from deerflow.subagents.config import SubagentConfig
    from deerflow.subagents.executor import (
        SubagentExecutor,
        SubagentResult,
        SubagentStatus,
    )

    # Store classes in a dict to yield to dependent fixtures/tests.
    classes = {
        "AIMessage": AIMessage,
        "HumanMessage": HumanMessage,
        "SubagentConfig": SubagentConfig,
        "SubagentExecutor": SubagentExecutor,
        "SubagentResult": SubagentResult,
        "SubagentStatus": SubagentStatus,
    }

    yield classes

    # Cleanup: restore the original modules (or remove the mocks we added).
    for name in _MOCKED_MODULE_NAMES:
        if original_modules[name] is not None:
            sys.modules[name] = original_modules[name]
        elif name in sys.modules:
            del sys.modules[name]

    # Restore the executor module (the conftest.py mock, if there was one).
    if original_executor is not None:
        sys.modules["deerflow.subagents.executor"] = original_executor
    elif "deerflow.subagents.executor" in sys.modules:
        del sys.modules["deerflow.subagents.executor"]
# Helper classes that wrap real classes for testing
class MockHumanMessage:
    """Mock HumanMessage for testing - wraps real class from fixture."""

    def __init__(self, content, _classes=None):
        self._content = content
        self._classes = _classes

    def _get_real(self):
        """Instantiate the real HumanMessage via the injected class registry."""
        human_cls = self._classes["HumanMessage"]
        return human_cls(content=self._content)
||
class MockAIMessage:
    """Mock AIMessage for testing - wraps real class from fixture."""

    def __init__(self, content, msg_id=None, _classes=None):
        self._content = content
        self._msg_id = msg_id
        self._classes = _classes

    def _get_real(self):
        """Instantiate the real AIMessage, tagging it with the id if truthy."""
        ai_cls = self._classes["AIMessage"]
        real_msg = ai_cls(content=self._content)
        if self._msg_id:
            real_msg.id = self._msg_id
        return real_msg
||
async def async_iterator(items):
    """Helper to create an async iterator from a list.

    Yields each element of *items* in order, allowing a plain list to be
    consumed with ``async for`` in tests that fake an ``astream`` call.
    """
    for element in items:
        yield element
||
# -----------------------------------------------------------------------------
|
||
# Fixtures
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
@pytest.fixture
def classes(_setup_executor_classes):
    """Provide access to executor classes.

    Thin alias over the session-scoped setup fixture so tests can request
    the class registry under a short name.
    """
    return _setup_executor_classes
||
@pytest.fixture
def base_config(classes):
    """Return a basic subagent config for testing."""
    return classes["SubagentConfig"](
        name="test-agent",
        description="Test agent",
        system_prompt="You are a test agent.",
        max_turns=10,
        # Generous timeout; tests that exercise timeouts build their own config.
        timeout_seconds=60,
    )
||
@pytest.fixture
def mock_agent():
    """Return a properly configured mock agent with async stream."""
    agent = MagicMock()
    # Placeholder; most tests reassign .astream with an async generator,
    # others set .side_effect on this MagicMock directly.
    agent.astream = MagicMock()
    return agent
# Helper to create real message objects
|
||
class _MsgHelper:
|
||
"""Helper to create real message objects from fixture classes."""
|
||
|
||
def __init__(self, classes):
|
||
self.classes = classes
|
||
|
||
def human(self, content):
|
||
return self.classes["HumanMessage"](content=content)
|
||
|
||
def ai(self, content, msg_id=None):
|
||
msg = self.classes["AIMessage"](content=content)
|
||
if msg_id:
|
||
msg.id = msg_id
|
||
return msg
|
||
|
||
|
||
@pytest.fixture
def msg(classes):
    """Provide message factory.

    Returns a _MsgHelper bound to the real message classes from the
    session fixture.
    """
    return _MsgHelper(classes)
# -----------------------------------------------------------------------------
|
||
# Async Execution Path Tests
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestAsyncExecutionPath:
    """Test _aexecute() async execution path."""

    @pytest.mark.anyio
    async def test_aexecute_success(self, classes, base_config, mock_agent, msg):
        """Test successful async execution returns completed result."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        final_message = msg.ai("Task completed successfully", "msg-1")
        final_state = {
            "messages": [
                msg.human("Do something"),
                final_message,
            ]
        }
        # Single-chunk stream: the only state yielded is also the final state.
        mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
            trace_id="test-trace",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Do something")

        assert result.status == SubagentStatus.COMPLETED
        assert result.result == "Task completed successfully"
        assert result.error is None
        assert result.started_at is not None
        assert result.completed_at is not None

    @pytest.mark.anyio
    async def test_aexecute_collects_ai_messages(self, classes, base_config, mock_agent, msg):
        """Test that AI messages are collected during streaming."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        msg1 = msg.ai("First response", "msg-1")
        msg2 = msg.ai("Second response", "msg-2")

        # Each chunk carries the cumulative message list, as astream would.
        chunk1 = {"messages": [msg.human("Task"), msg1]}
        chunk2 = {"messages": [msg.human("Task"), msg1, msg2]}

        mock_agent.astream = lambda *args, **kwargs: async_iterator([chunk1, chunk2])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert result.status == SubagentStatus.COMPLETED
        assert len(result.ai_messages) == 2
        assert result.ai_messages[0]["id"] == "msg-1"
        assert result.ai_messages[1]["id"] == "msg-2"

    @pytest.mark.anyio
    async def test_aexecute_handles_duplicate_messages(self, classes, base_config, mock_agent, msg):
        """Test that duplicate AI messages are not added."""
        SubagentExecutor = classes["SubagentExecutor"]

        msg1 = msg.ai("Response", "msg-1")

        # Same message appears in multiple chunks
        chunk1 = {"messages": [msg.human("Task"), msg1]}
        chunk2 = {"messages": [msg.human("Task"), msg1]}

        mock_agent.astream = lambda *args, **kwargs: async_iterator([chunk1, chunk2])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert len(result.ai_messages) == 1

    @pytest.mark.anyio
    async def test_aexecute_handles_list_content(self, classes, base_config, mock_agent, msg):
        """Test handling of list-type content in AIMessage."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        # Content as a list of parts rather than a plain string.
        final_message = msg.ai([{"text": "Part 1"}, {"text": "Part 2"}])
        final_state = {
            "messages": [
                msg.human("Task"),
                final_message,
            ]
        }
        mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert result.status == SubagentStatus.COMPLETED
        assert "Part 1" in result.result
        assert "Part 2" in result.result

    @pytest.mark.anyio
    async def test_aexecute_handles_agent_exception(self, classes, base_config, mock_agent):
        """Test that exceptions during execution are caught and returned as FAILED."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        # astream here is still a MagicMock (from the fixture), so setting
        # side_effect makes the call itself raise.
        mock_agent.astream.side_effect = Exception("Agent error")

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert result.status == SubagentStatus.FAILED
        assert "Agent error" in result.error
        assert result.completed_at is not None

    @pytest.mark.anyio
    async def test_aexecute_no_final_state(self, classes, base_config, mock_agent):
        """Test handling when no final state is returned."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        # Empty stream: no chunks are ever yielded.
        mock_agent.astream = lambda *args, **kwargs: async_iterator([])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert result.status == SubagentStatus.COMPLETED
        assert result.result == "No response generated"

    @pytest.mark.anyio
    async def test_aexecute_no_ai_message_in_state(self, classes, base_config, mock_agent, msg):
        """Test fallback when no AIMessage found in final state."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        final_state = {"messages": [msg.human("Task")]}
        mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        # Should fallback to string representation of last message
        assert result.status == SubagentStatus.COMPLETED
        assert "Task" in result.result
# -----------------------------------------------------------------------------
|
||
# Sync Execution Path Tests
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestSyncExecutionPath:
    """Test execute() synchronous execution path with asyncio.run()."""

    def test_execute_runs_async_in_event_loop(self, classes, base_config, mock_agent, msg):
        """Test that execute() runs _aexecute() in a new event loop via asyncio.run()."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        final_message = msg.ai("Sync result", "msg-1")
        final_state = {
            "messages": [
                msg.human("Task"),
                final_message,
            ]
        }
        mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            # Synchronous entry point; no event loop is running in this test.
            result = executor.execute("Task")

        assert result.status == SubagentStatus.COMPLETED
        assert result.result == "Sync result"

    def test_execute_in_thread_pool_context(self, classes, base_config, msg):
        """Test that execute() works correctly when called from a thread pool.

        This simulates the real-world usage where execute() is called from
        _execution_pool in execute_async().
        """
        from concurrent.futures import ThreadPoolExecutor

        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        final_message = msg.ai("Thread pool result", "msg-1")
        final_state = {
            "messages": [
                msg.human("Task"),
                final_message,
            ]
        }

        def run_in_thread():
            # Build the mock inside the worker thread so nothing is shared
            # with the main thread.
            mock_agent = MagicMock()
            mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

            executor = SubagentExecutor(
                config=base_config,
                tools=[],
                thread_id="test-thread",
            )

            with patch.object(executor, "_create_agent", return_value=mock_agent):
                return executor.execute("Task")

        # Execute in thread pool (simulating _execution_pool usage)
        with ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(run_in_thread)
            result = future.result(timeout=5)

        assert result.status == SubagentStatus.COMPLETED
        assert result.result == "Thread pool result"

    @pytest.mark.anyio
    async def test_execute_in_running_event_loop_uses_isolated_thread(self, classes, base_config, mock_agent, msg):
        """Test that execute() uses the isolated-thread path inside a running loop."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        execution_threads = []
        final_state = {
            "messages": [
                msg.human("Task"),
                msg.ai("Async loop result", "msg-1"),
            ]
        }

        async def mock_astream(*args, **kwargs):
            # Record which thread actually runs the stream.
            execution_threads.append(threading.current_thread().name)
            yield final_state

        mock_agent.astream = mock_astream

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            # wraps= keeps the real behavior while counting invocations.
            with patch.object(executor, "_execute_in_isolated_loop", wraps=executor._execute_in_isolated_loop) as isolated:
                # Deliberately call the sync entry point while this test's
                # event loop is running, to force the isolated-thread path.
                result = executor.execute("Task")

        assert isolated.call_count == 1
        assert execution_threads
        assert all(name.startswith("subagent-isolated-") for name in execution_threads)
        assert result.status == SubagentStatus.COMPLETED
        assert result.result == "Async loop result"

    def test_execute_handles_asyncio_run_failure(self, classes, base_config):
        """Test handling when asyncio.run() itself fails."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_aexecute") as mock_aexecute:
            mock_aexecute.side_effect = Exception("Asyncio run error")

            result = executor.execute("Task")

        assert result.status == SubagentStatus.FAILED
        assert "Asyncio run error" in result.error
        assert result.completed_at is not None

    def test_execute_with_result_holder(self, classes, base_config, mock_agent, msg):
        """Test execute() updates provided result_holder in real-time."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        msg1 = msg.ai("Step 1", "msg-1")
        chunk1 = {"messages": [msg.human("Task"), msg1]}

        mock_agent.astream = lambda *args, **kwargs: async_iterator([chunk1])

        # Pre-create result holder (as done in execute_async)
        result_holder = SubagentResult(
            task_id="predefined-id",
            trace_id="test-trace",
            status=SubagentStatus.RUNNING,
            started_at=datetime.now(),
        )

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = executor.execute("Task", result_holder=result_holder)

        # Should be the same object
        assert result is result_holder
        assert result.task_id == "predefined-id"
        assert result.status == SubagentStatus.COMPLETED
# -----------------------------------------------------------------------------
|
||
# Async Tool Support Tests (MCP Tools)
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestAsyncToolSupport:
    """Test that async-only tools (like MCP tools) work correctly."""

    @pytest.mark.anyio
    async def test_async_tool_called_in_astream(self, classes, base_config, msg):
        """Test that async tools are properly awaited in astream.

        This verifies the fix for: async MCP tools not being executed properly
        because they were being called synchronously.
        """
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        async_tool_calls = []

        async def mock_async_tool(*args, **kwargs):
            async_tool_calls.append("called")
            await asyncio.sleep(0.01)  # Simulate async work
            return {"result": "async tool result"}

        mock_agent = MagicMock()

        # Simulate agent that calls async tools during streaming
        async def mock_astream(*args, **kwargs):
            await mock_async_tool()
            yield {
                "messages": [
                    msg.human("Task"),
                    msg.ai("Done", "msg-1"),
                ]
            }

        mock_agent.astream = mock_astream

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task")

        assert len(async_tool_calls) == 1
        assert result.status == SubagentStatus.COMPLETED

    def test_sync_execute_with_async_tools(self, classes, base_config, msg):
        """Test that sync execute() properly runs async tools via asyncio.run()."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        async_tool_calls = []

        async def mock_async_tool():
            async_tool_calls.append("called")
            await asyncio.sleep(0.01)
            return {"result": "async result"}

        mock_agent = MagicMock()

        async def mock_astream(*args, **kwargs):
            await mock_async_tool()
            yield {
                "messages": [
                    msg.human("Task"),
                    msg.ai("Done", "msg-1"),
                ]
            }

        mock_agent.astream = mock_astream

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            # No running loop here, so execute() drives the async path itself.
            result = executor.execute("Task")

        assert len(async_tool_calls) == 1
        assert result.status == SubagentStatus.COMPLETED
# -----------------------------------------------------------------------------
|
||
# Thread Safety Tests
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestThreadSafety:
    """Test thread safety of executor operations."""

    def test_multiple_executors_in_parallel(self, classes, base_config, msg):
        """Test multiple executors running in parallel via thread pool."""
        from concurrent.futures import ThreadPoolExecutor, as_completed

        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        results = []

        def execute_task(task_id: int):
            # Each task gets its own stream factory so a fresh async
            # iterator is produced per astream call.
            def make_astream(*args, **kwargs):
                return async_iterator(
                    [
                        {
                            "messages": [
                                msg.human(f"Task {task_id}"),
                                msg.ai(f"Result {task_id}", f"msg-{task_id}"),
                            ]
                        }
                    ]
                )

            mock_agent = MagicMock()
            mock_agent.astream = make_astream

            executor = SubagentExecutor(
                config=base_config,
                tools=[],
                thread_id=f"thread-{task_id}",
            )

            with patch.object(executor, "_create_agent", return_value=mock_agent):
                return executor.execute(f"Task {task_id}")

        # Execute multiple tasks in parallel
        with ThreadPoolExecutor(max_workers=3) as pool:
            futures = [pool.submit(execute_task, i) for i in range(5)]
            for future in as_completed(futures):
                results.append(future.result())

        assert len(results) == 5
        for result in results:
            assert result.status == SubagentStatus.COMPLETED
            assert "Result" in result.result
# -----------------------------------------------------------------------------
|
||
# Cleanup Background Task Tests
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestCleanupBackgroundTask:
    """Test cleanup_background_task function for race condition prevention."""

    @pytest.fixture
    def executor_module(self, _setup_executor_classes):
        """Import the executor module with real classes."""
        # Re-import to get the real module with cleanup_background_task
        import importlib

        from deerflow.subagents import executor

        return importlib.reload(executor)

    def test_cleanup_removes_terminal_completed_task(self, executor_module, classes):
        """Test that cleanup removes a COMPLETED task."""
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        # Add a completed task to the module-level registry.
        task_id = "test-completed-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.COMPLETED,
            result="done",
            completed_at=datetime.now(),
        )
        executor_module._background_tasks[task_id] = result

        # Cleanup should remove it
        executor_module.cleanup_background_task(task_id)

        assert task_id not in executor_module._background_tasks

    def test_cleanup_removes_terminal_failed_task(self, executor_module, classes):
        """Test that cleanup removes a FAILED task."""
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        task_id = "test-failed-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.FAILED,
            error="error",
            completed_at=datetime.now(),
        )
        executor_module._background_tasks[task_id] = result

        executor_module.cleanup_background_task(task_id)

        assert task_id not in executor_module._background_tasks

    def test_cleanup_removes_terminal_timed_out_task(self, executor_module, classes):
        """Test that cleanup removes a TIMED_OUT task."""
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        task_id = "test-timedout-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.TIMED_OUT,
            error="timeout",
            completed_at=datetime.now(),
        )
        executor_module._background_tasks[task_id] = result

        executor_module.cleanup_background_task(task_id)

        assert task_id not in executor_module._background_tasks

    def test_cleanup_skips_running_task(self, executor_module, classes):
        """Test that cleanup does NOT remove a RUNNING task.

        This prevents race conditions where task_tool calls cleanup
        while the background executor is still updating the task.
        """
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        task_id = "test-running-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.RUNNING,
            started_at=datetime.now(),
        )
        executor_module._background_tasks[task_id] = result

        executor_module.cleanup_background_task(task_id)

        # Should still be present because it's RUNNING
        assert task_id in executor_module._background_tasks

    def test_cleanup_skips_pending_task(self, executor_module, classes):
        """Test that cleanup does NOT remove a PENDING task."""
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        task_id = "test-pending-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.PENDING,
        )
        executor_module._background_tasks[task_id] = result

        executor_module.cleanup_background_task(task_id)

        assert task_id in executor_module._background_tasks

    def test_cleanup_handles_unknown_task_gracefully(self, executor_module):
        """Test that cleanup doesn't raise for unknown task IDs."""
        # Should not raise
        executor_module.cleanup_background_task("nonexistent-task")

    def test_cleanup_removes_task_with_completed_at_even_if_running(self, executor_module, classes):
        """Test that cleanup removes task if completed_at is set, even if status is RUNNING.

        This is a safety net: if completed_at is set, the task is considered done
        regardless of status.
        """
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        task_id = "test-completed-at-task"
        result = SubagentResult(
            task_id=task_id,
            trace_id="test-trace",
            status=SubagentStatus.RUNNING,  # Status not terminal
            completed_at=datetime.now(),  # But completed_at is set
        )
        executor_module._background_tasks[task_id] = result

        executor_module.cleanup_background_task(task_id)

        # Should be removed because completed_at is set
        assert task_id not in executor_module._background_tasks
# -----------------------------------------------------------------------------
|
||
# Cooperative Cancellation Tests
|
||
# -----------------------------------------------------------------------------
|
||
|
||
|
||
class TestCooperativeCancellation:
|
||
"""Test cooperative cancellation via cancel_event."""
|
||
|
||
    @pytest.fixture
    def executor_module(self, _setup_executor_classes):
        """Import the executor module with real classes.

        Reloads deerflow.subagents.executor so module-level state
        (e.g. the background task registry) starts fresh for each test.
        """
        import importlib

        from deerflow.subagents import executor

        return importlib.reload(executor)
    @pytest.mark.anyio
    async def test_aexecute_cancelled_before_streaming(self, classes, base_config, mock_agent, msg):
        """Test that _aexecute returns CANCELLED when cancel_event is set before streaming."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        # The agent should never be called
        call_count = 0

        async def mock_astream(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            yield {"messages": [msg.human("Task"), msg.ai("Done", "msg-1")]}

        mock_agent.astream = mock_astream

        # Pre-create result holder with cancel_event already set
        result_holder = SubagentResult(
            task_id="cancel-before",
            trace_id="test-trace",
            status=SubagentStatus.RUNNING,
            started_at=datetime.now(),
        )
        result_holder.cancel_event.set()

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task", result_holder=result_holder)

        assert result.status == SubagentStatus.CANCELLED
        assert result.error == "Cancelled by user"
        assert result.completed_at is not None
        assert call_count == 0  # astream was never entered
    @pytest.mark.anyio
    async def test_aexecute_cancelled_mid_stream(self, classes, base_config, msg):
        """Test that _aexecute returns CANCELLED when cancel_event is set during streaming."""
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentResult = classes["SubagentResult"]
        SubagentStatus = classes["SubagentStatus"]

        cancel_event = threading.Event()

        async def mock_astream(*args, **kwargs):
            yield {"messages": [msg.human("Task"), msg.ai("Partial", "msg-1")]}
            # Simulate cancellation during streaming
            cancel_event.set()
            yield {"messages": [msg.human("Task"), msg.ai("Should not appear", "msg-2")]}

        mock_agent = MagicMock()
        mock_agent.astream = mock_astream

        result_holder = SubagentResult(
            task_id="cancel-mid",
            trace_id="test-trace",
            status=SubagentStatus.RUNNING,
            started_at=datetime.now(),
        )
        # Inject our own event so the test controls when cancellation fires.
        result_holder.cancel_event = cancel_event

        executor = SubagentExecutor(
            config=base_config,
            tools=[],
            thread_id="test-thread",
        )

        with patch.object(executor, "_create_agent", return_value=mock_agent):
            result = await executor._aexecute("Task", result_holder=result_holder)

        assert result.status == SubagentStatus.CANCELLED
        assert result.error == "Cancelled by user"
        assert result.completed_at is not None
def test_request_cancel_sets_event(self, executor_module, classes):
|
||
"""Test that request_cancel_background_task sets the cancel_event."""
|
||
SubagentResult = classes["SubagentResult"]
|
||
SubagentStatus = classes["SubagentStatus"]
|
||
|
||
task_id = "test-cancel-event"
|
||
result = SubagentResult(
|
||
task_id=task_id,
|
||
trace_id="test-trace",
|
||
status=SubagentStatus.RUNNING,
|
||
started_at=datetime.now(),
|
||
)
|
||
executor_module._background_tasks[task_id] = result
|
||
|
||
assert not result.cancel_event.is_set()
|
||
|
||
executor_module.request_cancel_background_task(task_id)
|
||
|
||
assert result.cancel_event.is_set()
|
||
|
||
def test_request_cancel_nonexistent_task_is_noop(self, executor_module):
|
||
"""Test that requesting cancellation on a nonexistent task does not raise."""
|
||
executor_module.request_cancel_background_task("nonexistent-task")
|
||
|
||
    def test_timeout_does_not_overwrite_cancelled(self, executor_module, classes, base_config, msg):
        """Test that the real timeout handler does not overwrite CANCELLED status.

        This exercises the actual execute_async → run_task → FuturesTimeoutError
        code path in executor.py. We make execute() block so the timeout fires
        deterministically, pre-set the task to CANCELLED, and verify the RUNNING
        guard preserves it. Uses threading.Event for synchronisation instead of
        wall-clock sleeps.
        """
        SubagentExecutor = classes["SubagentExecutor"]
        SubagentStatus = classes["SubagentStatus"]

        # Deliberately tiny timeout so the FuturesTimeoutError path fires
        # while our blocking execute() replacement is still parked.
        short_config = classes["SubagentConfig"](
            name="test-agent",
            description="Test agent",
            system_prompt="You are a test agent.",
            max_turns=10,
            timeout_seconds=0.05,  # 50ms – just enough for the future to time out
        )

        # Synchronisation primitives
        execute_entered = threading.Event()  # signals that execute() has started
        execute_release = threading.Event()  # lets execute() return
        run_task_done = threading.Event()  # signals that run_task() has finished

        # A blocking execute() replacement so we control the timing exactly
        def blocking_execute(task, result_holder=None):
            # Cooperative cancellation: honour cancel_event like real _aexecute
            if result_holder and result_holder.cancel_event.is_set():
                result_holder.status = SubagentStatus.CANCELLED
                result_holder.error = "Cancelled by user"
                result_holder.completed_at = datetime.now()
                execute_entered.set()
                return result_holder
            execute_entered.set()
            # Park here until the test releases us; 5s cap prevents a hang
            # from deadlocking the whole test run.
            execute_release.wait(timeout=5)
            # Return a minimal completed result (will be ignored because timeout fires first)
            from deerflow.subagents.executor import SubagentResult as _R

            return _R(task_id="x", trace_id="t", status=SubagentStatus.COMPLETED, result="late")

        executor = SubagentExecutor(
            config=short_config,
            tools=[],
            thread_id="test-thread",
            trace_id="test-trace",
        )

        # Wrap _scheduler_pool.submit so we know when run_task finishes
        original_scheduler_submit = executor_module._scheduler_pool.submit

        def tracked_submit(fn, *args, **kwargs):
            # Wrap the submitted callable so run_task_done fires even if
            # run_task() raises — hence the try/finally.
            def wrapper():
                try:
                    fn(*args, **kwargs)
                finally:
                    run_task_done.set()

            return original_scheduler_submit(wrapper)

        with patch.object(executor, "execute", blocking_execute), patch.object(executor_module._scheduler_pool, "submit", tracked_submit):
            task_id = executor.execute_async("Task")

            # Wait until execute() is entered (i.e. it's running in _execution_pool)
            assert execute_entered.wait(timeout=3), "execute() was never called"

            # Set CANCELLED on the result before the timeout handler runs.
            # The 50ms timeout will fire while execute() is blocked.
            with executor_module._background_tasks_lock:
                executor_module._background_tasks[task_id].status = SubagentStatus.CANCELLED
                executor_module._background_tasks[task_id].error = "Cancelled by user"
                executor_module._background_tasks[task_id].completed_at = datetime.now()

            # Wait for run_task to finish — the FuturesTimeoutError handler has
            # now executed and (should have) left CANCELLED intact.
            assert run_task_done.wait(timeout=5), "run_task() did not finish"

            # Only NOW release the blocked execute() so the thread pool worker
            # can be reclaimed. This MUST come after run_task_done to avoid a
            # race where execute() returns before the timeout fires.
            execute_release.set()

            # NOTE(review): this read is outside _background_tasks_lock —
            # presumably safe because run_task has already completed, but
            # confirm no other writer can touch this entry here.
            result = executor_module._background_tasks.get(task_id)
            assert result is not None
            # The RUNNING guard in the FuturesTimeoutError handler must have
            # preserved CANCELLED instead of overwriting with TIMED_OUT.
            assert result.status.value == SubagentStatus.CANCELLED.value
            assert result.error == "Cancelled by user"
            assert result.completed_at is not None

def test_cleanup_removes_cancelled_task(self, executor_module, classes):
|
||
"""Test that cleanup removes a CANCELLED task (terminal state)."""
|
||
SubagentResult = classes["SubagentResult"]
|
||
SubagentStatus = classes["SubagentStatus"]
|
||
|
||
task_id = "test-cancelled-cleanup"
|
||
result = SubagentResult(
|
||
task_id=task_id,
|
||
trace_id="test-trace",
|
||
status=SubagentStatus.CANCELLED,
|
||
error="Cancelled by user",
|
||
completed_at=datetime.now(),
|
||
)
|
||
executor_module._background_tasks[task_id] = result
|
||
|
||
executor_module.cleanup_background_task(task_id)
|
||
|
||
assert task_id not in executor_module._background_tasks
|