Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
174 lines
6.1 KiB
Python
174 lines
6.1 KiB
Python
"""MessageBus — async pub/sub hub that decouples channels from the agent dispatcher."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from collections.abc import Callable, Coroutine
|
|
from dataclasses import dataclass, field
|
|
from enum import StrEnum
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Message types
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class InboundMessageType(StrEnum):
|
|
"""Types of messages arriving from IM channels."""
|
|
|
|
CHAT = "chat"
|
|
COMMAND = "command"
|
|
|
|
|
|
@dataclass
|
|
class InboundMessage:
|
|
"""A message arriving from an IM channel toward the agent dispatcher.
|
|
|
|
Attributes:
|
|
channel_name: Name of the source channel (e.g. "feishu", "slack").
|
|
chat_id: Platform-specific chat/conversation identifier.
|
|
user_id: Platform-specific user identifier.
|
|
text: The message text.
|
|
msg_type: Whether this is a regular chat message or a command.
|
|
thread_ts: Optional platform thread identifier (for threaded replies).
|
|
topic_id: Conversation topic identifier used to map to a DeerFlow thread.
|
|
Messages sharing the same ``topic_id`` within a ``chat_id`` will
|
|
reuse the same DeerFlow thread. When ``None``, each message
|
|
creates a new thread (one-shot Q&A).
|
|
files: Optional list of file attachments (platform-specific dicts).
|
|
metadata: Arbitrary extra data from the channel.
|
|
created_at: Unix timestamp when the message was created.
|
|
"""
|
|
|
|
channel_name: str
|
|
chat_id: str
|
|
user_id: str
|
|
text: str
|
|
msg_type: InboundMessageType = InboundMessageType.CHAT
|
|
thread_ts: str | None = None
|
|
topic_id: str | None = None
|
|
files: list[dict[str, Any]] = field(default_factory=list)
|
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
created_at: float = field(default_factory=time.time)
|
|
|
|
|
|
@dataclass
|
|
class ResolvedAttachment:
|
|
"""A file attachment resolved to a host filesystem path, ready for upload.
|
|
|
|
Attributes:
|
|
virtual_path: Original virtual path (e.g. /mnt/user-data/outputs/report.pdf).
|
|
actual_path: Resolved host filesystem path.
|
|
filename: Basename of the file.
|
|
mime_type: MIME type (e.g. "application/pdf").
|
|
size: File size in bytes.
|
|
is_image: True for image/* MIME types (platforms may handle images differently).
|
|
"""
|
|
|
|
virtual_path: str
|
|
actual_path: Path
|
|
filename: str
|
|
mime_type: str
|
|
size: int
|
|
is_image: bool
|
|
|
|
|
|
@dataclass
|
|
class OutboundMessage:
|
|
"""A message from the agent dispatcher back to a channel.
|
|
|
|
Attributes:
|
|
channel_name: Target channel name (used for routing).
|
|
chat_id: Target chat/conversation identifier.
|
|
thread_id: DeerFlow thread ID that produced this response.
|
|
text: The response text.
|
|
artifacts: List of artifact paths produced by the agent.
|
|
is_final: Whether this is the final message in the response stream.
|
|
thread_ts: Optional platform thread identifier for threaded replies.
|
|
metadata: Arbitrary extra data.
|
|
created_at: Unix timestamp.
|
|
"""
|
|
|
|
channel_name: str
|
|
chat_id: str
|
|
thread_id: str
|
|
text: str
|
|
artifacts: list[str] = field(default_factory=list)
|
|
attachments: list[ResolvedAttachment] = field(default_factory=list)
|
|
is_final: bool = True
|
|
thread_ts: str | None = None
|
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
created_at: float = field(default_factory=time.time)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MessageBus
|
|
# ---------------------------------------------------------------------------
|
|
|
|
OutboundCallback = Callable[[OutboundMessage], Coroutine[Any, Any, None]]
|
|
|
|
|
|
class MessageBus:
|
|
"""Async pub/sub hub connecting channels and the agent dispatcher.
|
|
|
|
Channels publish inbound messages; the dispatcher consumes them.
|
|
The dispatcher publishes outbound messages; channels receive them
|
|
via registered callbacks.
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
self._inbound_queue: asyncio.Queue[InboundMessage] = asyncio.Queue()
|
|
self._outbound_listeners: list[OutboundCallback] = []
|
|
|
|
# -- inbound -----------------------------------------------------------
|
|
|
|
async def publish_inbound(self, msg: InboundMessage) -> None:
|
|
"""Enqueue an inbound message from a channel."""
|
|
await self._inbound_queue.put(msg)
|
|
logger.info(
|
|
"[Bus] inbound enqueued: channel=%s, chat_id=%s, type=%s, queue_size=%d",
|
|
msg.channel_name,
|
|
msg.chat_id,
|
|
msg.msg_type.value,
|
|
self._inbound_queue.qsize(),
|
|
)
|
|
|
|
async def get_inbound(self) -> InboundMessage:
|
|
"""Block until the next inbound message is available."""
|
|
return await self._inbound_queue.get()
|
|
|
|
@property
|
|
def inbound_queue(self) -> asyncio.Queue[InboundMessage]:
|
|
return self._inbound_queue
|
|
|
|
# -- outbound ----------------------------------------------------------
|
|
|
|
def subscribe_outbound(self, callback: OutboundCallback) -> None:
|
|
"""Register an async callback for outbound messages."""
|
|
self._outbound_listeners.append(callback)
|
|
|
|
def unsubscribe_outbound(self, callback: OutboundCallback) -> None:
|
|
"""Remove a previously registered outbound callback."""
|
|
self._outbound_listeners = [cb for cb in self._outbound_listeners if cb is not callback]
|
|
|
|
async def publish_outbound(self, msg: OutboundMessage) -> None:
|
|
"""Dispatch an outbound message to all registered listeners."""
|
|
logger.info(
|
|
"[Bus] outbound dispatching: channel=%s, chat_id=%s, listeners=%d, text_len=%d",
|
|
msg.channel_name,
|
|
msg.chat_id,
|
|
len(self._outbound_listeners),
|
|
len(msg.text),
|
|
)
|
|
for callback in self._outbound_listeners:
|
|
try:
|
|
await callback(msg)
|
|
except Exception:
|
|
logger.exception("Error in outbound callback for channel=%s", msg.channel_name)
|