Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening:
- New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters
- All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py) stubbed too
- Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source

See HARDENING.md for the full audit trail and verification steps.
127 lines
4.6 KiB
Python
127 lines
4.6 KiB
Python
"""Abstract base class for IM channels."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any
|
|
|
|
from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Channel(ABC):
    """Common interface shared by every IM channel implementation.

    A channel bridges one external messaging platform and the message bus:

    * messages received from the platform are wrapped as ``InboundMessage``
      objects and published to the bus;
    * outbound messages coming from the bus are sent back to the platform.

    Concrete subclasses have to provide ``start``, ``stop`` and ``send``.
    """

    def __init__(self, name: str, bus: MessageBus, config: dict[str, Any]) -> None:
        """Store the channel name, the bus and the channel configuration.

        The channel is created in the "not running" state.
        """
        self._running = False
        self.config = config
        self.bus = bus
        self.name = name

    @property
    def is_running(self) -> bool:
        """Current value of the internal running flag."""
        return self._running

    # -- lifecycle ---------------------------------------------------------

    @abstractmethod
    async def start(self) -> None:
        """Begin listening for messages coming from the external platform."""

    @abstractmethod
    async def stop(self) -> None:
        """Shut the channel down gracefully."""

    # -- outbound ----------------------------------------------------------

    @abstractmethod
    async def send(self, msg: OutboundMessage) -> None:
        """Deliver ``msg`` to the external platform.

        Implementations are expected to route the reply to the right
        conversation/thread using ``msg.chat_id`` and ``msg.thread_ts``.
        """

    async def send_file(self, msg: OutboundMessage, attachment: ResolvedAttachment) -> bool:
        """Upload one file attachment to the platform.

        Returns:
            True when the upload succeeded, False otherwise. This base
            implementation has no upload support and always returns False.
        """
        return False

    # -- helpers -----------------------------------------------------------

    def _make_inbound(
        self,
        chat_id: str,
        user_id: str,
        text: str,
        *,
        msg_type: InboundMessageType = InboundMessageType.CHAT,
        thread_ts: str | None = None,
        files: list[dict[str, Any]] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> InboundMessage:
        """Build an ``InboundMessage`` stamped with this channel's name.

        Falsy ``files`` / ``metadata`` (including ``None``) are replaced by a
        fresh empty list / dict respectively.
        """
        file_entries = files if files else []
        extra = metadata if metadata else {}
        return InboundMessage(
            channel_name=self.name,
            chat_id=chat_id,
            user_id=user_id,
            text=text,
            msg_type=msg_type,
            thread_ts=thread_ts,
            files=file_entries,
            metadata=extra,
        )

    async def _on_outbound(self, msg: OutboundMessage) -> None:
        """Bus callback that delivers an outbound message through this channel.

        Messages addressed to a different channel are ignored. For a matching
        message the text is sent first and the attachments are uploaded
        afterwards, one by one. If sending the text raises, the error is
        logged and no attachment is uploaded, so the platform never receives
        files without their accompanying text. A failing or refused upload is
        logged and does not stop the remaining uploads.
        """
        if msg.channel_name != self.name:
            return

        try:
            await self.send(msg)
        except Exception:
            logger.exception("Failed to send outbound message on channel %s", self.name)
            # The text did not go out, so skip every file upload as well.
            return

        for attachment in msg.attachments:
            try:
                if not await self.send_file(msg, attachment):
                    logger.warning("[%s] file upload skipped for %s", self.name, attachment.filename)
            except Exception:
                logger.exception("[%s] failed to upload file %s", self.name, attachment.filename)

    async def receive_file(self, msg: InboundMessage, thread_id: str) -> InboundMessage:
        """Hook for materializing inbound file attachments.

        The base implementation is a no-op that hands back ``msg`` unchanged.
        Subclasses (e.g. FeishuChannel) may override it to download the files
        (images, documents, etc) referenced in ``msg.files``, store them in
        the sandbox and rewrite ``msg.text`` so that it mentions the sandbox
        file paths for downstream model consumption.

        Args:
            msg: The inbound message, possibly carrying file metadata in
                ``msg.files``.
            thread_id: The resolved DeerFlow thread ID, used as sandbox path
                context.

        Returns:
            The (possibly modified) InboundMessage, with text and/or files
            updated as needed.
        """
        return msg
|