Files
deerflow-factory/deer-flow/backend/app/channels/base.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

127 lines
4.6 KiB
Python

"""Abstract base class for IM channels."""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import Any
from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
logger = logging.getLogger(__name__)
class Channel(ABC):
"""Base class for all IM channel implementations.
Each channel connects to an external messaging platform and:
1. Receives messages, wraps them as InboundMessage, publishes to the bus.
2. Subscribes to outbound messages and sends replies back to the platform.
Subclasses must implement ``start``, ``stop``, and ``send``.
"""
def __init__(self, name: str, bus: MessageBus, config: dict[str, Any]) -> None:
self.name = name
self.bus = bus
self.config = config
self._running = False
@property
def is_running(self) -> bool:
return self._running
# -- lifecycle ---------------------------------------------------------
@abstractmethod
async def start(self) -> None:
"""Start listening for messages from the external platform."""
@abstractmethod
async def stop(self) -> None:
"""Gracefully stop the channel."""
# -- outbound ----------------------------------------------------------
@abstractmethod
async def send(self, msg: OutboundMessage) -> None:
"""Send a message back to the external platform.
The implementation should use ``msg.chat_id`` and ``msg.thread_ts``
to route the reply to the correct conversation/thread.
"""
async def send_file(self, msg: OutboundMessage, attachment: ResolvedAttachment) -> bool:
"""Upload a single file attachment to the platform.
Returns True if the upload succeeded, False otherwise.
Default implementation returns False (no file upload support).
"""
return False
# -- helpers -----------------------------------------------------------
def _make_inbound(
self,
chat_id: str,
user_id: str,
text: str,
*,
msg_type: InboundMessageType = InboundMessageType.CHAT,
thread_ts: str | None = None,
files: list[dict[str, Any]] | None = None,
metadata: dict[str, Any] | None = None,
) -> InboundMessage:
"""Convenience factory for creating InboundMessage instances."""
return InboundMessage(
channel_name=self.name,
chat_id=chat_id,
user_id=user_id,
text=text,
msg_type=msg_type,
thread_ts=thread_ts,
files=files or [],
metadata=metadata or {},
)
async def _on_outbound(self, msg: OutboundMessage) -> None:
"""Outbound callback registered with the bus.
Only forwards messages targeted at this channel.
Sends the text message first, then uploads any file attachments.
File uploads are skipped entirely when the text send fails to avoid
partial deliveries (files without accompanying text).
"""
if msg.channel_name == self.name:
try:
await self.send(msg)
except Exception:
logger.exception("Failed to send outbound message on channel %s", self.name)
return # Do not attempt file uploads when the text message failed
for attachment in msg.attachments:
try:
success = await self.send_file(msg, attachment)
if not success:
logger.warning("[%s] file upload skipped for %s", self.name, attachment.filename)
except Exception:
logger.exception("[%s] failed to upload file %s", self.name, attachment.filename)
async def receive_file(self, msg: InboundMessage, thread_id: str) -> InboundMessage:
"""
Optionally process and materialize inbound file attachments for this channel.
By default, this method does nothing and simply returns the original message.
Subclasses (e.g. FeishuChannel) may override this to download files (images, documents, etc)
referenced in msg.files, save them to the sandbox, and update msg.text to include
the sandbox file paths for downstream model consumption.
Args:
msg: The inbound message, possibly containing file metadata in msg.files.
thread_id: The resolved DeerFlow thread ID for sandbox path context.
Returns:
The (possibly modified) InboundMessage, with text and/or files updated as needed.
"""
return msg