Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
881 lines
34 KiB
YAML
881 lines
34 KiB
YAML
# Configuration for the DeerFlow application
|
|
#
|
|
# Guidelines:
|
|
# - Copy this file to `config.yaml` and customize it for your environment
|
|
# - The default path of this configuration file is `config.yaml` in the current working directory.
|
|
# However you can change it using the `DEER_FLOW_CONFIG_PATH` environment variable.
|
|
# - Environment variables are available for all field values. Example: `api_key: $OPENAI_API_KEY`
|
|
# - The `use` path is a string that looks like "package_name.sub_package_name.module_name:class_name/variable_name".
|
|
|
|
# ============================================================================
|
|
# Config Version (used to detect outdated config files)
|
|
# ============================================================================
|
|
# Bump this number when the config schema changes.
|
|
# Run `make config-upgrade` to merge new fields into your local config.yaml.
|
|
config_version: 6
|
|
|
|
# ============================================================================
|
|
# Logging
|
|
# ============================================================================
|
|
# Log level for deerflow modules (debug/info/warning/error)
|
|
log_level: info
|
|
|
|
# ============================================================================
|
|
# Token Usage Tracking
|
|
# ============================================================================
|
|
# Track LLM token usage per model call (input/output/total tokens)
|
|
# Logs at info level via TokenUsageMiddleware
|
|
token_usage:
  # Set to true to log per-call token usage (input/output/total) at info level.
  enabled: false
|
|
|
|
# ============================================================================
|
|
# Models Configuration
|
|
# ============================================================================
|
|
# Configure available LLM models for the agent to use
|
|
|
|
models:
|
|
# Example: Volcengine (Doubao) model
|
|
# - name: doubao-seed-1.8
|
|
# display_name: Doubao-Seed-1.8
|
|
# use: deerflow.models.patched_deepseek:PatchedChatDeepSeek
|
|
# model: doubao-seed-1-8-251228
|
|
# api_base: https://ark.cn-beijing.volces.com/api/v3
|
|
# api_key: $VOLCENGINE_API_KEY
|
|
# timeout: 600.0
|
|
# max_retries: 2
|
|
# supports_thinking: true
|
|
# supports_vision: true
|
|
# supports_reasoning_effort: true
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: OpenAI model
|
|
# - name: gpt-4
|
|
# display_name: GPT-4
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: gpt-4
|
|
# api_key: $OPENAI_API_KEY # Use environment variable
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 0.7
|
|
# supports_vision: true # Enable vision support for view_image tool
|
|
|
|
# Example: OpenAI Responses API model
|
|
# - name: gpt-5-responses
|
|
# display_name: GPT-5 (Responses API)
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: gpt-5
|
|
# api_key: $OPENAI_API_KEY
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# use_responses_api: true
|
|
# output_version: responses/v1
|
|
# supports_vision: true
|
|
|
|
# Example: Ollama (native provider — preserves thinking/reasoning content)
|
|
#
|
|
# IMPORTANT: Use langchain_ollama:ChatOllama instead of langchain_openai:ChatOpenAI
|
|
# for Ollama models. The OpenAI-compatible endpoint (/v1/chat/completions) does NOT
|
|
# return reasoning_content as a separate field — thinking content is either flattened
|
|
# into <think> tags or dropped entirely (ollama/ollama#15293). The native Ollama API
|
|
# (/api/chat) correctly separates thinking from response content.
|
|
#
|
|
# Install: cd backend && uv pip install 'deerflow-harness[ollama]'
|
|
#
|
|
# - name: qwen3-local
|
|
# display_name: Qwen3 32B (Ollama)
|
|
# use: langchain_ollama:ChatOllama
|
|
# model: qwen3:32b
|
|
# base_url: http://localhost:11434 # No /v1 suffix — uses native /api/chat
|
|
# num_predict: 8192
|
|
# temperature: 0.7
|
|
# reasoning: true # Passes think:true to Ollama native API
|
|
# supports_thinking: true
|
|
# supports_vision: false
|
|
#
|
|
# - name: gemma4-local
|
|
# display_name: Gemma 4 27B (Ollama)
|
|
# use: langchain_ollama:ChatOllama
|
|
# model: gemma4:27b
|
|
# base_url: http://localhost:11434
|
|
# num_predict: 8192
|
|
# temperature: 0.7
|
|
# reasoning: true
|
|
# supports_thinking: true
|
|
# supports_vision: true
|
|
#
|
|
# For Docker deployments, use host.docker.internal instead of localhost:
|
|
# base_url: http://host.docker.internal:11434
|
|
|
|
# Example: Anthropic Claude model
|
|
# - name: claude-3-5-sonnet
|
|
# display_name: Claude 3.5 Sonnet
|
|
# use: langchain_anthropic:ChatAnthropic
|
|
# model: claude-3-5-sonnet-20241022
|
|
# api_key: $ANTHROPIC_API_KEY
|
|
# default_request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 8192
|
|
# supports_vision: true # Enable vision support for view_image tool
|
|
# when_thinking_enabled:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: Google Gemini model (native SDK, no thinking support)
|
|
# - name: gemini-2.5-pro
|
|
# display_name: Gemini 2.5 Pro
|
|
# use: langchain_google_genai:ChatGoogleGenerativeAI
|
|
# model: gemini-2.5-pro
|
|
# gemini_api_key: $GEMINI_API_KEY
|
|
# timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 8192
|
|
# supports_vision: true
|
|
|
|
# Example: Gemini model via OpenAI-compatible gateway (with thinking support)
|
|
# Use PatchedChatOpenAI so that tool-call thought_signature values on tool_calls
|
|
# are preserved across multi-turn tool-call conversations — required by the
|
|
# Gemini API when thinking is enabled. See:
|
|
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thought-signatures
|
|
# - name: gemini-2.5-pro-thinking
|
|
# display_name: Gemini 2.5 Pro (Thinking)
|
|
# use: deerflow.models.patched_openai:PatchedChatOpenAI
|
|
# model: google/gemini-2.5-pro-preview # model name as expected by your gateway
|
|
# api_key: $GEMINI_API_KEY
|
|
# base_url: https://<your-openai-compat-gateway>/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 16384
|
|
# supports_thinking: true
|
|
# supports_vision: true
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: DeepSeek model (with thinking support)
|
|
# - name: deepseek-v3
|
|
# display_name: DeepSeek V3 (Thinking)
|
|
# use: deerflow.models.patched_deepseek:PatchedChatDeepSeek
|
|
# model: deepseek-reasoner
|
|
# api_key: $DEEPSEEK_API_KEY
|
|
# timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 8192
|
|
# supports_thinking: true
|
|
# supports_vision: false # DeepSeek V3 does not support vision
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: Kimi K2.5 model
|
|
# - name: kimi-k2.5
|
|
# display_name: Kimi K2.5
|
|
# use: deerflow.models.patched_deepseek:PatchedChatDeepSeek
|
|
# model: kimi-k2.5
|
|
# api_base: https://api.moonshot.cn/v1
|
|
# api_key: $MOONSHOT_API_KEY
|
|
# timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 32768
|
|
# supports_thinking: true
|
|
# supports_vision: true # Check your specific model's capabilities
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: Novita AI (OpenAI-compatible)
|
|
# Novita provides an OpenAI-compatible API with competitive pricing
|
|
# See: https://novita.ai
|
|
# - name: novita-deepseek-v3.2
|
|
# display_name: Novita DeepSeek V3.2
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: deepseek/deepseek-v3.2
|
|
# api_key: $NOVITA_API_KEY
|
|
# base_url: https://api.novita.ai/openai
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 0.7
|
|
# supports_thinking: true
|
|
# supports_vision: true
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: enabled
|
|
# when_thinking_disabled:
|
|
# extra_body:
|
|
# thinking:
|
|
# type: disabled
|
|
|
|
# Example: MiniMax (OpenAI-compatible) - International Edition
|
|
# MiniMax provides high-performance models with 204K context window
|
|
# Docs: https://platform.minimax.io/docs/api-reference/text-openai-api
|
|
# - name: minimax-m2.5
|
|
# display_name: MiniMax M2.5
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: MiniMax-M2.5
|
|
# api_key: $MINIMAX_API_KEY
|
|
# base_url: https://api.minimax.io/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0]
|
|
# supports_vision: true
|
|
# supports_thinking: true
|
|
|
|
# - name: minimax-m2.5-highspeed
|
|
# display_name: MiniMax M2.5 Highspeed
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: MiniMax-M2.5-highspeed
|
|
# api_key: $MINIMAX_API_KEY
|
|
# base_url: https://api.minimax.io/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0]
|
|
# supports_vision: true
|
|
# supports_thinking: true
|
|
|
|
# Example: MiniMax (OpenAI-compatible) - CN 中国区用户
|
|
# MiniMax provides high-performance models with 204K context window
|
|
# Docs: https://platform.minimaxi.com/docs/api-reference/text-openai-api
|
|
# - name: minimax-m2.7
|
|
# display_name: MiniMax M2.7
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: MiniMax-M2.7
|
|
# api_key: $MINIMAX_API_KEY
|
|
# base_url: https://api.minimaxi.com/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0]
|
|
# supports_vision: true
|
|
# supports_thinking: true
|
|
|
|
# - name: minimax-m2.7-highspeed
|
|
# display_name: MiniMax M2.7 Highspeed
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: MiniMax-M2.7-highspeed
|
|
# api_key: $MINIMAX_API_KEY
|
|
# base_url: https://api.minimaxi.com/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 4096
|
|
# temperature: 1.0 # MiniMax requires temperature in (0.0, 1.0]
|
|
# supports_vision: true
|
|
# supports_thinking: true
|
|
|
|
# Example: OpenRouter (OpenAI-compatible)
|
|
# OpenRouter models use the same ChatOpenAI + base_url pattern as other OpenAI-compatible gateways.
|
|
# - name: openrouter-gemini-2.5-flash
|
|
# display_name: Gemini 2.5 Flash (OpenRouter)
|
|
# use: langchain_openai:ChatOpenAI
|
|
# model: google/gemini-2.5-flash-preview
|
|
# api_key: $OPENAI_API_KEY
|
|
# base_url: https://openrouter.ai/api/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 8192
|
|
# temperature: 0.7
|
|
|
|
# Example: vLLM 0.19.0 (OpenAI-compatible, with reasoning toggle)
|
|
# DeerFlow's vLLM provider preserves vLLM reasoning across tool-call turns and
|
|
# toggles Qwen-style reasoning by writing
|
|
# extra_body.chat_template_kwargs.enable_thinking=true/false.
|
|
# Some reasoning models also require the server to be started with
|
|
# `vllm serve ... --reasoning-parser <parser>`.
|
|
# - name: qwen3-32b-vllm
|
|
# display_name: Qwen3 32B (vLLM)
|
|
# use: deerflow.models.vllm_provider:VllmChatModel
|
|
# model: Qwen/Qwen3-32B
|
|
# api_key: $VLLM_API_KEY
|
|
# base_url: http://localhost:8000/v1
|
|
# request_timeout: 600.0
|
|
# max_retries: 2
|
|
# max_tokens: 8192
|
|
# supports_thinking: true
|
|
# supports_vision: false
|
|
# when_thinking_enabled:
|
|
# extra_body:
|
|
# chat_template_kwargs:
|
|
# enable_thinking: true
|
|
|
|
# ============================================================================
|
|
# Tool Groups Configuration
|
|
# ============================================================================
|
|
# Define groups of tools for organization and access control
|
|
|
|
tool_groups:
  # Group names referenced by the `group` field of each entry under `tools`.
  - name: web
  - name: file:read
  - name: file:write
  - name: bash
|
|
|
|
# ============================================================================
|
|
# Tools Configuration
|
|
# ============================================================================
|
|
# Configure available tools for the agent to use
|
|
|
|
tools:
  # IMPORTANT (prompt-injection hardening):
  # In this tree the native community web providers (ddg_search, tavily, exa,
  # firecrawl, jina_ai, infoquest, image_search) are hard-fail stubs that raise
  # NativeWebToolDisabledError at import time. Only the SearX-backed tools in
  # deerflow.community.searx may be enabled. See HARDENING.md.
  #
  # NOTE(review): the `use` paths of the three active web tools below assume
  # the searx package follows the same `<package>.tools:<name>_tool` layout as
  # the other community providers, and that it accepts the same `max_results`
  # / `timeout` options — confirm against deerflow/community/searx.

  # Web search tool (SearX-backed; output is sanitized and delimiter-wrapped)
  - name: web_search
    group: web
    use: deerflow.community.searx.tools:web_search_tool
    max_results: 5

  # Web fetch tool (SearX-backed)
  # NOTE: Only one web_fetch provider can be active at a time.
  - name: web_fetch
    group: web
    use: deerflow.community.searx.tools:web_fetch_tool
    timeout: 10

  # Image search tool (SearX-backed)
  # Use this to find reference images before image generation
  - name: image_search
    group: web
    use: deerflow.community.searx.tools:image_search_tool
    max_results: 5

  # --------------------------------------------------------------------------
  # Upstream native web providers — DISABLED in this tree.
  # Kept only as a reference to the upstream configuration. Uncommenting any
  # of the entries below makes the tool import fail with
  # NativeWebToolDisabledError.
  # --------------------------------------------------------------------------

  # Web search tool (uses DuckDuckGo, no API key required)
  # - name: web_search
  #   group: web
  #   use: deerflow.community.ddg_search.tools:web_search_tool
  #   max_results: 5

  # Web search tool (requires Tavily API key)
  # - name: web_search
  #   group: web
  #   use: deerflow.community.tavily.tools:web_search_tool
  #   max_results: 5
  #   # api_key: $TAVILY_API_KEY  # Set if needed

  # Web search tool (uses InfoQuest, requires InfoQuest API key)
  # - name: web_search
  #   group: web
  #   use: deerflow.community.infoquest.tools:web_search_tool
  #   # Used to limit the scope of search results, only returns content within the specified time range. Set to -1 to disable time filtering
  #   search_time_range: 10

  # Web search tool (uses Exa, requires EXA_API_KEY)
  # - name: web_search
  #   group: web
  #   use: deerflow.community.exa.tools:web_search_tool
  #   max_results: 5
  #   search_type: auto  # Options: auto, neural, keyword
  #   contents_max_characters: 1000
  #   # api_key: $EXA_API_KEY

  # Web search tool (uses Firecrawl, requires FIRECRAWL_API_KEY)
  # - name: web_search
  #   group: web
  #   use: deerflow.community.firecrawl.tools:web_search_tool
  #   max_results: 5
  #   # api_key: $FIRECRAWL_API_KEY

  # Web fetch tool (uses Exa)
  # - name: web_fetch
  #   group: web
  #   use: deerflow.community.exa.tools:web_fetch_tool
  #   # api_key: $EXA_API_KEY

  # Web fetch tool (uses Jina AI reader)
  # - name: web_fetch
  #   group: web
  #   use: deerflow.community.jina_ai.tools:web_fetch_tool
  #   timeout: 10

  # Web fetch tool (uses InfoQuest)
  # - name: web_fetch
  #   group: web
  #   use: deerflow.community.infoquest.tools:web_fetch_tool
  #   # Overall timeout for the entire crawling process (in seconds). Set to positive value to enable, -1 to disable
  #   timeout: 10
  #   # Waiting time after page loading (in seconds). Set to positive value to enable, -1 to disable
  #   fetch_time: 10
  #   # Timeout for navigating to the page (in seconds). Set to positive value to enable, -1 to disable
  #   navigation_timeout: 30

  # Web fetch tool (uses Firecrawl, requires FIRECRAWL_API_KEY)
  # - name: web_fetch
  #   group: web
  #   use: deerflow.community.firecrawl.tools:web_fetch_tool
  #   # api_key: $FIRECRAWL_API_KEY

  # Image search tool (uses DuckDuckGo)
  # - name: image_search
  #   group: web
  #   use: deerflow.community.image_search.tools:image_search_tool
  #   max_results: 5

  # Image search tool (uses InfoQuest)
  # - name: image_search
  #   group: web
  #   use: deerflow.community.infoquest.tools:image_search_tool
  #   # Used to limit the scope of image search results, only returns content within the specified time range. Set to -1 to disable time filtering
  #   image_search_time_range: 10
  #   # Image size filter. Options: "l" (large), "m" (medium), "i" (icon).
  #   image_size: "i"

  # File operations tools
  - name: ls
    group: file:read
    use: deerflow.sandbox.tools:ls_tool

  - name: read_file
    group: file:read
    use: deerflow.sandbox.tools:read_file_tool

  - name: glob
    group: file:read
    use: deerflow.sandbox.tools:glob_tool
    max_results: 200

  - name: grep
    group: file:read
    use: deerflow.sandbox.tools:grep_tool
    max_results: 100

  - name: write_file
    group: file:write
    use: deerflow.sandbox.tools:write_file_tool

  - name: str_replace
    group: file:write
    use: deerflow.sandbox.tools:str_replace_tool

  # Bash execution tool
  # Active only when using an isolated shell sandbox or when
  # sandbox.allow_host_bash: true explicitly opts into host bash.
  - name: bash
    group: bash
    use: deerflow.sandbox.tools:bash_tool
|
|
|
|
# ============================================================================
|
|
# Tool Search Configuration (Deferred Tool Loading)
|
|
# ============================================================================
|
|
# When enabled, MCP tools are not loaded into the agent's context directly.
|
|
# Instead, they are listed by name in the system prompt and discoverable
|
|
# via the tool_search tool at runtime.
|
|
# This reduces context usage and improves tool selection accuracy when
|
|
# multiple MCP servers expose a large number of tools.
|
|
|
|
tool_search:
  # Set to true to defer MCP tool loading and expose tools via tool_search.
  enabled: false
|
|
|
|
# ============================================================================
# Uploads Configuration
# ============================================================================

uploads:
  # PDF-to-Markdown converter used when a PDF is uploaded.
  #   auto        — prefer pymupdf4llm when installed; fall back to MarkItDown for
  #                 image-based or encrypted PDFs (recommended default).
  #   pymupdf4llm — always use pymupdf4llm (must be installed: uv add pymupdf4llm).
  #                 Better heading/table extraction; faster on most files.
  #   markitdown  — always use MarkItDown (original behaviour, no extra dependency).
  pdf_converter: auto

# ============================================================================
# Sandbox Configuration
# ============================================================================
# Choose between local sandbox (direct execution) or Docker-based AIO sandbox

# Option 1: Local Sandbox (Default)
# Executes commands directly on the host machine
sandbox:
  use: deerflow.sandbox.local:LocalSandboxProvider
  # Host bash execution is disabled by default because LocalSandboxProvider is
  # not a secure isolation boundary for shell access. Enable only for fully
  # trusted, single-user local workflows.
  allow_host_bash: false
  # Optional: Mount additional host directories into the sandbox.
  # Each mount maps a host path to a virtual container path accessible by the agent.
  # mounts:
  #   - host_path: /home/user/my-project  # Absolute path on the host machine
  #     container_path: /mnt/my-project  # Virtual path inside the sandbox
  #     read_only: true  # Whether the mount is read-only (default: false)

  # Tool output truncation limits (characters).
  # bash uses middle-truncation (head + tail) since errors can appear anywhere in the output.
  # read_file and ls use head-truncation since their content is front-loaded.
  # Set to 0 to disable truncation.
  bash_output_max_chars: 20000
  read_file_output_max_chars: 50000
  ls_output_max_chars: 20000
|
|
|
|
# Option 2: Container-based AIO Sandbox
|
|
# Executes commands in isolated containers (Docker or Apple Container)
|
|
# On macOS: Automatically prefers Apple Container if available, falls back to Docker
|
|
# On other platforms: Uses Docker
|
|
# Uncomment to use:
|
|
# sandbox:
|
|
# use: deerflow.community.aio_sandbox:AioSandboxProvider
|
|
#
|
|
# # Optional: Container image to use (works with both Docker and Apple Container)
|
|
# # Default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
|
|
# # Recommended: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest (works on both x86_64 and arm64)
|
|
# # image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
|
|
#
|
|
# # Optional: Base port for sandbox containers (default: 8080)
|
|
# # port: 8080
|
|
|
|
# # Optional: Maximum number of concurrent sandbox containers (default: 3)
|
|
# # When the limit is reached the least-recently-used sandbox is evicted to
|
|
# # make room for new ones. Use a positive integer here; omit this field to use the default.
|
|
# # replicas: 3
|
|
#
|
|
# # Optional: Prefix for container names (default: deer-flow-sandbox)
|
|
# # container_prefix: deer-flow-sandbox
|
|
#
|
|
# # Optional: Additional mount directories from host to container
|
|
# # NOTE: Skills directory is automatically mounted from skills.path to skills.container_path
|
|
# # mounts:
|
|
# # # Other custom mounts
|
|
# # - host_path: /path/on/host
|
|
# # container_path: /home/user/shared
|
|
# # read_only: false
|
|
# #
|
|
# # # DeerFlow will surface configured container_path values to the agent,
|
|
# # # so it can directly read/write mounted directories such as /home/user/shared
|
|
#
|
|
# # Optional: Environment variables to inject into the sandbox container
|
|
# # Values starting with $ will be resolved from host environment variables
|
|
# # environment:
|
|
# # NODE_ENV: production
|
|
# # DEBUG: "false"
|
|
# # API_KEY: $MY_API_KEY # Reads from host's MY_API_KEY env var
|
|
# # DATABASE_URL: $DATABASE_URL # Reads from host's DATABASE_URL env var
|
|
|
|
# Option 3: Provisioner-managed AIO Sandbox (docker-compose-dev)
|
|
# Each sandbox_id gets a dedicated Pod in k3s, managed by the provisioner.
|
|
# Recommended for production or advanced users who want better isolation and scalability:
|
|
# sandbox:
|
|
# use: deerflow.community.aio_sandbox:AioSandboxProvider
|
|
# provisioner_url: http://provisioner:8002
|
|
|
|
# ============================================================================
|
|
# Subagents Configuration
|
|
# ============================================================================
|
|
# Configure timeouts for subagent execution
|
|
# Subagents are background workers delegated tasks by the lead agent
|
|
|
|
# subagents:
|
|
# # Default timeout in seconds for all subagents (default: 900 = 15 minutes)
|
|
# timeout_seconds: 900
|
|
# # Optional global max-turn override for all subagents
|
|
# # max_turns: 120
|
|
#
|
|
# # Optional per-agent overrides
|
|
# agents:
|
|
# general-purpose:
|
|
# timeout_seconds: 1800 # 30 minutes for complex multi-step tasks
|
|
# max_turns: 160
|
|
# bash:
|
|
# timeout_seconds: 300 # 5 minutes for quick command execution
|
|
# max_turns: 80
|
|
|
|
# ============================================================================
|
|
# ACP Agents Configuration
|
|
# ============================================================================
|
|
# Configure external ACP-compatible agents for the built-in `invoke_acp_agent` tool.
|
|
|
|
# acp_agents:
|
|
# claude_code:
|
|
# # DeerFlow expects an ACP adapter here. The standard `claude` CLI does not
|
|
# # speak ACP directly. Install `claude-agent-acp` separately or use:
|
|
# command: npx
|
|
# args: ["-y", "@zed-industries/claude-agent-acp"]
|
|
# description: Claude Code for implementation, refactoring, and debugging
|
|
# model: null
|
|
# # auto_approve_permissions: false # Set to true to auto-approve ACP permission requests
|
|
# # env: # Optional: inject environment variables into the agent subprocess
|
|
# # ANTHROPIC_API_KEY: $ANTHROPIC_API_KEY # $VAR resolves from host environment
|
|
#
|
|
# codex:
|
|
# # DeerFlow expects an ACP adapter here. The standard `codex` CLI does not
|
|
# # speak ACP directly. Install `codex-acp` separately or use:
|
|
# command: npx
|
|
# args: ["-y", "@zed-industries/codex-acp"]
|
|
# description: Codex CLI for repository tasks and code generation
|
|
# model: null
|
|
# # auto_approve_permissions: false # Set to true to auto-approve ACP permission requests
|
|
# # env: # Optional: inject environment variables into the agent subprocess
|
|
# # OPENAI_API_KEY: $OPENAI_API_KEY # $VAR resolves from host environment
|
|
|
|
# ============================================================================
|
|
# Skills Configuration
|
|
# ============================================================================
|
|
# Configure skills directory for specialized agent workflows
|
|
|
|
skills:
  # Path to skills directory on the host (relative to project root or absolute)
  # Default: ../skills (relative to backend directory)
  # Uncomment to customize:
  # path: /absolute/path/to/custom/skills

  # Path where skills are mounted in the sandbox container
  # This is used by the agent to access skills in both local and Docker sandbox
  # Default: /mnt/skills
  container_path: /mnt/skills
|
|
|
|
# Note: To restrict which skills are loaded for a specific custom agent,
|
|
# define a `skills` list in that agent's `config.yaml` (e.g. `agents/my-agent/config.yaml`):
|
|
# - Omitted or null: load all globally enabled skills (default)
|
|
# - []: disable all skills for this agent
|
|
# - ["skill-name"]: load only specific skills
|
|
|
|
# ============================================================================
|
|
# Title Generation Configuration
|
|
# ============================================================================
|
|
# Automatic conversation title generation settings
|
|
|
|
title:
  enabled: true
  # Upper bounds applied to the generated title.
  max_words: 6
  max_chars: 60
  model_name: null  # Use default model (first model in models list)
|
|
|
|
# ============================================================================
|
|
# Summarization Configuration
|
|
# ============================================================================
|
|
# Automatically summarize conversation history when token limits are approached
|
|
# This helps maintain context in long conversations without exceeding model limits
|
|
|
|
summarization:
  enabled: true

  # Model to use for summarization (null = use default model)
  # Recommended: Use a lightweight, cost-effective model like "gpt-4o-mini" or similar
  model_name: null

  # Trigger conditions - at least one required
  # Summarization runs when ANY threshold is met (OR logic)
  # You can specify a single trigger or a list of triggers
  trigger:
    # Trigger when token count reaches 15564
    - type: tokens
      value: 15564
    # Uncomment to also trigger when message count reaches 50
    # - type: messages
    #   value: 50
    # Uncomment to trigger when 80% of model's max input tokens is reached
    # - type: fraction
    #   value: 0.8

  # Context retention policy after summarization
  # Specifies how much recent history to preserve
  keep:
    # Keep the most recent 10 messages (recommended)
    type: messages
    value: 10
    # Alternative: Keep specific token count
    # type: tokens
    # value: 3000
    # Alternative: Keep percentage of model's max input tokens
    # type: fraction
    # value: 0.3

  # Maximum tokens to keep when preparing messages for summarization
  # Set to null to skip trimming (not recommended for very long conversations)
  trim_tokens_to_summarize: 15564

  # Custom summary prompt template (null = use default LangChain prompt)
  # The prompt should guide the model to extract important context
  summary_prompt: null
|
|
|
|
# ============================================================================
|
|
# Memory Configuration
|
|
# ============================================================================
|
|
# Global memory mechanism
|
|
# Stores user context and conversation history for personalized responses
|
|
memory:
  enabled: true
  storage_path: memory.json  # Path relative to backend directory
  debounce_seconds: 30  # Wait time before processing queued updates
  model_name: null  # Use default model
  max_facts: 100  # Maximum number of facts to store
  fact_confidence_threshold: 0.7  # Minimum confidence for storing facts
  injection_enabled: true  # Whether to inject memory into system prompt
  max_injection_tokens: 2000  # Maximum tokens for memory injection
|
|
|
|
# ============================================================================
|
|
# Skill Self-Evolution Configuration
|
|
# ============================================================================
|
|
# Allow the agent to autonomously create and improve skills in skills/custom/.
|
|
skill_evolution:
  enabled: false  # Set to true to allow agent-managed writes under skills/custom
  moderation_model_name: null  # Model for LLM-based security scanning (null = use default model)
|
|
|
|
# ============================================================================
|
|
# Checkpointer Configuration
|
|
# ============================================================================
|
|
# Configure state persistence for the embedded DeerFlowClient.
|
|
# The LangGraph Server manages its own state persistence separately
|
|
# via the server infrastructure (this setting does not affect it).
|
|
#
|
|
# When configured, DeerFlowClient will automatically use this checkpointer,
|
|
# enabling multi-turn conversations to persist across process restarts.
|
|
#
|
|
# Supported types:
|
|
# memory - In-process only. State is lost when the process exits. (default)
|
|
# sqlite - File-based SQLite persistence. Survives restarts.
|
|
# Requires: uv add langgraph-checkpoint-sqlite
|
|
# postgres - PostgreSQL persistence. Suitable for multi-process deployments.
|
|
# Requires: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool
|
|
#
|
|
# Examples:
|
|
#
|
|
# In-memory (default when omitted — no persistence):
|
|
# checkpointer:
|
|
# type: memory
|
|
#
|
|
# SQLite (file-based, single-process):
|
|
checkpointer:
  # sqlite requires the langgraph-checkpoint-sqlite package
  # (uv add langgraph-checkpoint-sqlite).
  type: sqlite
  connection_string: checkpoints.db
|
|
#
|
|
# PostgreSQL (multi-process, production):
|
|
# checkpointer:
|
|
# type: postgres
|
|
# connection_string: postgresql://user:password@localhost:5432/deerflow
|
|
|
|
# ============================================================================
|
|
# IM Channels Configuration
|
|
# ============================================================================
|
|
# Connect DeerFlow to external messaging platforms.
|
|
# All channels use outbound connections (WebSocket or polling) — no public IP required.
|
|
|
|
# channels:
|
|
# # LangGraph Server URL for thread/message management (default: http://localhost:2024)
|
|
# # For Docker deployments, use the Docker service name instead of localhost:
|
|
# # langgraph_url: http://langgraph:2024
|
|
# # gateway_url: http://gateway:8001
|
|
# langgraph_url: http://localhost:2024
|
|
# # Gateway API URL for auxiliary queries like /models, /memory (default: http://localhost:8001)
|
|
# gateway_url: http://localhost:8001
|
|
# #
|
|
# # Docker Compose note:
|
|
# # If channels run inside the gateway container, use container DNS names instead
|
|
# # of localhost, for example:
|
|
# # langgraph_url: http://langgraph:2024
|
|
# # gateway_url: http://gateway:8001
|
|
# # You can also set DEER_FLOW_CHANNELS_LANGGRAPH_URL / DEER_FLOW_CHANNELS_GATEWAY_URL.
|
|
#
|
|
# # Optional: default mobile/session settings for all IM channels
|
|
# session:
|
|
# assistant_id: lead_agent # or a custom agent name; custom agents route via lead_agent + agent_name
|
|
# config:
|
|
# recursion_limit: 100
|
|
# context:
|
|
# thinking_enabled: true
|
|
# is_plan_mode: false
|
|
# subagent_enabled: false
|
|
#
|
|
# feishu:
|
|
# enabled: false
|
|
# app_id: $FEISHU_APP_ID
|
|
# app_secret: $FEISHU_APP_SECRET
|
|
# # domain: https://open.feishu.cn # China (default)
|
|
# # domain: https://open.larksuite.com # International
|
|
#
|
|
# slack:
|
|
# enabled: false
|
|
# bot_token: $SLACK_BOT_TOKEN # xoxb-...
|
|
# app_token: $SLACK_APP_TOKEN # xapp-... (Socket Mode)
|
|
# allowed_users: [] # empty = allow all
|
|
#
|
|
# telegram:
|
|
# enabled: false
|
|
# bot_token: $TELEGRAM_BOT_TOKEN
|
|
# allowed_users: [] # empty = allow all
|
|
#
|
|
# wechat:
|
|
# enabled: false
|
|
# bot_token: $WECHAT_BOT_TOKEN
|
|
# ilink_bot_id: $WECHAT_ILINK_BOT_ID
|
|
# # Optional: allow first-time QR bootstrap when bot_token is absent
|
|
# qrcode_login_enabled: true
|
|
# # Optional: sent as iLink-App-Id header when provided
|
|
# ilink_app_id: ""
|
|
# # Optional: sent as SKRouteTag header when provided
|
|
# route_tag: ""
|
|
# allowed_users: [] # empty = allow all
|
|
# # Optional: long-polling timeout in seconds
|
|
# polling_timeout: 35
|
|
# # Optional: QR poll interval in seconds when qrcode_login_enabled is true
|
|
# qrcode_poll_interval: 2
|
|
# # Optional: QR bootstrap timeout in seconds
|
|
# qrcode_poll_timeout: 180
|
|
# # Optional: persist getupdates cursor under the gateway container volume
|
|
# state_dir: ./.deer-flow/wechat/state
|
|
# # Optional: max inbound image size in bytes before skipping download
|
|
# max_inbound_image_bytes: 20971520
|
|
# # Optional: max outbound image size in bytes before skipping upload
|
|
# max_outbound_image_bytes: 20971520
|
|
# # Optional: max inbound file size in bytes before skipping download
|
|
# max_inbound_file_bytes: 52428800
|
|
# # Optional: max outbound file size in bytes before skipping upload
|
|
# max_outbound_file_bytes: 52428800
|
|
# # Optional: allowed file extensions for regular file receive/send
|
|
# allowed_file_extensions: [".txt", ".md", ".pdf", ".csv", ".json", ".yaml", ".yml", ".xml", ".html", ".log", ".zip", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".rtf"]
|
|
#
|
|
# # Optional: channel-level session overrides
|
|
# session:
|
|
# assistant_id: mobile-agent # custom agent names are supported here too
|
|
# context:
|
|
# thinking_enabled: false
|
|
#
|
|
# # Optional: per-user overrides by user_id
|
|
# users:
|
|
# "123456789":
|
|
# assistant_id: vip-agent
|
|
# config:
|
|
# recursion_limit: 150
|
|
# context:
|
|
# thinking_enabled: true
|
|
# subagent_enabled: true
|
|
# wecom:
|
|
# enabled: false
|
|
# bot_id: $WECOM_BOT_ID
|
|
# bot_secret: $WECOM_BOT_SECRET
|
|
|
|
# ============================================================================
|
|
# Guardrails Configuration
|
|
# ============================================================================
|
|
# Optional pre-execution authorization for tool calls.
|
|
# When enabled, every tool call passes through the configured provider
|
|
# before execution. Three options: built-in allowlist, OAP passport provider,
|
|
# or custom provider. See backend/docs/GUARDRAILS.md for full documentation.
|
|
#
|
|
# Providers are loaded by class path via resolve_variable (same as models/tools).
|
|
|
|
# --- Option 1: Built-in AllowlistProvider (zero external deps) ---
|
|
# guardrails:
|
|
# enabled: true
|
|
# provider:
|
|
# use: deerflow.guardrails.builtin:AllowlistProvider
|
|
# config:
|
|
# denied_tools: ["bash", "write_file"]
|
|
|
|
# --- Option 2: OAP passport provider (open standard, any implementation) ---
|
|
# The Open Agent Passport (OAP) spec defines passport format and decision codes.
|
|
# Any OAP-compliant provider works. Example using APort (reference implementation):
|
|
# pip install aport-agent-guardrails && aport setup --framework deerflow
|
|
# guardrails:
|
|
# enabled: true
|
|
# provider:
|
|
# use: aport_guardrails.providers.generic:OAPGuardrailProvider
|
|
|
|
# --- Option 3: Custom provider (any class with evaluate/aevaluate methods) ---
|
|
# guardrails:
|
|
# enabled: true
|
|
# provider:
|
|
# use: my_package:MyGuardrailProvider
|
|
# config:
|
|
# key: value
|