Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loudly rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions

View File

@@ -0,0 +1 @@
# DeerFlow Setup Wizard package

View File

@@ -0,0 +1,251 @@
"""LLM and search provider definitions for the Setup Wizard."""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class LLMProvider:
    """Static description of one LLM backend the Setup Wizard can configure."""

    # Short machine identifier (e.g. "openai"); the wizard branches on it.
    name: str
    # Human-readable label shown in the provider menu.
    display_name: str
    # One-line summary shown next to the label.
    description: str
    # "module:Class" path of the chat model implementation.
    use: str
    # Model identifiers offered for selection.
    models: list[str]
    # Model suggested when the user does not pick one explicitly.
    default_model: str
    # Environment variable holding the API key; None when no key is prompted for.
    env_var: str | None
    # presumably the pip distribution providing `use` (None for in-tree providers) — confirm
    package: str | None
    # Optional: some providers use a different field name for the API key in YAML.
    api_key_field: str = "api_key"
    # Extra config fields beyond the common ones (merged into YAML).
    extra_config: dict = field(default_factory=dict)
    # Message shown instead of a key prompt for providers that reuse local credentials.
    auth_hint: str | None = None
@dataclass
class WebProvider:
    """Static description of a web tool backend (used for the web-fetch catalog)."""

    # Short machine identifier (e.g. "jina_ai").
    name: str
    # Human-readable label shown in the provider menu.
    display_name: str
    # One-line summary shown next to the label.
    description: str
    # "module:attribute" path of the tool implementation.
    use: str
    # Environment variable holding the API key; None = no API key required.
    env_var: str | None
    # Name under which the tool is registered in the generated config (no default here).
    tool_name: str
    # Provider-specific settings merged into the tool entry.
    extra_config: dict = field(default_factory=dict)
@dataclass
class SearchProvider:
    """Static description of a web-search backend selectable in the wizard."""

    # Short machine identifier (e.g. "tavily").
    name: str
    # Human-readable label shown in the provider menu.
    display_name: str
    # One-line summary shown next to the label.
    description: str
    # "module:attribute" path of the tool implementation.
    use: str
    # Environment variable holding the API key; None = no API key required.
    env_var: str | None
    # Name under which the tool is registered in the generated config.
    tool_name: str = "web_search"
    # Provider-specific settings merged into the tool entry.
    extra_config: dict = field(default_factory=dict)
# Catalog of LLM backends offered by the wizard, in menu order.
# Entries with env_var=None carry an auth_hint instead of an API key variable.
# `extra_config` values are provider-specific settings merged into the model entry.
LLM_PROVIDERS: list[LLMProvider] = [
    LLMProvider(
        name="openai",
        display_name="OpenAI",
        description="GPT-4o, GPT-4.1, o3",
        use="langchain_openai:ChatOpenAI",
        models=["gpt-4o", "gpt-4.1", "o3"],
        default_model="gpt-4o",
        env_var="OPENAI_API_KEY",
        package="langchain-openai",
    ),
    # NOTE(review): description says "Opus 4, Sonnet 4" while the model ids are
    # the 4-5 variants — confirm which is intended.
    LLMProvider(
        name="anthropic",
        display_name="Anthropic",
        description="Claude Opus 4, Sonnet 4",
        use="langchain_anthropic:ChatAnthropic",
        models=["claude-opus-4-5", "claude-sonnet-4-5"],
        default_model="claude-sonnet-4-5",
        env_var="ANTHROPIC_API_KEY",
        package="langchain-anthropic",
        extra_config={"max_tokens": 8192},
    ),
    LLMProvider(
        name="deepseek",
        display_name="DeepSeek",
        description="V3, R1",
        use="langchain_deepseek:ChatDeepSeek",
        models=["deepseek-chat", "deepseek-reasoner"],
        default_model="deepseek-chat",
        env_var="DEEPSEEK_API_KEY",
        package="langchain-deepseek",
    ),
    # The only entry that overrides api_key_field.
    # NOTE(review): confirm "gemini_api_key" is the field the config loader expects.
    LLMProvider(
        name="google",
        display_name="Google Gemini",
        description="2.0 Flash, 2.5 Pro",
        use="langchain_google_genai:ChatGoogleGenerativeAI",
        models=["gemini-2.0-flash", "gemini-2.5-pro"],
        default_model="gemini-2.0-flash",
        env_var="GEMINI_API_KEY",
        package="langchain-google-genai",
        api_key_field="gemini_api_key",
    ),
    # Reuses the OpenAI client class; the gateway URL is carried in extra_config.
    LLMProvider(
        name="openrouter",
        display_name="OpenRouter",
        description="OpenAI-compatible gateway with broad model catalog",
        use="langchain_openai:ChatOpenAI",
        models=["google/gemini-2.5-flash-preview", "openai/gpt-5-mini", "anthropic/claude-sonnet-4"],
        default_model="google/gemini-2.5-flash-preview",
        env_var="OPENROUTER_API_KEY",
        package="langchain-openai",
        extra_config={
            "base_url": "https://openrouter.ai/api/v1",
            "request_timeout": 600.0,
            "max_retries": 2,
            "max_tokens": 8192,
            "temperature": 0.7,
        },
    ),
    # In-tree provider (package=None); base_url defaults to a local server.
    LLMProvider(
        name="vllm",
        display_name="vLLM",
        description="Self-hosted OpenAI-compatible serving",
        use="deerflow.models.vllm_provider:VllmChatModel",
        models=["Qwen/Qwen3-32B", "Qwen/Qwen2.5-Coder-32B-Instruct"],
        default_model="Qwen/Qwen3-32B",
        env_var="VLLM_API_KEY",
        package=None,
        extra_config={
            "base_url": "http://localhost:8000/v1",
            "request_timeout": 600.0,
            "max_retries": 2,
            "max_tokens": 8192,
            "supports_thinking": True,
            "supports_vision": False,
            "when_thinking_enabled": {
                "extra_body": {
                    "chat_template_kwargs": {
                        "enable_thinking": True,
                    }
                }
            },
        },
    ),
    # No API key variable: auth_hint points at the Codex CLI credentials file.
    # api_key_field is passed explicitly but equals the dataclass default.
    LLMProvider(
        name="codex",
        display_name="Codex CLI",
        description="Uses Codex CLI local auth (~/.codex/auth.json)",
        use="deerflow.models.openai_codex_provider:CodexChatModel",
        models=["gpt-5.4", "gpt-5-mini"],
        default_model="gpt-5.4",
        env_var=None,
        package=None,
        api_key_field="api_key",
        extra_config={"supports_thinking": True, "supports_reasoning_effort": True},
        auth_hint="Uses existing Codex CLI auth from ~/.codex/auth.json",
    ),
    # No API key variable: auth_hint points at local Claude Code OAuth credentials.
    LLMProvider(
        name="claude_code",
        display_name="Claude Code OAuth",
        description="Uses Claude Code local OAuth credentials",
        use="deerflow.models.claude_provider:ClaudeChatModel",
        models=["claude-sonnet-4-6", "claude-opus-4-1"],
        default_model="claude-sonnet-4-6",
        env_var=None,
        package=None,
        extra_config={"max_tokens": 4096, "supports_thinking": True},
        auth_hint="Uses Claude Code OAuth credentials from your local machine",
    ),
    # Generic OpenAI-compatible endpoint; shares OPENAI_API_KEY with the "openai" entry.
    LLMProvider(
        name="other",
        display_name="Other OpenAI-compatible",
        description="Custom gateway with base_url and model name",
        use="langchain_openai:ChatOpenAI",
        models=["gpt-4o"],
        default_model="gpt-4o",
        env_var="OPENAI_API_KEY",
        package="langchain-openai",
    ),
]
# Catalog of web-search backends offered by the wizard, in menu order.
# Every entry relies on the SearchProvider default tool_name ("web_search").
# NOTE(review): the commit that vendored this tree states that the native
# ddg_search / tavily / infoquest / exa / firecrawl modules are replaced by stubs
# raising NativeWebToolDisabledError at import time. If so, every `use` path below
# yields a config that fails on load — confirm whether this catalog should point
# at the searx-backed tools instead.
SEARCH_PROVIDERS: list[SearchProvider] = [
    SearchProvider(
        name="ddg",
        display_name="DuckDuckGo (free, no key needed)",
        description="No API key required",
        use="deerflow.community.ddg_search.tools:web_search_tool",
        env_var=None,
        extra_config={"max_results": 5},
    ),
    SearchProvider(
        name="tavily",
        display_name="Tavily",
        description="Recommended, free tier available",
        use="deerflow.community.tavily.tools:web_search_tool",
        env_var="TAVILY_API_KEY",
        extra_config={"max_results": 5},
    ),
    SearchProvider(
        name="infoquest",
        display_name="InfoQuest",
        description="Higher quality vertical search, API key required",
        use="deerflow.community.infoquest.tools:web_search_tool",
        env_var="INFOQUEST_API_KEY",
        extra_config={"search_time_range": 10},
    ),
    SearchProvider(
        name="exa",
        display_name="Exa",
        description="Neural + keyword web search, API key required",
        use="deerflow.community.exa.tools:web_search_tool",
        env_var="EXA_API_KEY",
        extra_config={
            "max_results": 5,
            "search_type": "auto",
            "contents_max_characters": 1000,
        },
    ),
    SearchProvider(
        name="firecrawl",
        display_name="Firecrawl",
        description="Search + crawl via Firecrawl API",
        use="deerflow.community.firecrawl.tools:web_search_tool",
        env_var="FIRECRAWL_API_KEY",
        extra_config={"max_results": 5},
    ),
]
# Catalog of web-fetch backends offered by the wizard, in menu order.
# The exa, infoquest and firecrawl entries use the same env_var as the search
# entries of the same name.
# NOTE(review): the commit that vendored this tree states that the native
# jina_ai / exa / infoquest / firecrawl modules are replaced by hard-fail stubs —
# confirm whether these `use` paths are still importable.
WEB_FETCH_PROVIDERS: list[WebProvider] = [
    WebProvider(
        name="jina_ai",
        display_name="Jina AI Reader",
        description="Good default reader, no API key required",
        use="deerflow.community.jina_ai.tools:web_fetch_tool",
        env_var=None,
        tool_name="web_fetch",
        extra_config={"timeout": 10},
    ),
    WebProvider(
        name="exa",
        display_name="Exa",
        description="API key required",
        use="deerflow.community.exa.tools:web_fetch_tool",
        env_var="EXA_API_KEY",
        tool_name="web_fetch",
    ),
    WebProvider(
        name="infoquest",
        display_name="InfoQuest",
        description="API key required",
        use="deerflow.community.infoquest.tools:web_fetch_tool",
        env_var="INFOQUEST_API_KEY",
        tool_name="web_fetch",
        extra_config={"timeout": 10, "fetch_time": 10, "navigation_timeout": 30},
    ),
    WebProvider(
        name="firecrawl",
        display_name="Firecrawl",
        description="Search-grade crawl with markdown output, API key required",
        use="deerflow.community.firecrawl.tools:web_fetch_tool",
        env_var="FIRECRAWL_API_KEY",
        tool_name="web_fetch",
    ),
]

View File

@@ -0,0 +1 @@
# Setup Wizard steps

View File

@@ -0,0 +1,51 @@
"""Step: execution mode and safety-related capabilities."""
from __future__ import annotations
from dataclasses import dataclass
from wizard.ui import ask_choice, ask_yes_no, print_header, print_info, print_warning
# "module:Class" identifiers of the two sandbox providers the user can choose
# between; one of them is returned as ExecutionStepResult.sandbox_use.
LOCAL_SANDBOX = "deerflow.sandbox.local:LocalSandboxProvider"
CONTAINER_SANDBOX = "deerflow.community.aio_sandbox:AioSandboxProvider"
@dataclass
class ExecutionStepResult:
    """Answers collected by the execution & safety step."""

    # Sandbox provider identifier (LOCAL_SANDBOX or CONTAINER_SANDBOX).
    sandbox_use: str
    # True only when the local sandbox was chosen and bash was enabled.
    allow_host_bash: bool
    # Whether the bash tool should be part of the generated tool list.
    include_bash_tool: bool
    # Whether write_file / str_replace should be part of the generated tool list.
    include_write_tools: bool
def run_execution_step(step_label: str = "Step 3/4") -> ExecutionStepResult:
    """Ask how DeerFlow may execute code and which risky tools to enable.

    Args:
        step_label: Prefix shown in the step header (e.g. "Step 3/4").

    Returns:
        The chosen sandbox provider plus the bash / write-tool switches.
        ``allow_host_bash`` is set only for the local sandbox with bash enabled.
    """
    print_header(f"{step_label} · Execution & Safety")
    print_info("Choose how much execution power DeerFlow should have in this workspace.")

    choice = ask_choice(
        "Execution mode",
        [
            "Local sandbox — fastest, uses host filesystem paths",
            "Container sandbox — more isolated, requires Docker or Apple Container",
        ],
        default=0,
    )
    uses_local_sandbox = choice == 0
    sandbox_use = LOCAL_SANDBOX if uses_local_sandbox else CONTAINER_SANDBOX

    print()
    if uses_local_sandbox:
        print_warning(
            "Local sandbox is convenient but not a secure shell isolation boundary."
        )
        print_info("Keep host bash disabled unless this is a fully trusted local workflow.")
    else:
        print_info("Container sandbox isolates shell execution better than host-local mode.")

    # Bash is opt-in (default: no); file write tools are opt-out (default: yes).
    bash_enabled = ask_yes_no("Enable bash command execution?", default=False)
    write_enabled = ask_yes_no(
        "Enable file write tools (write_file, str_replace)?", default=True
    )

    return ExecutionStepResult(
        sandbox_use=sandbox_use,
        allow_host_bash=uses_local_sandbox and bash_enabled,
        include_bash_tool=bash_enabled,
        include_write_tools=write_enabled,
    )

View File

@@ -0,0 +1,76 @@
"""Step 1: LLM provider selection."""
from __future__ import annotations
from dataclasses import dataclass
from wizard.providers import LLM_PROVIDERS, LLMProvider
from wizard.ui import (
ask_choice,
ask_secret,
ask_text,
print_header,
print_info,
print_success,
)
@dataclass
class LLMStepResult:
    """Answers collected by the LLM provider step."""

    # Catalog entry the user selected.
    provider: LLMProvider
    # Model identifier chosen from the provider's list or typed in.
    model_name: str
    # API key entered by the user; None when the provider needs no key prompt.
    api_key: str | None
    # Endpoint override; None when the provider has no configurable base URL.
    base_url: str | None = None
def run_llm_step(step_label: str = "Step 1/3") -> LLMStepResult:
    """Interactively pick an LLM provider, a model and (if needed) an API key.

    Args:
        step_label: Prefix shown in every header printed by this step.

    Returns:
        The selected provider, model name, API key (None for providers that
        authenticate via local credentials) and base URL (None unless the
        provider defines or asks for one).
    """
    print_header(f"{step_label} · Choose your LLM provider")

    menu = [f"{entry.display_name} ({entry.description})" for entry in LLM_PROVIDERS]
    provider = LLM_PROVIDERS[ask_choice("Enter choice", menu)]

    print()

    # Model selection (show list, default to first); single-model providers skip the menu.
    if len(provider.models) > 1:
        print_info(f"Available models for {provider.display_name}:")
        model_name = provider.models[ask_choice("Select model", provider.models, default=0)]
    else:
        model_name = provider.models[0]

    print()

    # Gateways with a preset endpoint carry it in their extra_config.
    base_url: str | None = (
        provider.extra_config.get("base_url")
        if provider.name in {"openrouter", "vllm"}
        else None
    )

    if provider.name == "other":
        # Custom gateway: both the endpoint and the model name are free-form.
        print_header(f"{step_label} · Connection details")
        base_url = ask_text("Base URL (e.g. https://api.openai.com/v1)", required=True)
        model_name = ask_text("Model name", default=provider.default_model)
    elif provider.auth_hint:
        # Provider reuses credentials already on this machine: no key prompt.
        print_header(f"{step_label} · Authentication")
        print_info(provider.auth_hint)
        return LLMStepResult(
            provider=provider,
            model_name=model_name,
            api_key=None,
            base_url=base_url,
        )

    print_header(f"{step_label} · Enter your API Key")
    api_key = ask_secret(f"{provider.env_var}") if provider.env_var else None
    if api_key:
        print_success(f"Key will be saved to .env as {provider.env_var}")

    return LLMStepResult(
        provider=provider,
        model_name=model_name,
        api_key=api_key,
        base_url=base_url,
    )

View File

@@ -0,0 +1,66 @@
"""Step: Web search configuration."""
from __future__ import annotations
from dataclasses import dataclass
from wizard.providers import SEARCH_PROVIDERS, WEB_FETCH_PROVIDERS, SearchProvider, WebProvider
from wizard.ui import ask_choice, ask_secret, print_header, print_info, print_success
@dataclass
class SearchStepResult:
    """Answers collected by the web search & fetch step."""

    # Selected search backend; None = skip.
    search_provider: SearchProvider | None
    # Key entered for the search backend; None when skipped or no key is needed.
    search_api_key: str | None
    # Selected fetch backend; None = skip.
    fetch_provider: WebProvider | None
    # Key for the fetch backend (possibly reused from search); None when not needed.
    fetch_api_key: str | None
def run_search_step(step_label: str = "Step 3/3") -> SearchStepResult:
    """Interactively choose the web search and web fetch providers.

    Each menu lists the catalog entries followed by a "Skip for now" option.
    When both chosen providers use the same environment variable, the key is
    asked for once and reused.

    Args:
        step_label: Prefix shown in the step header.

    Returns:
        The selected providers (None where skipped) and the API keys entered
        for them (None where no key is required).
    """
    print_header(f"{step_label} · Web Search & Fetch (optional)")
    # Keys entered during this step, by environment variable name.
    provided_keys: dict[str, str] = {}

    # Name and description need a visible separator; without one the menu reads
    # e.g. "TavilyRecommended, free tier available".
    search_options = [f"{p.display_name} — {p.description}" for p in SEARCH_PROVIDERS]
    search_options.append("Skip for now (agent still works without web search)")
    idx = ask_choice("Choose a web search provider", search_options, default=0)

    search_provider: SearchProvider | None = None
    search_api_key: str | None = None
    # The last menu index is the "skip" entry appended above.
    if idx < len(SEARCH_PROVIDERS):
        search_provider = SEARCH_PROVIDERS[idx]
        if search_provider.env_var:
            print()
            search_api_key = ask_secret(f"{search_provider.env_var}")
            provided_keys[search_provider.env_var] = search_api_key
            print_success(f"Key will be saved to .env as {search_provider.env_var}")

    print()
    fetch_options = [f"{p.display_name} — {p.description}" for p in WEB_FETCH_PROVIDERS]
    fetch_options.append("Skip for now (agent can still answer without web fetch)")
    idx = ask_choice("Choose a web fetch provider", fetch_options, default=0)

    fetch_provider: WebProvider | None = None
    fetch_api_key: str | None = None
    if idx < len(WEB_FETCH_PROVIDERS):
        fetch_provider = WEB_FETCH_PROVIDERS[idx]
        if fetch_provider.env_var:
            print()
            if fetch_provider.env_var in provided_keys:
                # Same variable as the search provider: do not ask twice.
                fetch_api_key = provided_keys[fetch_provider.env_var]
                print_info(f"Reusing {fetch_provider.env_var} from web search provider")
            else:
                fetch_api_key = ask_secret(f"{fetch_provider.env_var}")
                provided_keys[fetch_provider.env_var] = fetch_api_key
                print_success(f"Key will be saved to .env as {fetch_provider.env_var}")

    return SearchStepResult(
        search_provider=search_provider,
        search_api_key=search_api_key,
        fetch_provider=fetch_provider,
        fetch_api_key=fetch_api_key,
    )

View File

@@ -0,0 +1,261 @@
"""Terminal UI helpers for the Setup Wizard."""
from __future__ import annotations
import getpass
import shutil
import sys
try:
import termios
import tty
except ImportError: # pragma: no cover - non-Unix fallback
termios = None
tty = None
# ── ANSI colours ──────────────────────────────────────────────────────────────
def _supports_color() -> bool:
    """Return True when sys.stdout reports being a terminal."""
    stream = sys.stdout
    if not hasattr(stream, "isatty"):
        return False
    return stream.isatty()


def _c(text: str, code: str) -> str:
    """Wrap *text* in the ANSI SGR sequence *code*, or return it unchanged off-terminal."""
    return f"\033[{code}m{text}\033[0m" if _supports_color() else text


def green(text: str) -> str:
    """Colour *text* green (SGR 32)."""
    return _c(text, "32")


def red(text: str) -> str:
    """Colour *text* red (SGR 31)."""
    return _c(text, "31")


def yellow(text: str) -> str:
    """Colour *text* yellow (SGR 33)."""
    return _c(text, "33")


def cyan(text: str) -> str:
    """Colour *text* cyan (SGR 36)."""
    return _c(text, "36")


def bold(text: str) -> str:
    """Render *text* bold (SGR 1)."""
    return _c(text, "1")


def inverse(text: str) -> str:
    """Render *text* in reverse video (SGR 7)."""
    return _c(text, "7")
# ── UI primitives ─────────────────────────────────────────────────────────────
def print_header(title: str) -> None:
    """Print *title* inside a double-line box, with a blank line before and after.

    The box is at least 44 columns wide inside the borders and grows with the
    title. Previously the bar was built from an empty string, so no frame was
    drawn and the computed width had no effect.
    """
    width = max(len(title) + 4, 44)
    bar = "═" * width
    print()
    print(f"╔{bar}╗")
    # Two leading spaces plus ljust(width - 2) fill exactly `width` columns.
    print(f"║  {title.ljust(width - 2)}║")
    print(f"╚{bar}╝")
    print()
def print_section(title: str) -> None:
    """Print a bold "── title ──" sub-heading surrounded by blank lines."""
    heading = bold(f"── {title} ──")
    print()
    print(heading)
    print()
# Status-line printers. Each prefixes the message with a coloured one-character
# marker so the four kinds can be told apart. Success, error and info previously
# coloured an empty string and therefore showed no marker at all.
def print_success(message: str) -> None:
    """Print *message* with a green check mark."""
    print(f" {green('✓')} {message}")


def print_warning(message: str) -> None:
    """Print *message* with a yellow exclamation mark."""
    print(f" {yellow('!')} {message}")


def print_error(message: str) -> None:
    """Print *message* with a red cross."""
    print(f" {red('✗')} {message}")


def print_info(message: str) -> None:
    """Print *message* with a cyan arrow."""
    print(f" {cyan('→')} {message}")
def _ask_choice_with_numbers(prompt: str, options: list[str], default: int | None = None) -> int:
    """Print a numbered menu and read the selection as a typed number.

    Args:
        prompt: Text shown before the input cursor.
        options: Menu entries, displayed numbered from 1.
        default: 0-based index returned on empty input; None means no default.

    Returns:
        The 0-based index of the chosen option. Re-prompts until the input is
        valid.
    """
    for i, opt in enumerate(options, 1):
        # The default entry is flagged with a green asterisk.
        marker = f" {green('*')}" if default is not None and i - 1 == default else " "
        print(f"{marker} {i}. {opt}")
    print()

    suffix = f" [{default + 1}]" if default is not None else ""
    while True:
        raw = input(f"{prompt}{suffix}: ").strip()
        if raw == "" and default is not None:
            return default
        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # "²" that int() rejects, which would crash the prompt with ValueError.
        if raw.isdecimal():
            idx = int(raw) - 1
            if 0 <= idx < len(options):
                return idx
        print(f" Please enter a number between 1 and {len(options)}.")
def _supports_arrow_menu() -> bool:
    """Return True when the arrow-key menu can be used.

    Requires the termios/tty modules to have imported and stdin, stdout and
    stderr to all report being terminals.
    """
    if termios is None or tty is None:
        return False
    if not (hasattr(sys.stdin, "isatty") and hasattr(sys.stdout, "isatty")):
        return False
    return sys.stdin.isatty() and sys.stdout.isatty() and sys.stderr.isatty()
def _clear_rendered_lines(count: int) -> None:
    """Erase the current line and the *count* lines above it; no-op for count <= 0."""
    if count > 0:
        erase_current = "\x1b[2K\r"
        move_up_and_erase = "\x1b[1A\x1b[2K\r"
        sys.stdout.write(erase_current + move_up_and_erase * count)
def _read_key(fd: int) -> str:
    """Read one key press from sys.stdin.

    Returns a single character, or the three-character sequence "\\x1b[X"
    when the input starts with ESC followed by "[". An ESC followed by
    anything else is returned as a bare ESC; that following character is
    consumed and dropped. *fd* is accepted but not used.
    """
    lead = sys.stdin.read(1)
    if lead == "\x1b" and sys.stdin.read(1) == "[":
        return f"\x1b[{sys.stdin.read(1)}"
    return lead
def _terminal_width() -> int:
    """Return the terminal width in columns, never less than 40 (80 if unknown)."""
    columns = shutil.get_terminal_size(fallback=(80, 24)).columns
    return columns if columns > 40 else 40
def _truncate_line(text: str, max_width: int) -> str:
    """Shorten *text* to at most *max_width* characters.

    Text that fits is returned unchanged. Longer text is cut and ends with an
    ellipsis ("…") so the reader can see it was shortened; the marker was
    previously missing. Widths of 1 or less leave no room for the marker, so
    the text is simply cut (to the empty string for widths <= 0).
    """
    if len(text) <= max_width:
        return text
    if max_width <= 1:
        # Clamp at 0: a negative slice bound would count from the end instead.
        return text[: max(max_width, 0)]
    return f"{text[: max_width - 1]}…"
def _render_choice_menu(options: list[str], selected: int) -> int:
    """Print one line per option, highlighting the entry at index *selected*.

    Lines are numbered from 1, right-aligned to the widest number, and
    truncated to fit the terminal.

    Returns:
        The number of lines printed (one per option).
    """
    digits = len(str(len(options)))
    usable_width = max(_terminal_width() - 3, 20)
    for position, label in enumerate(options):
        row = _truncate_line(f"{position + 1:>{digits}}. {label}", usable_width)
        if position == selected:
            print(f"{green('')} {inverse(bold(row))}")
        else:
            print(f" {row}")
    sys.stdout.flush()
    return len(options)
def _ask_choice_with_arrows(prompt: str, options: list[str], default: int | None = None) -> int:
    """Run an interactive menu driven by arrow keys, with numeric quick-select.

    Up/Down move the highlight (wrapping around), digits accumulate a 1-based
    number that is applied on Enter if it is in range, and Backspace removes
    the last typed digit. The cursor is hidden while the menu is shown.

    Args:
        prompt: Text shown above the menu and in the final summary line.
        options: Menu entries.
        default: 0-based index highlighted initially; None starts at the first entry.

    Returns:
        The 0-based index of the confirmed option.

    Raises:
        KeyboardInterrupt: When Ctrl-C ("\\x03") is read as a key.
    """
    selected = default if default is not None else 0
    typed = ""
    fd = sys.stdin.fileno()
    # Saved so the terminal mode is restored in `finally`, whatever happens.
    original_settings = termios.tcgetattr(fd)
    rendered_lines = 0

    try:
        sys.stdout.write("\x1b[?25l")  # hide cursor
        sys.stdout.flush()
        tty.setcbreak(fd)
        prompt_help = f"{prompt} (↑/↓ move, Enter confirm, number quick-select)"
        print(cyan(_truncate_line(prompt_help, max(_terminal_width() - 2, 20))))

        while True:
            # Redraw in place: wipe the previous rendering before printing the new one.
            if rendered_lines:
                _clear_rendered_lines(rendered_lines)
            rendered_lines = _render_choice_menu(options, selected)

            key = _read_key(fd)
            if key == "\x03":
                raise KeyboardInterrupt

            if key in ("\r", "\n"):
                if typed:
                    idx = int(typed) - 1
                    # An out-of-range number is ignored; the highlighted entry wins.
                    if 0 <= idx < len(options):
                        selected = idx
                    typed = ""
                break

            if key == "\x1b[A":  # up arrow
                selected = (selected - 1) % len(options)
                typed = ""
                continue
            if key == "\x1b[B":  # down arrow
                selected = (selected + 1) % len(options)
                typed = ""
                continue
            if key in ("\x7f", "\b"):
                typed = typed[:-1]
                continue
            # isdecimal(), not isdigit(): isdigit() also accepts characters such
            # as "²" that int() rejects, which would crash on Enter.
            if key.isdecimal():
                typed += key
                continue

        # Replace the menu with a one-line summary of the choice.
        if rendered_lines:
            _clear_rendered_lines(rendered_lines)
        print(f"{prompt}: {options[selected]}")
        return selected
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, original_settings)
        sys.stdout.write("\x1b[?25h")  # show cursor again
        sys.stdout.flush()
def ask_choice(prompt: str, options: list[str], default: int | None = None) -> int:
    """Show a menu and return the 0-based index of the option the user picked.

    Uses the arrow-key menu when the terminal supports it, otherwise the
    numbered prompt.
    """
    picker = _ask_choice_with_arrows if _supports_arrow_menu() else _ask_choice_with_numbers
    return picker(prompt, options, default=default)
def ask_text(prompt: str, default: str = "", required: bool = False) -> str:
    """Prompt for a line of text.

    Returns the stripped input; on empty input returns *default* if one is
    set, otherwise "" unless *required* is true, in which case it re-prompts.
    """
    hint = f" [{default}]" if default else ""
    while True:
        answer = input(f"{prompt}{hint}: ").strip()
        if answer or default:
            return answer or default
        if required:
            print(" This field is required.")
            continue
        return ""
def ask_secret(prompt: str) -> str:
    """Prompt for a secret with hidden input, repeating until it is non-empty."""
    secret = getpass.getpass(f"{prompt}: ").strip()
    while not secret:
        print(" API key cannot be empty.")
        secret = getpass.getpass(f"{prompt}: ").strip()
    return secret
def ask_yes_no(prompt: str, default: bool = True) -> bool:
    """Ask a yes/no question.

    The answer taken on a bare Enter is shown capitalised in the suffix
    ("[Y/n]" or "[y/N]"). The suffix used to be a fixed "[Y/N]", which hid
    the default.

    Args:
        prompt: Question text.
        default: Value returned when the user just presses Enter.

    Returns:
        True for "y"/"yes", False for "n"/"no" (case-insensitive), *default*
        for empty input. Re-prompts on anything else.
    """
    suffix = "[Y/n]" if default else "[y/N]"
    while True:
        raw = input(f"{prompt} {suffix}: ").strip().lower()
        if raw == "":
            return default
        if raw in ("y", "yes"):
            return True
        if raw in ("n", "no"):
            return False
        print(" Please enter y or n.")

View File

@@ -0,0 +1,290 @@
"""Config file writer for the Setup Wizard.
Writes config.yaml as a minimal working configuration and updates .env
without wiping existing user customisations where possible.
"""
from __future__ import annotations
from copy import deepcopy
from pathlib import Path
from typing import Any
import yaml
def _project_root() -> Path:
    """Return the directory two levels above the one containing this file."""
    this_file = Path(__file__).resolve()
    return this_file.parents[2]
# ── .env helpers ──────────────────────────────────────────────────────────────
def read_env_file(env_path: Path) -> dict[str, str]:
    """Load KEY=VALUE pairs from a .env file.

    Blank lines, ``#`` comments and lines without ``=`` are skipped. Keys and
    values are whitespace-stripped; the value is everything after the first
    ``=``. A missing file yields an empty dict, and a repeated key keeps its
    last value.
    """
    if not env_path.exists():
        return {}

    parsed: dict[str, str] = {}
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, raw_value = entry.partition("=")
        parsed[name.strip()] = raw_value.strip()
    return parsed
def write_env_file(env_path: Path, pairs: dict[str, str]) -> None:
    """Merge *pairs* into the .env file at *env_path*, creating it if needed.

    Assignment lines whose key is in *pairs* are rewritten in place as
    ``KEY=value``; all other lines (comments, blanks, other keys) are kept
    verbatim. Keys not already present are appended in the order given. The
    file is written with a trailing newline.
    """
    existing = env_path.read_text(encoding="utf-8").splitlines() if env_path.exists() else []

    replaced: set[str] = set()
    output: list[str] = []
    for line in existing:
        candidate = line.strip()
        is_assignment = bool(candidate) and not candidate.startswith("#") and "=" in candidate
        name = candidate.partition("=")[0].strip() if is_assignment else None
        if name is not None and name in pairs:
            output.append(f"{name}={pairs[name]}")
            replaced.add(name)
        else:
            output.append(line)

    output.extend(f"{name}={value}" for name, value in pairs.items() if name not in replaced)
    env_path.write_text("\n".join(output) + "\n", encoding="utf-8")
# ── config.yaml helpers ───────────────────────────────────────────────────────
def _yaml_dump(data: Any) -> str:
    """Serialize *data* to block-style YAML, keeping key order and raw Unicode."""
    return yaml.safe_dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )
def _default_tools() -> list[dict[str, Any]]:
    """Return a fresh copy of the built-in tool list.

    The list holds image search followed by the sandbox tools (read-only
    tools, write tools, then bash). Each sandbox tool resolves to
    ``deerflow.sandbox.tools:<name>_tool``.
    """
    # NOTE(review): the vendoring commit says the native image_search module is now a
    # hard-fail stub — confirm this default entry is still importable.
    tools: list[dict[str, Any]] = [
        {
            "name": "image_search",
            "use": "deerflow.community.image_search.tools:image_search_tool",
            "group": "web",
            "max_results": 5,
        },
    ]
    sandbox_tools = (
        ("ls", "file:read"),
        ("read_file", "file:read"),
        ("glob", "file:read"),
        ("grep", "file:read"),
        ("write_file", "file:write"),
        ("str_replace", "file:write"),
        ("bash", "bash"),
    )
    for tool_name, group in sandbox_tools:
        tools.append(
            {"name": tool_name, "use": f"deerflow.sandbox.tools:{tool_name}_tool", "group": group}
        )
    return tools
def _build_tools(
    *,
    base_tools: list[dict[str, Any]] | None,
    search_use: str | None,
    search_tool_name: str,
    search_extra_config: dict | None,
    web_fetch_use: str | None,
    web_fetch_tool_name: str,
    web_fetch_extra_config: dict | None,
    include_bash_tool: bool,
    include_write_tools: bool,
) -> list[dict[str, Any]]:
    """Assemble the ``tools`` section of the generated config.

    Starts from a deep copy of *base_tools* (or the built-in defaults when
    None), drops every entry this function manages itself (search, fetch,
    write_file, str_replace, bash), then re-adds those according to the
    arguments. Search and fetch go first, in that order; write tools and bash
    are appended at the end. The input list is not modified.

    Args:
        base_tools: Existing tool entries to start from, or None for the defaults.
        search_use / web_fetch_use: Tool import paths; falsy means "do not add".
        search_tool_name / web_fetch_tool_name: Names the tools are registered under.
        search_extra_config / web_fetch_extra_config: Extra keys merged into the entry.
        include_bash_tool: Whether to append the bash tool.
        include_write_tools: Whether to append write_file and str_replace.

    Returns:
        The new tool list.
    """
    source = base_tools if base_tools is not None else _default_tools()
    managed_names = {search_tool_name, web_fetch_tool_name, "write_file", "str_replace", "bash"}
    tools = [entry for entry in deepcopy(source) if entry.get("name") not in managed_names]

    # Web tools are placed ahead of everything inherited from the base list.
    web_tools: list[dict[str, Any]] = []
    if search_use:
        web_tools.append(
            {
                "name": search_tool_name,
                "use": search_use,
                "group": "web",
                **(search_extra_config or {}),
            }
        )
    if web_fetch_use:
        web_tools.append(
            {
                "name": web_fetch_tool_name,
                "use": web_fetch_use,
                "group": "web",
                **(web_fetch_extra_config or {}),
            }
        )
    tools[:0] = web_tools

    if include_write_tools:
        tools += [
            {"name": "write_file", "use": "deerflow.sandbox.tools:write_file_tool", "group": "file:write"},
            {"name": "str_replace", "use": "deerflow.sandbox.tools:str_replace_tool", "group": "file:write"},
        ]
    if include_bash_tool:
        tools.append({"name": "bash", "use": "deerflow.sandbox.tools:bash_tool", "group": "bash"})

    return tools
def _make_model_config_name(model_name: str) -> str:
    """Turn a provider model identifier into a config-friendly name.

    Keeps only the part after the last "/" and replaces dots with hyphens:

        "google/gemini-2.5-pro" -> "gemini-2-5-pro"
        "gpt-5.4"               -> "gpt-5-4"
        "deepseek-chat"         -> "deepseek-chat"
    """
    # rsplit with maxsplit=1 isolates the final path component of "org/model".
    final_component = model_name.rsplit("/", 1)[-1]
    return final_component.replace(".", "-")
def build_minimal_config(
    *,
    provider_use: str,
    model_name: str,
    display_name: str,
    api_key_field: str,
    env_var: str | None,
    extra_model_config: dict | None = None,
    base_url: str | None = None,
    search_use: str | None = None,
    search_tool_name: str = "web_search",
    search_extra_config: dict | None = None,
    web_fetch_use: str | None = None,
    web_fetch_tool_name: str = "web_fetch",
    web_fetch_extra_config: dict | None = None,
    sandbox_use: str = "deerflow.sandbox.local:LocalSandboxProvider",
    allow_host_bash: bool = False,
    include_bash_tool: bool = False,
    include_write_tools: bool = True,
    config_version: int = 5,
    base_config: dict[str, Any] | None = None,
) -> str:
    """Build the content of a minimal config.yaml.

    The result is a deep copy of *base_config* (if any) with ``config_version``,
    ``models``, ``tools`` and ``sandbox`` replaced or updated, preceded by a
    generated-file comment header. Neither *base_config* nor
    *extra_model_config* is modified.

    Args:
        provider_use: Import path of the chat model class.
        model_name: Provider model identifier; also used to derive the entry name.
        display_name: Human-readable model label.
        api_key_field: Model-entry key that receives the ``$ENV_VAR`` reference.
        env_var: API key environment variable; None omits the key field.
        extra_model_config: Extra model settings merged into the model entry.
        base_url: Explicit endpoint. Takes precedence over a ``base_url`` inside
            *extra_model_config*, which serves only as the fallback.
        search_use, search_tool_name, search_extra_config: Web search tool settings.
        web_fetch_use, web_fetch_tool_name, web_fetch_extra_config: Web fetch tool settings.
        sandbox_use: Sandbox provider import path.
        allow_host_bash: Written to the sandbox section for the local sandbox only.
        include_bash_tool: Whether the bash tool is listed.
        include_write_tools: Whether write_file / str_replace are listed.
        config_version: Value of the ``config_version`` field.
        base_config: Existing configuration to start from.

    Returns:
        The YAML document as a string.
    """
    from datetime import date

    today = date.today().isoformat()

    model_entry: dict[str, Any] = {
        "name": _make_model_config_name(model_name),
        "display_name": display_name,
        "use": provider_use,
        "model": model_name,
    }
    if env_var:
        # Reference the variable instead of embedding the secret in the file.
        model_entry[api_key_field] = f"${env_var}"

    extra_model_fields = dict(extra_model_config or {})
    # Always take base_url out of the extras so the update() below cannot
    # overwrite an explicitly passed base_url with the provider default.
    default_base_url = extra_model_fields.pop("base_url", None)
    base_url = base_url or default_base_url
    if base_url:
        model_entry["base_url"] = base_url
    if extra_model_fields:
        model_entry.update(extra_model_fields)

    data: dict[str, Any] = deepcopy(base_config or {})
    data["config_version"] = config_version
    data["models"] = [model_entry]

    # Only a list is a usable starting point; anything else falls back to defaults.
    base_tools = data.get("tools")
    if not isinstance(base_tools, list):
        base_tools = None
    data["tools"] = _build_tools(
        base_tools=base_tools,
        search_use=search_use,
        search_tool_name=search_tool_name,
        search_extra_config=search_extra_config,
        web_fetch_use=web_fetch_use,
        web_fetch_tool_name=web_fetch_tool_name,
        web_fetch_extra_config=web_fetch_extra_config,
        include_bash_tool=include_bash_tool,
        include_write_tools=include_write_tools,
    )

    sandbox_config = deepcopy(data.get("sandbox") if isinstance(data.get("sandbox"), dict) else {})
    sandbox_config["use"] = sandbox_use
    if sandbox_use == "deerflow.sandbox.local:LocalSandboxProvider":
        sandbox_config["allow_host_bash"] = allow_host_bash
    else:
        # The flag is only written for the local sandbox; drop an inherited one.
        sandbox_config.pop("allow_host_bash", None)
    data["sandbox"] = sandbox_config

    header = (
        f"# DeerFlow Configuration\n"
        f"# Generated by 'make setup' on {today}\n"
        f"# Run 'make setup' to reconfigure, or edit this file for advanced options.\n"
        f"# Full reference: config.example.yaml\n\n"
    )

    return header + _yaml_dump(data)
def write_config_yaml(
    config_path: Path,
    *,
    provider_use: str,
    model_name: str,
    display_name: str,
    api_key_field: str,
    env_var: str | None,
    extra_model_config: dict | None = None,
    base_url: str | None = None,
    search_use: str | None = None,
    search_tool_name: str = "web_search",
    search_extra_config: dict | None = None,
    web_fetch_use: str | None = None,
    web_fetch_tool_name: str = "web_fetch",
    web_fetch_extra_config: dict | None = None,
    sandbox_use: str = "deerflow.sandbox.local:LocalSandboxProvider",
    allow_host_bash: bool = False,
    include_bash_tool: bool = False,
    include_write_tools: bool = True,
) -> None:
    """Generate config.yaml at *config_path*, overwriting any existing file.

    If a ``config.example.yaml`` sits next to *config_path* and can be loaded,
    it supplies the ``config_version`` and serves as the base configuration.
    Otherwise version 5 and an empty base are used. All keyword arguments are
    forwarded unchanged to ``build_minimal_config``.
    """
    config_version = 5
    example_defaults = None

    # Read config_version (and defaults) from config.example.yaml if present.
    example_path = config_path.parent / "config.example.yaml"
    if example_path.exists():
        try:
            loaded = yaml.safe_load(example_path.read_text(encoding="utf-8")) or {}
            config_version = int(loaded.get("config_version", 5))
        except Exception:
            # Best effort: an unreadable or malformed example is ignored entirely.
            example_defaults = None
        else:
            example_defaults = loaded

    config_path.write_text(
        build_minimal_config(
            provider_use=provider_use,
            model_name=model_name,
            display_name=display_name,
            api_key_field=api_key_field,
            env_var=env_var,
            extra_model_config=extra_model_config,
            base_url=base_url,
            search_use=search_use,
            search_tool_name=search_tool_name,
            search_extra_config=search_extra_config,
            web_fetch_use=web_fetch_use,
            web_fetch_tool_name=web_fetch_tool_name,
            web_fetch_extra_config=web_fetch_extra_config,
            sandbox_use=sandbox_use,
            allow_host_bash=allow_host_bash,
            include_bash_tool=include_bash_tool,
            include_write_tools=include_write_tools,
            config_version=config_version,
            base_config=example_defaults,
        ),
        encoding="utf-8",
    )