Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
deer-flow/docker/docker-compose-dev.yaml — new file, 253 lines
@@ -0,0 +1,253 @@
|
||||
# DeerFlow Development Environment
|
||||
# Usage: docker-compose -f docker-compose-dev.yaml up --build
|
||||
#
|
||||
# Services:
|
||||
# - nginx: Reverse proxy (port 2026)
|
||||
# - frontend: Frontend Next.js dev server (port 3000)
|
||||
# - gateway: Backend Gateway API (port 8001)
|
||||
# - langgraph: LangGraph server (port 2024)
|
||||
# - provisioner (optional): Sandbox provisioner (creates Pods in host Kubernetes)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Kubernetes cluster + kubeconfig are only required when using provisioner mode.
|
||||
#
|
||||
# Access: http://localhost:2026
|
||||
|
||||
services:
|
||||
# ── Sandbox Provisioner ────────────────────────────────────────────────
|
||||
# Manages per-sandbox Pod + Service lifecycle in the host Kubernetes
|
||||
# cluster via the K8s API.
|
||||
# Backend accesses sandboxes directly via host.docker.internal:{NodePort}.
|
||||
provisioner:
|
||||
build:
|
||||
context: ./provisioner
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
container_name: deer-flow-provisioner
|
||||
volumes:
|
||||
- ~/.kube/config:/root/.kube/config:ro
|
||||
environment:
|
||||
- K8S_NAMESPACE=deer-flow
|
||||
- SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
|
||||
# Host paths for K8s HostPath volumes (must be absolute paths accessible by K8s node)
|
||||
# On Docker Desktop/OrbStack, use your actual host paths like /Users/username/...
|
||||
# Set these in your shell before running docker-compose:
|
||||
# export DEER_FLOW_ROOT=/absolute/path/to/deer-flow
|
||||
- SKILLS_HOST_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- THREADS_HOST_PATH=${DEER_FLOW_ROOT}/backend/.deer-flow/threads
|
||||
# Production: use PVC instead of hostPath to avoid data loss on node failure.
|
||||
# When set, hostPath vars above are ignored for the corresponding volume.
|
||||
# USERDATA_PVC_NAME uses subPath (threads/{thread_id}/user-data) automatically.
|
||||
# - SKILLS_PVC_NAME=deer-flow-skills-pvc
|
||||
# - USERDATA_PVC_NAME=deer-flow-userdata-pvc
|
||||
- KUBECONFIG_PATH=/root/.kube/config
|
||||
- NODE_HOST=host.docker.internal
|
||||
# Override K8S API server URL since kubeconfig uses 127.0.0.1
|
||||
# which is unreachable from inside the container
|
||||
- K8S_API_SERVER=https://host.docker.internal:26443
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 6
|
||||
start_period: 15s
|
||||
|
||||
# ── Reverse Proxy ──────────────────────────────────────────────────────
|
||||
# Routes API traffic to gateway/langgraph and (optionally) provisioner.
|
||||
# LANGGRAPH_UPSTREAM and LANGGRAPH_REWRITE control gateway vs standard
|
||||
# routing (processed by envsubst at container start).
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: deer-flow-nginx
|
||||
ports:
|
||||
- "2026:2026"
|
||||
volumes:
|
||||
- ./nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
|
||||
environment:
|
||||
- LANGGRAPH_UPSTREAM=${LANGGRAPH_UPSTREAM:-langgraph:2024}
|
||||
- LANGGRAPH_REWRITE=${LANGGRAPH_REWRITE:-/}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
set -e
|
||||
envsubst '$$LANGGRAPH_UPSTREAM $$LANGGRAPH_REWRITE' \
|
||||
< /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf
|
||||
test -e /proc/net/if_inet6 || sed -i '/^[[:space:]]*listen[[:space:]]\+\[::\]:2026;/d' /etc/nginx/nginx.conf
|
||||
exec nginx -g 'daemon off;'
|
||||
depends_on:
|
||||
- frontend
|
||||
- gateway
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Frontend - Next.js Development Server
|
||||
frontend:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: frontend/Dockerfile
|
||||
target: dev
|
||||
args:
|
||||
PNPM_STORE_PATH: ${PNPM_STORE_PATH:-/root/.local/share/pnpm/store}
|
||||
NPM_REGISTRY: ${NPM_REGISTRY:-}
|
||||
container_name: deer-flow-frontend
|
||||
command: sh -c "cd frontend && pnpm run dev > /app/logs/frontend.log 2>&1"
|
||||
volumes:
|
||||
- ../frontend/src:/app/frontend/src
|
||||
- ../frontend/public:/app/frontend/public
|
||||
- ../frontend/next.config.js:/app/frontend/next.config.js:ro
|
||||
- ../logs:/app/logs
|
||||
# Mount pnpm store for caching
|
||||
- ${PNPM_STORE_PATH:-~/.local/share/pnpm/store}:/root/.local/share/pnpm/store
|
||||
working_dir: /app
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- WATCHPACK_POLLING=true
|
||||
- CI=true
|
||||
- DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://gateway:8001
|
||||
- DEER_FLOW_INTERNAL_LANGGRAPH_BASE_URL=http://langgraph:2024
|
||||
env_file:
|
||||
- ../frontend/.env
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Backend - Gateway API
|
||||
gateway:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
target: dev
|
||||
# cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-gateway
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-gateway
|
||||
command: sh -c "{ cd backend && (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --reload --reload-include='*.yaml .env'; } > /app/logs/gateway.log 2>&1"
|
||||
volumes:
|
||||
- ../backend/:/app/backend/
|
||||
# Preserve the .venv built during Docker image build — mounting the full backend/
|
||||
# directory above would otherwise shadow it with the (empty) host directory.
|
||||
- gateway-venv:/app/backend/.venv
|
||||
- ../config.yaml:/app/config.yaml
|
||||
- ../extensions_config.json:/app/extensions_config.json
|
||||
- ../skills:/app/skills
|
||||
- ../logs:/app/logs
|
||||
# Use a Docker-managed uv cache volume instead of a host bind mount.
|
||||
# On macOS/Docker Desktop, uv may fail to create symlinks inside shared
|
||||
# host directories, which causes startup-time `uv sync` to crash.
|
||||
- gateway-uv-cache:/root/.cache/uv
|
||||
# DooD: same as gateway — AioSandboxProvider runs inside LangGraph process.
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
working_dir: /app
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_CHANNELS_LANGGRAPH_URL=${DEER_FLOW_CHANNELS_LANGGRAPH_URL:-http://langgraph:2024}
|
||||
- DEER_FLOW_CHANNELS_GATEWAY_URL=${DEER_FLOW_CHANNELS_GATEWAY_URL:-http://gateway:8001}
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
# For Linux: map host.docker.internal to host gateway
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Backend - LangGraph Server
|
||||
langgraph:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
target: dev
|
||||
# cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-langgraph
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-langgraph
|
||||
command: sh -c "cd backend && { (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && allow_blocking='' && if [ \"\${LANGGRAPH_ALLOW_BLOCKING:-0}\" = '1' ]; then allow_blocking='--allow-blocking'; fi && uv run langgraph dev --no-browser \${allow_blocking} --host 0.0.0.0 --port 2024 --n-jobs-per-worker \${LANGGRAPH_JOBS_PER_WORKER:-10}; } > /app/logs/langgraph.log 2>&1"
|
||||
volumes:
|
||||
- ../backend/:/app/backend/
|
||||
# Preserve the .venv built during Docker image build — mounting the full backend/
|
||||
# directory above would otherwise shadow it with the (empty) host directory.
|
||||
- langgraph-venv:/app/backend/.venv
|
||||
- ../config.yaml:/app/config.yaml
|
||||
- ../extensions_config.json:/app/extensions_config.json
|
||||
- ../skills:/app/skills
|
||||
- ../logs:/app/logs
|
||||
# Use a Docker-managed uv cache volume instead of a host bind mount.
|
||||
# On macOS/Docker Desktop, uv may fail to create symlinks inside shared
|
||||
# host directories, which causes startup-time `uv sync` to crash.
|
||||
- langgraph-uv-cache:/root/.cache/uv
|
||||
# DooD: same as gateway — AioSandboxProvider runs inside LangGraph process.
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
working_dir: /app
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
# For Linux: map host.docker.internal to host gateway
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
# Persist .venv across container restarts so dependencies installed during
|
||||
# image build are not shadowed by the host backend/ directory mount.
|
||||
gateway-venv:
|
||||
langgraph-venv:
|
||||
gateway-uv-cache:
|
||||
langgraph-uv-cache:
|
||||
|
||||
networks:
|
||||
deer-flow-dev:
|
||||
driver: bridge
|
||||
ipam:
|
||||
config:
|
||||
- subnet: 192.168.200.0/24
|
||||
Reference in New Issue
Block a user