Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
253
deer-flow/docker/docker-compose-dev.yaml
Normal file
253
deer-flow/docker/docker-compose-dev.yaml
Normal file
@@ -0,0 +1,253 @@
|
||||
# DeerFlow Development Environment
|
||||
# Usage: docker-compose -f docker-compose-dev.yaml up --build
|
||||
#
|
||||
# Services:
|
||||
# - nginx: Reverse proxy (port 2026)
|
||||
# - frontend: Frontend Next.js dev server (port 3000)
|
||||
# - gateway: Backend Gateway API (port 8001)
|
||||
# - langgraph: LangGraph server (port 2024)
|
||||
# - provisioner (optional): Sandbox provisioner (creates Pods in host Kubernetes)
|
||||
#
|
||||
# Prerequisites:
|
||||
# - Kubernetes cluster + kubeconfig are only required when using provisioner mode.
|
||||
#
|
||||
# Access: http://localhost:2026
|
||||
|
||||
services:
|
||||
# ── Sandbox Provisioner ────────────────────────────────────────────────
|
||||
# Manages per-sandbox Pod + Service lifecycle in the host Kubernetes
|
||||
# cluster via the K8s API.
|
||||
# Backend accesses sandboxes directly via host.docker.internal:{NodePort}.
|
||||
provisioner:
|
||||
build:
|
||||
context: ./provisioner
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
container_name: deer-flow-provisioner
|
||||
volumes:
|
||||
- ~/.kube/config:/root/.kube/config:ro
|
||||
environment:
|
||||
- K8S_NAMESPACE=deer-flow
|
||||
- SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
|
||||
# Host paths for K8s HostPath volumes (must be absolute paths accessible by K8s node)
|
||||
# On Docker Desktop/OrbStack, use your actual host paths like /Users/username/...
|
||||
# Set these in your shell before running docker-compose:
|
||||
# export DEER_FLOW_ROOT=/absolute/path/to/deer-flow
|
||||
- SKILLS_HOST_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- THREADS_HOST_PATH=${DEER_FLOW_ROOT}/backend/.deer-flow/threads
|
||||
# Production: use PVC instead of hostPath to avoid data loss on node failure.
|
||||
# When set, hostPath vars above are ignored for the corresponding volume.
|
||||
# USERDATA_PVC_NAME uses subPath (threads/{thread_id}/user-data) automatically.
|
||||
# - SKILLS_PVC_NAME=deer-flow-skills-pvc
|
||||
# - USERDATA_PVC_NAME=deer-flow-userdata-pvc
|
||||
- KUBECONFIG_PATH=/root/.kube/config
|
||||
- NODE_HOST=host.docker.internal
|
||||
# Override K8S API server URL since kubeconfig uses 127.0.0.1
|
||||
# which is unreachable from inside the container
|
||||
- K8S_API_SERVER=https://host.docker.internal:26443
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 6
|
||||
start_period: 15s
|
||||
|
||||
# ── Reverse Proxy ──────────────────────────────────────────────────────
|
||||
# Routes API traffic to gateway/langgraph and (optionally) provisioner.
|
||||
# LANGGRAPH_UPSTREAM and LANGGRAPH_REWRITE control gateway vs standard
|
||||
# routing (processed by envsubst at container start).
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: deer-flow-nginx
|
||||
ports:
|
||||
- "2026:2026"
|
||||
volumes:
|
||||
- ./nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
|
||||
environment:
|
||||
- LANGGRAPH_UPSTREAM=${LANGGRAPH_UPSTREAM:-langgraph:2024}
|
||||
- LANGGRAPH_REWRITE=${LANGGRAPH_REWRITE:-/}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
set -e
|
||||
envsubst '$$LANGGRAPH_UPSTREAM $$LANGGRAPH_REWRITE' \
|
||||
< /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf
|
||||
test -e /proc/net/if_inet6 || sed -i '/^[[:space:]]*listen[[:space:]]\+\[::\]:2026;/d' /etc/nginx/nginx.conf
|
||||
exec nginx -g 'daemon off;'
|
||||
depends_on:
|
||||
- frontend
|
||||
- gateway
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Frontend - Next.js Development Server
|
||||
frontend:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: frontend/Dockerfile
|
||||
target: dev
|
||||
args:
|
||||
PNPM_STORE_PATH: ${PNPM_STORE_PATH:-/root/.local/share/pnpm/store}
|
||||
NPM_REGISTRY: ${NPM_REGISTRY:-}
|
||||
container_name: deer-flow-frontend
|
||||
command: sh -c "cd frontend && pnpm run dev > /app/logs/frontend.log 2>&1"
|
||||
volumes:
|
||||
- ../frontend/src:/app/frontend/src
|
||||
- ../frontend/public:/app/frontend/public
|
||||
- ../frontend/next.config.js:/app/frontend/next.config.js:ro
|
||||
- ../logs:/app/logs
|
||||
# Mount pnpm store for caching
|
||||
- ${PNPM_STORE_PATH:-~/.local/share/pnpm/store}:/root/.local/share/pnpm/store
|
||||
working_dir: /app
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
- WATCHPACK_POLLING=true
|
||||
- CI=true
|
||||
- DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://gateway:8001
|
||||
- DEER_FLOW_INTERNAL_LANGGRAPH_BASE_URL=http://langgraph:2024
|
||||
env_file:
|
||||
- ../frontend/.env
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Backend - Gateway API
|
||||
gateway:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
target: dev
|
||||
# cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-gateway
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-gateway
|
||||
command: sh -c "{ cd backend && (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --reload --reload-include='*.yaml .env'; } > /app/logs/gateway.log 2>&1"
|
||||
volumes:
|
||||
- ../backend/:/app/backend/
|
||||
# Preserve the .venv built during Docker image build — mounting the full backend/
|
||||
# directory above would otherwise shadow it with the (empty) host directory.
|
||||
- gateway-venv:/app/backend/.venv
|
||||
- ../config.yaml:/app/config.yaml
|
||||
- ../extensions_config.json:/app/extensions_config.json
|
||||
- ../skills:/app/skills
|
||||
- ../logs:/app/logs
|
||||
# Use a Docker-managed uv cache volume instead of a host bind mount.
|
||||
# On macOS/Docker Desktop, uv may fail to create symlinks inside shared
|
||||
# host directories, which causes startup-time `uv sync` to crash.
|
||||
- gateway-uv-cache:/root/.cache/uv
|
||||
# DooD: same as gateway — AioSandboxProvider runs inside LangGraph process.
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
working_dir: /app
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_CHANNELS_LANGGRAPH_URL=${DEER_FLOW_CHANNELS_LANGGRAPH_URL:-http://langgraph:2024}
|
||||
- DEER_FLOW_CHANNELS_GATEWAY_URL=${DEER_FLOW_CHANNELS_GATEWAY_URL:-http://gateway:8001}
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
# For Linux: map host.docker.internal to host gateway
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
# Backend - LangGraph Server
|
||||
langgraph:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
target: dev
|
||||
# cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-langgraph
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-langgraph
|
||||
command: sh -c "cd backend && { (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && allow_blocking='' && if [ \"\${LANGGRAPH_ALLOW_BLOCKING:-0}\" = '1' ]; then allow_blocking='--allow-blocking'; fi && uv run langgraph dev --no-browser \${allow_blocking} --host 0.0.0.0 --port 2024 --n-jobs-per-worker \${LANGGRAPH_JOBS_PER_WORKER:-10}; } > /app/logs/langgraph.log 2>&1"
|
||||
volumes:
|
||||
- ../backend/:/app/backend/
|
||||
# Preserve the .venv built during Docker image build — mounting the full backend/
|
||||
# directory above would otherwise shadow it with the (empty) host directory.
|
||||
- langgraph-venv:/app/backend/.venv
|
||||
- ../config.yaml:/app/config.yaml
|
||||
- ../extensions_config.json:/app/extensions_config.json
|
||||
- ../skills:/app/skills
|
||||
- ../logs:/app/logs
|
||||
# Use a Docker-managed uv cache volume instead of a host bind mount.
|
||||
# On macOS/Docker Desktop, uv may fail to create symlinks inside shared
|
||||
# host directories, which causes startup-time `uv sync` to crash.
|
||||
- langgraph-uv-cache:/root/.cache/uv
|
||||
# DooD: same as gateway — AioSandboxProvider runs inside LangGraph process.
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
working_dir: /app
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
# For Linux: map host.docker.internal to host gateway
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow-dev
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
# Persist .venv across container restarts so dependencies installed during
|
||||
# image build are not shadowed by the host backend/ directory mount.
|
||||
gateway-venv:
|
||||
langgraph-venv:
|
||||
gateway-uv-cache:
|
||||
langgraph-uv-cache:
|
||||
|
||||
networks:
|
||||
deer-flow-dev:
|
||||
driver: bridge
|
||||
ipam:
|
||||
config:
|
||||
- subnet: 192.168.200.0/24
|
||||
202
deer-flow/docker/docker-compose.yaml
Normal file
202
deer-flow/docker/docker-compose.yaml
Normal file
@@ -0,0 +1,202 @@
|
||||
# DeerFlow Production Environment
|
||||
# Usage: make up
|
||||
#
|
||||
# Services:
|
||||
# - nginx: Reverse proxy (port 2026, configurable via PORT env var)
|
||||
# - frontend: Next.js production server
|
||||
# - gateway: FastAPI Gateway API
|
||||
# - langgraph: LangGraph production server (Dockerfile generated by langgraph dockerfile)
|
||||
# - provisioner: (optional) Sandbox provisioner for Kubernetes mode
|
||||
#
|
||||
# Key environment variables (set via environment/.env or scripts/deploy.sh):
|
||||
# DEER_FLOW_HOME — runtime data dir, default $REPO_ROOT/backend/.deer-flow
|
||||
# DEER_FLOW_CONFIG_PATH — path to config.yaml
|
||||
# DEER_FLOW_EXTENSIONS_CONFIG_PATH — path to extensions_config.json
|
||||
# DEER_FLOW_DOCKER_SOCKET — Docker socket path, default /var/run/docker.sock
|
||||
# DEER_FLOW_REPO_ROOT — repo root (used for skills host path in DooD)
|
||||
# BETTER_AUTH_SECRET — required for frontend auth/session security
|
||||
#
|
||||
# LangSmith tracing is disabled by default (LANGSMITH_TRACING=false).
|
||||
# Set LANGSMITH_TRACING=true and LANGSMITH_API_KEY in .env to enable it.
|
||||
#
|
||||
# Access: http://localhost:${PORT:-2026}
|
||||
|
||||
services:
|
||||
# ── Reverse Proxy ──────────────────────────────────────────────────────────
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: deer-flow-nginx
|
||||
ports:
|
||||
- "${PORT:-2026}:2026"
|
||||
volumes:
|
||||
- ./nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
|
||||
environment:
|
||||
- LANGGRAPH_UPSTREAM=${LANGGRAPH_UPSTREAM:-langgraph:2024}
|
||||
- LANGGRAPH_REWRITE=${LANGGRAPH_REWRITE:-/}
|
||||
command: >
|
||||
sh -c "envsubst '$$LANGGRAPH_UPSTREAM $$LANGGRAPH_REWRITE'
|
||||
< /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf
|
||||
&& nginx -g 'daemon off;'"
|
||||
depends_on:
|
||||
- frontend
|
||||
- gateway
|
||||
networks:
|
||||
- deer-flow
|
||||
restart: unless-stopped
|
||||
|
||||
# ── Frontend: Next.js Production ───────────────────────────────────────────
|
||||
frontend:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: frontend/Dockerfile
|
||||
target: prod
|
||||
args:
|
||||
PNPM_STORE_PATH: ${PNPM_STORE_PATH:-/root/.local/share/pnpm/store}
|
||||
NPM_REGISTRY: ${NPM_REGISTRY:-}
|
||||
container_name: deer-flow-frontend
|
||||
environment:
|
||||
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET}
|
||||
- DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://gateway:8001
|
||||
- DEER_FLOW_INTERNAL_LANGGRAPH_BASE_URL=http://langgraph:2024
|
||||
env_file:
|
||||
- ../frontend/.env
|
||||
networks:
|
||||
- deer-flow
|
||||
restart: unless-stopped
|
||||
|
||||
# ── Gateway API ────────────────────────────────────────────────────────────
|
||||
gateway:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-gateway
|
||||
command: sh -c "cd backend && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --workers ${GATEWAY_WORKERS:-4}"
|
||||
volumes:
|
||||
- ${DEER_FLOW_CONFIG_PATH}:/app/backend/config.yaml:ro
|
||||
- ${DEER_FLOW_EXTENSIONS_CONFIG_PATH}:/app/backend/extensions_config.json:ro
|
||||
- ../skills:/app/skills:ro
|
||||
- ${DEER_FLOW_HOME}:/app/backend/.deer-flow
|
||||
# DooD: AioSandboxProvider starts sandbox containers via host Docker daemon
|
||||
- ${DEER_FLOW_DOCKER_SOCKET}:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
working_dir: /app
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_CONFIG_PATH=/app/backend/config.yaml
|
||||
- DEER_FLOW_EXTENSIONS_CONFIG_PATH=/app/backend/extensions_config.json
|
||||
- DEER_FLOW_CHANNELS_LANGGRAPH_URL=${DEER_FLOW_CHANNELS_LANGGRAPH_URL:-http://langgraph:2024}
|
||||
- DEER_FLOW_CHANNELS_GATEWAY_URL=${DEER_FLOW_CHANNELS_GATEWAY_URL:-http://gateway:8001}
|
||||
# DooD path/network translation
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_HOME}
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_REPO_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow
|
||||
restart: unless-stopped
|
||||
|
||||
# ── LangGraph Server ───────────────────────────────────────────────────────
|
||||
# TODO: switch to langchain/langgraph-api (licensed) once a license key is available.
|
||||
# For now, use `langgraph dev` (no license required) with the standard backend image.
|
||||
langgraph:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/Dockerfile
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
|
||||
UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
|
||||
container_name: deer-flow-langgraph
|
||||
command: sh -c 'cd /app/backend && args="--no-browser --no-reload --host 0.0.0.0 --port 2024 --n-jobs-per-worker $${LANGGRAPH_JOBS_PER_WORKER:-10}" && if [ "$${LANGGRAPH_ALLOW_BLOCKING:-0}" = "1" ]; then args="$$args --allow-blocking"; fi && uv run langgraph dev $$args'
|
||||
volumes:
|
||||
- ${DEER_FLOW_CONFIG_PATH}:/app/backend/config.yaml:ro
|
||||
- ${DEER_FLOW_EXTENSIONS_CONFIG_PATH}:/app/backend/extensions_config.json:ro
|
||||
- ${DEER_FLOW_HOME}:/app/backend/.deer-flow
|
||||
- ../skills:/app/skills:ro
|
||||
- ../backend/.langgraph_api:/app/backend/.langgraph_api
|
||||
# DooD: same as gateway
|
||||
- ${DEER_FLOW_DOCKER_SOCKET}:/var/run/docker.sock
|
||||
# CLI auth directories for auto-auth (Claude Code + Codex CLI)
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.claude
|
||||
target: /root/.claude
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
- type: bind
|
||||
source: ${HOME:?HOME must be set}/.codex
|
||||
target: /root/.codex
|
||||
read_only: true
|
||||
bind:
|
||||
create_host_path: true
|
||||
environment:
|
||||
- CI=true
|
||||
- DEER_FLOW_HOME=/app/backend/.deer-flow
|
||||
- DEER_FLOW_CONFIG_PATH=/app/backend/config.yaml
|
||||
- DEER_FLOW_EXTENSIONS_CONFIG_PATH=/app/backend/extensions_config.json
|
||||
- DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_HOME}
|
||||
- DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_REPO_ROOT}/skills
|
||||
- DEER_FLOW_SANDBOX_HOST=host.docker.internal
|
||||
# LangSmith tracing: set LANGSMITH_TRACING=true and LANGSMITH_API_KEY in .env to enable.
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow
|
||||
restart: unless-stopped
|
||||
|
||||
# ── Sandbox Provisioner (optional, Kubernetes mode) ────────────────────────
|
||||
provisioner:
|
||||
build:
|
||||
context: ./provisioner
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APT_MIRROR: ${APT_MIRROR:-}
|
||||
PIP_INDEX_URL: ${PIP_INDEX_URL:-}
|
||||
container_name: deer-flow-provisioner
|
||||
volumes:
|
||||
- ~/.kube/config:/root/.kube/config:ro
|
||||
environment:
|
||||
- K8S_NAMESPACE=deer-flow
|
||||
- SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
|
||||
- SKILLS_HOST_PATH=${DEER_FLOW_REPO_ROOT}/skills
|
||||
- THREADS_HOST_PATH=${DEER_FLOW_HOME}/threads
|
||||
- KUBECONFIG_PATH=/root/.kube/config
|
||||
- NODE_HOST=host.docker.internal
|
||||
- K8S_API_SERVER=https://host.docker.internal:26443
|
||||
env_file:
|
||||
- ../.env
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
networks:
|
||||
- deer-flow
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 6
|
||||
networks:
|
||||
deer-flow:
|
||||
driver: bridge
|
||||
236
deer-flow/docker/nginx/nginx.conf
Normal file
236
deer-flow/docker/nginx/nginx.conf
Normal file
@@ -0,0 +1,236 @@
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
pid /tmp/nginx.pid;
|
||||
http {
|
||||
# Basic settings
|
||||
sendfile on;
|
||||
tcp_nopush on;
|
||||
tcp_nodelay on;
|
||||
keepalive_timeout 65;
|
||||
types_hash_max_size 2048;
|
||||
|
||||
# Logging
|
||||
access_log /dev/stdout;
|
||||
error_log /dev/stderr;
|
||||
|
||||
# Docker internal DNS (for resolving k3s hostname)
|
||||
resolver 127.0.0.11 valid=10s ipv6=off;
|
||||
|
||||
# Upstream servers (using Docker service names)
|
||||
# NOTE: `zone` and `resolve` are nginx Plus-only features and are not
|
||||
# available in the standard nginx:alpine image. Docker's internal DNS
|
||||
# (127.0.0.11) handles service discovery; upstreams are resolved at
|
||||
# nginx startup and remain valid for the lifetime of the deployment.
|
||||
upstream gateway {
|
||||
server gateway:8001;
|
||||
}
|
||||
|
||||
upstream langgraph {
|
||||
server ${LANGGRAPH_UPSTREAM};
|
||||
}
|
||||
|
||||
upstream frontend {
|
||||
server frontend:3000;
|
||||
}
|
||||
|
||||
# ── Main server (path-based routing) ─────────────────────────────────
|
||||
server {
|
||||
listen 2026 default_server;
|
||||
listen [::]:2026 default_server;
|
||||
server_name _;
|
||||
|
||||
# Hide CORS headers from upstream to prevent duplicates
|
||||
proxy_hide_header 'Access-Control-Allow-Origin';
|
||||
proxy_hide_header 'Access-Control-Allow-Methods';
|
||||
proxy_hide_header 'Access-Control-Allow-Headers';
|
||||
proxy_hide_header 'Access-Control-Allow-Credentials';
|
||||
|
||||
# CORS headers for all responses (nginx handles CORS centrally)
|
||||
add_header 'Access-Control-Allow-Origin' '*' always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' '*' always;
|
||||
|
||||
# Handle OPTIONS requests (CORS preflight)
|
||||
if ($request_method = 'OPTIONS') {
|
||||
return 204;
|
||||
}
|
||||
|
||||
# LangGraph API routes
|
||||
# In standard mode: /api/langgraph/* → langgraph:2024 (rewrite to /*)
|
||||
# In gateway mode: /api/langgraph/* → gateway:8001 (rewrite to /api/*)
|
||||
# Controlled by LANGGRAPH_UPSTREAM and LANGGRAPH_REWRITE env vars.
|
||||
location /api/langgraph/ {
|
||||
rewrite ^/api/langgraph/(.*) ${LANGGRAPH_REWRITE}$1 break;
|
||||
proxy_pass http://langgraph;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection '';
|
||||
|
||||
# SSE/Streaming support
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_set_header X-Accel-Buffering no;
|
||||
|
||||
# Timeouts for long-running requests
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
|
||||
# Chunked transfer encoding
|
||||
chunked_transfer_encoding on;
|
||||
}
|
||||
|
||||
# Custom API: Models endpoint
|
||||
location /api/models {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Memory endpoint
|
||||
location /api/memory {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: MCP configuration endpoint
|
||||
location /api/mcp {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Skills configuration endpoint
|
||||
location /api/skills {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Agents endpoint
|
||||
location /api/agents {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Uploads endpoint
|
||||
location ~ ^/api/threads/[^/]+/uploads {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Large file upload support
|
||||
client_max_body_size 100M;
|
||||
proxy_request_buffering off;
|
||||
}
|
||||
|
||||
# Custom API: Other endpoints under /api/threads
|
||||
location ~ ^/api/threads {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: Swagger UI
|
||||
location /docs {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: ReDoc
|
||||
location /redoc {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: OpenAPI Schema
|
||||
location /openapi.json {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Health check endpoint (gateway)
|
||||
location /health {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# ── Provisioner API (sandbox management) ────────────────────────
|
||||
# Use a variable so nginx resolves provisioner at request time (not startup).
|
||||
# This allows nginx to start even when provisioner container is not running.
|
||||
location /api/sandboxes {
|
||||
set $provisioner_upstream provisioner:8002;
|
||||
proxy_pass http://$provisioner_upstream;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# All other requests go to frontend
|
||||
location / {
|
||||
proxy_pass http://frontend;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection 'upgrade';
|
||||
proxy_cache_bypass $http_upgrade;
|
||||
|
||||
# Timeouts
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
}
|
||||
}
|
||||
}
|
||||
241
deer-flow/docker/nginx/nginx.local.conf
Normal file
241
deer-flow/docker/nginx/nginx.local.conf
Normal file
@@ -0,0 +1,241 @@
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
pid logs/nginx.pid;
|
||||
http {
|
||||
# Basic settings
|
||||
sendfile on;
|
||||
tcp_nopush on;
|
||||
tcp_nodelay on;
|
||||
keepalive_timeout 65;
|
||||
types_hash_max_size 2048;
|
||||
|
||||
# Logging
|
||||
access_log logs/nginx-access.log;
|
||||
error_log logs/nginx-error.log;
|
||||
|
||||
# Upstream servers (using 127.0.0.1 for local development)
|
||||
upstream gateway {
|
||||
server 127.0.0.1:8001;
|
||||
}
|
||||
|
||||
upstream langgraph {
|
||||
server 127.0.0.1:2024;
|
||||
}
|
||||
|
||||
upstream frontend {
|
||||
server 127.0.0.1:3000;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 2026;
|
||||
listen [::]:2026;
|
||||
server_name _;
|
||||
|
||||
# Hide CORS headers from upstream to prevent duplicates
|
||||
proxy_hide_header 'Access-Control-Allow-Origin';
|
||||
proxy_hide_header 'Access-Control-Allow-Methods';
|
||||
proxy_hide_header 'Access-Control-Allow-Headers';
|
||||
proxy_hide_header 'Access-Control-Allow-Credentials';
|
||||
|
||||
# CORS headers for all responses (nginx handles CORS centrally)
|
||||
add_header 'Access-Control-Allow-Origin' '*' always;
|
||||
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
|
||||
add_header 'Access-Control-Allow-Headers' '*' always;
|
||||
|
||||
# Handle OPTIONS requests (CORS preflight)
|
||||
if ($request_method = 'OPTIONS') {
|
||||
return 204;
|
||||
}
|
||||
|
||||
# LangGraph API routes (served by langgraph dev)
|
||||
# Rewrites /api/langgraph/* to /* before proxying to LangGraph server
|
||||
location /api/langgraph/ {
|
||||
rewrite ^/api/langgraph/(.*) /$1 break;
|
||||
proxy_pass http://langgraph;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection '';
|
||||
|
||||
# SSE/Streaming support
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_set_header X-Accel-Buffering no;
|
||||
|
||||
# Timeouts for long-running requests
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
|
||||
# Chunked transfer encoding
|
||||
chunked_transfer_encoding on;
|
||||
}
|
||||
|
||||
# Experimental: Gateway-backed LangGraph-compatible API
|
||||
# Frontend can opt-in via NEXT_PUBLIC_LANGGRAPH_BASE_URL=/api/langgraph-compat
|
||||
location /api/langgraph-compat/ {
|
||||
rewrite ^/api/langgraph-compat/(.*) /api/$1 break;
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection '';
|
||||
|
||||
# SSE/Streaming support
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_set_header X-Accel-Buffering no;
|
||||
|
||||
# Timeouts for long-running requests
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
|
||||
# Chunked transfer encoding
|
||||
chunked_transfer_encoding on;
|
||||
}
|
||||
|
||||
# Custom API: Models endpoint
|
||||
location /api/models {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Memory endpoint
|
||||
location /api/memory {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: MCP configuration endpoint
|
||||
location /api/mcp {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Skills configuration endpoint
|
||||
location /api/skills {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Agents endpoint
|
||||
location /api/agents {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Custom API: Uploads endpoint
|
||||
location ~ ^/api/threads/[^/]+/uploads {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Large file upload support
|
||||
client_max_body_size 100M;
|
||||
proxy_request_buffering off;
|
||||
}
|
||||
|
||||
# Custom API: Other endpoints under /api/threads
|
||||
location ~ ^/api/threads {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: Swagger UI
|
||||
location /api/docs {
|
||||
proxy_pass http://gateway/docs;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: ReDoc
|
||||
location /api/redoc {
|
||||
proxy_pass http://gateway/redoc;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# API Documentation: OpenAPI Schema
|
||||
location /openapi.json {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Health check endpoint (gateway)
|
||||
location /health {
|
||||
proxy_pass http://gateway;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# All other requests go to frontend
|
||||
location / {
|
||||
proxy_pass http://frontend;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection 'upgrade';
|
||||
proxy_cache_bypass $http_upgrade;
|
||||
|
||||
# Timeouts
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
}
|
||||
}
|
||||
}
|
||||
29
deer-flow/docker/provisioner/Dockerfile
Normal file
29
deer-flow/docker/provisioner/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
||||
FROM python:3.12-slim-bookworm
|
||||
|
||||
ARG APT_MIRROR
|
||||
ARG PIP_INDEX_URL
|
||||
|
||||
# Optionally override apt mirror for restricted networks (e.g. APT_MIRROR=mirrors.aliyun.com)
|
||||
RUN if [ -n "${APT_MIRROR}" ]; then \
|
||||
sed -i "s|deb.debian.org|${APT_MIRROR}|g" /etc/apt/sources.list.d/debian.sources 2>/dev/null || true; \
|
||||
sed -i "s|deb.debian.org|${APT_MIRROR}|g" /etc/apt/sources.list 2>/dev/null || true; \
|
||||
fi
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
${PIP_INDEX_URL:+--index-url "$PIP_INDEX_URL"} \
|
||||
fastapi \
|
||||
"uvicorn[standard]" \
|
||||
kubernetes
|
||||
|
||||
WORKDIR /app
|
||||
COPY app.py .
|
||||
|
||||
EXPOSE 8002
|
||||
|
||||
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8002"]
|
||||
332
deer-flow/docker/provisioner/README.md
Normal file
332
deer-flow/docker/provisioner/README.md
Normal file
@@ -0,0 +1,332 @@
|
||||
# DeerFlow Sandbox Provisioner
|
||||
|
||||
The **Sandbox Provisioner** is a FastAPI service that dynamically manages sandbox Pods in Kubernetes. It provides a REST API for the DeerFlow backend to create, monitor, and destroy isolated sandbox environments for code execution.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌────────────┐ HTTP ┌─────────────┐ K8s API ┌──────────────┐
|
||||
│ Backend │ ─────▸ │ Provisioner │ ────────▸ │ Host K8s │
|
||||
│ (gateway/ │ │ :8002 │ │ API Server │
|
||||
│ langgraph) │ └─────────────┘ └──────┬───────┘
|
||||
└────────────┘ │ creates
|
||||
│
|
||||
┌─────────────┐ ┌────▼─────┐
|
||||
│ Backend │ ──────▸ │ Sandbox │
|
||||
│ (via Docker │ NodePort│ Pod(s) │
|
||||
│ network) │ └──────────┘
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Backend Request**: When the backend needs to execute code, it sends a `POST /api/sandboxes` request with a `sandbox_id` and `thread_id`.
|
||||
|
||||
2. **Pod Creation**: The provisioner creates a dedicated Pod in the `deer-flow` namespace with:
|
||||
- The sandbox container image (all-in-one-sandbox)
|
||||
- HostPath volumes mounted for:
|
||||
- `/mnt/skills` → Read-only access to public skills
|
||||
- `/mnt/user-data` → Read-write access to thread-specific data
|
||||
- Resource limits (CPU, memory, ephemeral storage)
|
||||
- Readiness/liveness probes
|
||||
|
||||
3. **Service Creation**: A NodePort Service is created to expose the Pod, with Kubernetes auto-allocating a port from the NodePort range (typically 30000-32767).
|
||||
|
||||
4. **Access URL**: The provisioner returns `http://host.docker.internal:{NodePort}` to the backend, which the backend containers can reach directly.
|
||||
|
||||
5. **Cleanup**: When the session ends, `DELETE /api/sandboxes/{sandbox_id}` removes both the Pod and Service.
|
||||
|
||||
## Requirements
|
||||
|
||||
Host machine with a running Kubernetes cluster (Docker Desktop K8s, OrbStack, minikube, kind, etc.)
|
||||
|
||||
### Enable Kubernetes in Docker Desktop
|
||||
1. Open Docker Desktop settings
|
||||
2. Go to "Kubernetes" tab
|
||||
3. Check "Enable Kubernetes"
|
||||
4. Click "Apply & Restart"
|
||||
|
||||
### Enable Kubernetes in OrbStack
|
||||
1. Open OrbStack settings
|
||||
2. Go to "Kubernetes" tab
|
||||
3. Check "Enable Kubernetes"
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
Health check endpoint.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"status": "ok"
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /api/sandboxes`
|
||||
Create a new sandbox Pod + Service.
|
||||
|
||||
**Request**:
|
||||
```json
|
||||
{
|
||||
"sandbox_id": "abc-123",
|
||||
"thread_id": "thread-456"
|
||||
}
|
||||
```
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"sandbox_id": "abc-123",
|
||||
"sandbox_url": "http://host.docker.internal:32123",
|
||||
"status": "Pending"
|
||||
}
|
||||
```
|
||||
|
||||
**Idempotent**: Calling with the same `sandbox_id` returns the existing sandbox info.
|
||||
|
||||
### `GET /api/sandboxes/{sandbox_id}`
|
||||
Get status and URL of a specific sandbox.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"sandbox_id": "abc-123",
|
||||
"sandbox_url": "http://host.docker.internal:32123",
|
||||
"status": "Running"
|
||||
}
|
||||
```
|
||||
|
||||
**Status Values**: `Pending`, `Running`, `Succeeded`, `Failed`, `Unknown`, `NotFound`
|
||||
|
||||
### `DELETE /api/sandboxes/{sandbox_id}`
|
||||
Destroy a sandbox Pod + Service.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"ok": true,
|
||||
"sandbox_id": "abc-123"
|
||||
}
|
||||
```
|
||||
|
||||
### `GET /api/sandboxes`
|
||||
List all sandboxes currently managed.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"sandboxes": [
|
||||
{
|
||||
"sandbox_id": "abc-123",
|
||||
"sandbox_url": "http://host.docker.internal:32123",
|
||||
"status": "Running"
|
||||
}
|
||||
],
|
||||
"count": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
The provisioner is configured via environment variables (set in [docker-compose-dev.yaml](../docker-compose-dev.yaml)):
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `K8S_NAMESPACE` | `deer-flow` | Kubernetes namespace for sandbox resources |
|
||||
| `SANDBOX_IMAGE` | `enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest` | Container image for sandbox Pods |
|
||||
| `SKILLS_HOST_PATH` | - | **Host machine** path to skills directory (must be absolute) |
|
||||
| `THREADS_HOST_PATH` | - | **Host machine** path to threads data directory (must be absolute) |
|
||||
| `SKILLS_PVC_NAME` | empty (use hostPath) | PVC name for skills volume; when set, sandbox Pods use PVC instead of hostPath |
|
||||
| `USERDATA_PVC_NAME` | empty (use hostPath) | PVC name for user-data volume; when set, uses PVC with `subPath: threads/{thread_id}/user-data` |
|
||||
| `KUBECONFIG_PATH` | `/root/.kube/config` | Path to kubeconfig **inside** the provisioner container |
|
||||
| `NODE_HOST` | `host.docker.internal` | Hostname that backend containers use to reach host NodePorts |
|
||||
| `K8S_API_SERVER` | (from kubeconfig) | Override K8s API server URL (e.g., `https://host.docker.internal:26443`) |
|
||||
|
||||
### Important: K8S_API_SERVER Override
|
||||
|
||||
If your kubeconfig uses `localhost`, `127.0.0.1`, or `0.0.0.0` as the API server address (common with OrbStack, minikube, kind), the provisioner **cannot** reach it from inside the Docker container.
|
||||
|
||||
**Solution**: Set `K8S_API_SERVER` to use `host.docker.internal`:
|
||||
|
||||
```yaml
|
||||
# docker-compose-dev.yaml
|
||||
provisioner:
|
||||
environment:
|
||||
- K8S_API_SERVER=https://host.docker.internal:26443 # Replace 26443 with your API port
|
||||
```
|
||||
|
||||
Check your kubeconfig API server:
|
||||
```bash
|
||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Host Machine Requirements
|
||||
|
||||
1. **Kubernetes Cluster**:
|
||||
- Docker Desktop with Kubernetes enabled, or
|
||||
- OrbStack (built-in K8s), or
|
||||
- minikube, kind, k3s, etc.
|
||||
|
||||
2. **kubectl Configured**:
|
||||
- `~/.kube/config` must exist and be valid
|
||||
- Current context should point to your local cluster
|
||||
|
||||
3. **Kubernetes Access**:
|
||||
- The provisioner needs permissions to:
|
||||
- Create/read/delete Pods in the `deer-flow` namespace
|
||||
- Create/read/delete Services in the `deer-flow` namespace
|
||||
- Read Namespaces (to create `deer-flow` if missing)
|
||||
|
||||
4. **Host Paths**:
|
||||
- The `SKILLS_HOST_PATH` and `THREADS_HOST_PATH` must be **absolute paths on the host machine**
|
||||
- These paths are mounted into sandbox Pods via K8s HostPath volumes
|
||||
- The paths must exist and be readable by the K8s node
|
||||
|
||||
### Docker Compose Setup
|
||||
|
||||
The provisioner runs as part of the docker-compose-dev stack:
|
||||
|
||||
```bash
|
||||
# Start Docker services (provisioner starts only when config.yaml enables provisioner mode)
|
||||
make docker-start
|
||||
|
||||
# Or start just the provisioner
|
||||
docker compose -p deer-flow-dev -f docker/docker-compose-dev.yaml up -d provisioner
|
||||
```
|
||||
|
||||
The compose file:
|
||||
- Mounts your host's `~/.kube/config` into the container
|
||||
- Adds `extra_hosts` entry for `host.docker.internal` (required on Linux)
|
||||
- Configures environment variables for K8s access
|
||||
|
||||
## Testing
|
||||
|
||||
### Manual API Testing
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
curl http://localhost:8002/health
|
||||
|
||||
# Create a sandbox (via provisioner container for internal DNS)
|
||||
docker exec deer-flow-provisioner curl -X POST http://localhost:8002/api/sandboxes \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"sandbox_id":"test-001","thread_id":"thread-001"}'
|
||||
|
||||
# Check sandbox status
|
||||
docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes/test-001
|
||||
|
||||
# List all sandboxes
|
||||
docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes
|
||||
|
||||
# Verify Pod and Service in K8s
|
||||
kubectl get pod,svc -n deer-flow -l sandbox-id=test-001
|
||||
|
||||
# Delete sandbox
|
||||
docker exec deer-flow-provisioner curl -X DELETE http://localhost:8002/api/sandboxes/test-001
|
||||
```
|
||||
|
||||
### Verify from Backend Containers
|
||||
|
||||
Once a sandbox is created, the backend containers (gateway, langgraph) can access it:
|
||||
|
||||
```bash
|
||||
# Get sandbox URL from provisioner
|
||||
SANDBOX_URL=$(docker exec deer-flow-provisioner curl -s http://localhost:8002/api/sandboxes/test-001 | jq -r .sandbox_url)
|
||||
|
||||
# Test from gateway container
|
||||
docker exec deer-flow-gateway curl -s $SANDBOX_URL/v1/sandbox
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: "Kubeconfig not found"
|
||||
|
||||
**Cause**: The kubeconfig file doesn't exist at the mounted path.
|
||||
|
||||
**Solution**:
|
||||
- Ensure `~/.kube/config` exists on your host machine
|
||||
- Run `kubectl config view` to verify
|
||||
- Check the volume mount in docker-compose-dev.yaml
|
||||
|
||||
### Issue: "Kubeconfig path is a directory"
|
||||
|
||||
**Cause**: The mounted `KUBECONFIG_PATH` points to a directory instead of a file.
|
||||
|
||||
**Solution**:
|
||||
- Ensure the compose mount source is a file (e.g., `~/.kube/config`) not a directory
|
||||
- Verify inside container:
|
||||
```bash
|
||||
docker exec deer-flow-provisioner ls -ld /root/.kube/config
|
||||
```
|
||||
- Expected output should indicate a regular file (`-`), not a directory (`d`)
|
||||
|
||||
### Issue: "Connection refused" to K8s API
|
||||
|
||||
**Cause**: The provisioner can't reach the K8s API server.
|
||||
|
||||
**Solution**:
|
||||
1. Check your kubeconfig server address:
|
||||
```bash
|
||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
|
||||
```
|
||||
2. If it's `localhost` or `127.0.0.1`, set `K8S_API_SERVER`:
|
||||
```yaml
|
||||
environment:
|
||||
- K8S_API_SERVER=https://host.docker.internal:PORT
|
||||
```
|
||||
|
||||
### Issue: "Unprocessable Entity" when creating Pod
|
||||
|
||||
**Cause**: HostPath volumes contain invalid paths (e.g., relative paths with `..`).
|
||||
|
||||
**Solution**:
|
||||
- Use absolute paths for `SKILLS_HOST_PATH` and `THREADS_HOST_PATH`
|
||||
- Verify the paths exist on your host machine:
|
||||
```bash
|
||||
ls -la /path/to/skills
|
||||
ls -la /path/to/backend/.deer-flow/threads
|
||||
```
|
||||
|
||||
### Issue: Pod stuck in "ContainerCreating"
|
||||
|
||||
**Cause**: Usually pulling the sandbox image from the registry.
|
||||
|
||||
**Solution**:
|
||||
- Pre-pull the image: `make docker-init`
|
||||
- Check Pod events: `kubectl describe pod sandbox-XXX -n deer-flow`
|
||||
- Check node: `kubectl get nodes`
|
||||
|
||||
### Issue: Cannot access sandbox URL from backend
|
||||
|
||||
**Cause**: NodePort not reachable or `NODE_HOST` misconfigured.
|
||||
|
||||
**Solution**:
|
||||
- Verify the Service exists: `kubectl get svc -n deer-flow`
|
||||
- Test from host: `curl http://localhost:NODE_PORT/v1/sandbox`
|
||||
- Ensure `extra_hosts` is set in docker-compose (Linux)
|
||||
- Check `NODE_HOST` env var matches how backend reaches host
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **HostPath Volumes**: The provisioner mounts host directories into sandbox Pods by default. Ensure these paths contain only trusted data. For production, prefer PVC-based volumes (set `SKILLS_PVC_NAME` and `USERDATA_PVC_NAME`) to avoid node-specific data loss risks.
|
||||
|
||||
2. **Resource Limits**: Each sandbox Pod has CPU, memory, and storage limits to prevent resource exhaustion.
|
||||
|
||||
3. **Network Isolation**: Sandbox Pods run in the `deer-flow` namespace but share the host's network namespace via NodePort. Consider NetworkPolicies for stricter isolation.
|
||||
|
||||
4. **kubeconfig Access**: The provisioner has full access to your Kubernetes cluster via the mounted kubeconfig. Run it only in trusted environments.
|
||||
|
||||
5. **Image Trust**: The sandbox image should come from a trusted registry. Review and audit the image contents.
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [ ] Support for custom resource requests/limits per sandbox
|
||||
- [x] PersistentVolume support for larger data requirements
|
||||
- [ ] Automatic cleanup of stale sandboxes (timeout-based)
|
||||
- [ ] Metrics and monitoring (Prometheus integration)
|
||||
- [ ] Multi-cluster support (route to different K8s clusters)
|
||||
- [ ] Pod affinity/anti-affinity rules for better placement
|
||||
- [ ] NetworkPolicy templates for sandbox isolation
|
||||
582
deer-flow/docker/provisioner/app.py
Normal file
582
deer-flow/docker/provisioner/app.py
Normal file
@@ -0,0 +1,582 @@
|
||||
"""DeerFlow Sandbox Provisioner Service.
|
||||
|
||||
Dynamically creates and manages per-sandbox Pods in Kubernetes.
|
||||
Each ``sandbox_id`` gets its own Pod + NodePort Service. The backend
|
||||
accesses sandboxes directly via ``{NODE_HOST}:{NodePort}``.
|
||||
|
||||
The provisioner connects to the host machine's Kubernetes cluster via a
|
||||
mounted kubeconfig (``~/.kube/config``). Sandbox Pods run on the host
|
||||
K8s and are accessed by the backend via ``{NODE_HOST}:{NodePort}``.
|
||||
|
||||
Endpoints:
|
||||
POST /api/sandboxes — Create a sandbox Pod + Service
|
||||
DELETE /api/sandboxes/{sandbox_id} — Destroy a sandbox Pod + Service
|
||||
GET /api/sandboxes/{sandbox_id} — Get sandbox status & URL
|
||||
GET /api/sandboxes — List all sandboxes
|
||||
GET /health — Provisioner health check
|
||||
|
||||
Architecture (docker-compose-dev):
|
||||
┌────────────┐ HTTP ┌─────────────┐ K8s API ┌──────────────┐
|
||||
│ remote │ ─────▸ │ provisioner │ ────────▸ │ host K8s │
|
||||
│ _backend │ │ :8002 │ │ API server │
|
||||
└────────────┘ └─────────────┘ └──────┬───────┘
|
||||
│ creates
|
||||
┌─────────────┐ ┌──────▼───────┐
|
||||
│ backend │ ────────▸ │ sandbox │
|
||||
│ │ direct │ Pod(s) │
|
||||
└─────────────┘ NodePort └──────────────┘
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import urllib3
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from kubernetes import client as k8s_client
|
||||
from kubernetes import config as k8s_config
|
||||
from kubernetes.client.rest import ApiException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Suppress only the InsecureRequestWarning from urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
# ── Configuration (all tuneable via environment variables) ───────────────

# Kubernetes namespace in which sandbox Pods and Services are created.
K8S_NAMESPACE = os.environ.get("K8S_NAMESPACE", "deer-flow")
# Container image used for every sandbox Pod.
SANDBOX_IMAGE = os.environ.get(
    "SANDBOX_IMAGE",
    "enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest",
)
# Host-machine paths mounted into sandbox Pods via hostPath volumes.
# Used only when the corresponding *_PVC_NAME below is empty.
SKILLS_HOST_PATH = os.environ.get("SKILLS_HOST_PATH", "/skills")
THREADS_HOST_PATH = os.environ.get("THREADS_HOST_PATH", "/.deer-flow/threads")
# When non-empty, PVC-backed volumes are used instead of hostPath.
SKILLS_PVC_NAME = os.environ.get("SKILLS_PVC_NAME", "")
USERDATA_PVC_NAME = os.environ.get("USERDATA_PVC_NAME", "")
# Allow-list for thread IDs; they are embedded in filesystem paths and
# PVC subPaths, so only path-safe characters are permitted.
SAFE_THREAD_ID_PATTERN = r"^[A-Za-z0-9_\-]+$"

# Path to the kubeconfig *inside* the provisioner container.
# Typically the host's ~/.kube/config is mounted here.
KUBECONFIG_PATH = os.environ.get("KUBECONFIG_PATH", "/root/.kube/config")

# The hostname / IP that the *backend container* uses to reach NodePort
# services on the host Kubernetes node. On Docker Desktop for macOS this
# is ``host.docker.internal``; on Linux it may be the host's LAN IP.
NODE_HOST = os.environ.get("NODE_HOST", "host.docker.internal")
|
||||
|
||||
|
||||
def join_host_path(base: str, *parts: str) -> str:
    """Join host filesystem path segments while preserving native style."""
    if not parts:
        return base

    # Heuristic: a drive letter, UNC prefix, or any backslash means the
    # base is a Windows-style host path and must be joined as such.
    looks_windows = (
        re.match(r"^[A-Za-z]:[\\/]", base) is not None
        or base.startswith("\\\\")
        or "\\" in base
    )

    if looks_windows:
        from pathlib import PureWindowsPath

        joined = PureWindowsPath(base)
    else:
        from pathlib import Path

        joined = Path(base)

    for segment in parts:
        joined /= segment
    return str(joined)
|
||||
|
||||
|
||||
def _validate_thread_id(thread_id: str) -> str:
    """Validate *thread_id* against the safe-character allow-list.

    The ID is embedded in hostPath paths and PVC subPaths, so only
    alphanumerics, hyphens, and underscores are accepted.

    Returns:
        The unchanged ``thread_id``.

    Raises:
        ValueError: if the ID contains any disallowed character.
    """
    # re.fullmatch instead of re.match: with re.match, the trailing "$" in
    # SAFE_THREAD_ID_PATTERN also matches just before a final newline, so
    # e.g. "abc\n" would slip through validation and end up in a path.
    if not re.fullmatch(SAFE_THREAD_ID_PATTERN, thread_id):
        raise ValueError(
            "Invalid thread_id: only alphanumeric characters, hyphens, and underscores are allowed."
        )
    return thread_id
|
||||
|
||||
|
||||
# ── K8s client setup ────────────────────────────────────────────────────
|
||||
|
||||
core_v1: k8s_client.CoreV1Api | None = None
|
||||
|
||||
|
||||
def _init_k8s_client() -> k8s_client.CoreV1Api:
    """Load kubeconfig from the mounted host config and return a CoreV1Api.

    Tries the mounted kubeconfig first, then falls back to in-cluster
    config (useful if the provisioner itself runs inside K8s).

    Raises:
        RuntimeError: if KUBECONFIG_PATH is a directory, or if neither the
            mounted kubeconfig nor the in-cluster config can be loaded.
    """
    if os.path.exists(KUBECONFIG_PATH):
        # A directory here usually means the compose mount source was a
        # directory (e.g. ~/.kube) rather than the config file itself.
        if os.path.isdir(KUBECONFIG_PATH):
            raise RuntimeError(
                f"KUBECONFIG_PATH points to a directory, expected a file: {KUBECONFIG_PATH}"
            )
        try:
            k8s_config.load_kube_config(config_file=KUBECONFIG_PATH)
            logger.info(f"Loaded kubeconfig from {KUBECONFIG_PATH}")
        except Exception as exc:
            raise RuntimeError(
                f"Failed to load kubeconfig from {KUBECONFIG_PATH}: {exc}"
            ) from exc
    else:
        logger.warning(
            f"Kubeconfig not found at {KUBECONFIG_PATH}; trying in-cluster config"
        )
        try:
            k8s_config.load_incluster_config()
        except Exception as exc:
            raise RuntimeError(
                "Failed to initialize Kubernetes client. "
                f"No kubeconfig at {KUBECONFIG_PATH}, and in-cluster config is unavailable: {exc}"
            ) from exc

    # When connecting from inside Docker to the host's K8s API, the
    # kubeconfig may reference ``localhost`` or ``127.0.0.1``. We
    # optionally rewrite the server address so it reaches the host.
    k8s_api_server = os.environ.get("K8S_API_SERVER")
    if k8s_api_server:
        configuration = k8s_client.Configuration.get_default_copy()
        configuration.host = k8s_api_server
        # Self-signed certs are common for local clusters
        configuration.verify_ssl = False
        api_client = k8s_client.ApiClient(configuration)
        return k8s_client.CoreV1Api(api_client)

    return k8s_client.CoreV1Api()
|
||||
|
||||
|
||||
def _wait_for_kubeconfig(timeout: int = 30) -> None:
    """Wait for kubeconfig file if configured, then continue with fallback support."""
    stop_at = time.time() + timeout
    while time.time() < stop_at:
        # Not there yet: poll every couple of seconds until the deadline.
        if not os.path.exists(KUBECONFIG_PATH):
            logger.info(f"Waiting for kubeconfig at {KUBECONFIG_PATH} …")
            time.sleep(2)
            continue
        # Regular file: ready to use.
        if os.path.isfile(KUBECONFIG_PATH):
            logger.info(f"Found kubeconfig file at {KUBECONFIG_PATH}")
            return
        # Exists but is a directory (common mis-mount) — fail loudly.
        if os.path.isdir(KUBECONFIG_PATH):
            raise RuntimeError(
                "Kubeconfig path is a directory. "
                f"Please mount a kubeconfig file at {KUBECONFIG_PATH}."
            )
        # Exists but is neither a file nor a directory.
        raise RuntimeError(
            f"Kubeconfig path exists but is not a regular file: {KUBECONFIG_PATH}"
        )
    # Deadline passed: fall through and let the caller try in-cluster config.
    logger.warning(
        f"Kubeconfig not found at {KUBECONFIG_PATH} after {timeout}s; "
        "will attempt in-cluster Kubernetes config"
    )
|
||||
|
||||
|
||||
def _ensure_namespace() -> None:
    """Create the K8s namespace if it does not yet exist."""
    try:
        core_v1.read_namespace(K8S_NAMESPACE)
    except ApiException as exc:
        # Anything other than "not found" is a real API failure.
        if exc.status != 404:
            raise
        metadata = k8s_client.V1ObjectMeta(
            name=K8S_NAMESPACE,
            labels={
                "app.kubernetes.io/name": "deer-flow",
                "app.kubernetes.io/component": "sandbox",
            },
        )
        core_v1.create_namespace(k8s_client.V1Namespace(metadata=metadata))
        logger.info(f"Created namespace '{K8S_NAMESPACE}'")
    else:
        logger.info(f"Namespace '{K8S_NAMESPACE}' already exists")
|
||||
|
||||
|
||||
# ── FastAPI lifespan ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_app: FastAPI):
    # Startup: initialize Kubernetes access before serving any request so
    # the first API call never races client setup.
    global core_v1
    _wait_for_kubeconfig()  # block until the mounted kubeconfig appears (or time out)
    core_v1 = _init_k8s_client()  # module-global CoreV1Api used by all helpers
    _ensure_namespace()  # create the sandbox namespace if missing
    logger.info("Provisioner is ready (using host Kubernetes)")
    yield
    # No shutdown cleanup here: sandboxes are torn down via the DELETE endpoint.
|
||||
|
||||
|
||||
app = FastAPI(title="DeerFlow Sandbox Provisioner", lifespan=lifespan)
|
||||
|
||||
|
||||
# ── Request / Response models ───────────────────────────────────────────
|
||||
|
||||
|
||||
class CreateSandboxRequest(BaseModel):
    """Body of ``POST /api/sandboxes``."""

    # Unique identifier for the sandbox; used to derive Pod/Service names.
    sandbox_id: str
    # Owning thread; restricted to path-safe characters because it is
    # embedded in hostPath paths / PVC subPaths.
    thread_id: str = Field(pattern=SAFE_THREAD_ID_PATTERN)
|
||||
|
||||
|
||||
class SandboxResponse(BaseModel):
    """Sandbox info returned by the create/get/list endpoints."""

    sandbox_id: str
    sandbox_url: str  # Direct access URL, e.g. http://host.docker.internal:{NodePort}
    # Pod phase (e.g. Pending/Running/Succeeded/Failed/Unknown/NotFound).
    status: str
|
||||
|
||||
|
||||
# ── K8s resource helpers ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _pod_name(sandbox_id: str) -> str:
|
||||
return f"sandbox-{sandbox_id}"
|
||||
|
||||
|
||||
def _svc_name(sandbox_id: str) -> str:
|
||||
return f"sandbox-{sandbox_id}-svc"
|
||||
|
||||
|
||||
def _sandbox_url(node_port: int) -> str:
    """Build the sandbox URL using the configured NODE_HOST."""
    return "http://{}:{}".format(NODE_HOST, node_port)
|
||||
|
||||
|
||||
def _build_volumes(thread_id: str) -> list[k8s_client.V1Volume]:
    """Assemble the Pod's volume list.

    Each of the two volumes (read-only "skills", writable "user-data") is
    backed by a PVC when the corresponding ``*_PVC_NAME`` setting is
    non-empty, and by a hostPath on the node otherwise.
    """
    # Pick the backing source for the skills volume.
    if SKILLS_PVC_NAME:
        skills_source = dict(
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name=SKILLS_PVC_NAME,
                read_only=True,
            )
        )
    else:
        skills_source = dict(
            host_path=k8s_client.V1HostPathVolumeSource(
                path=SKILLS_HOST_PATH,
                type="Directory",
            )
        )

    # Pick the backing source for the per-thread user-data volume.
    if USERDATA_PVC_NAME:
        userdata_source = dict(
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name=USERDATA_PVC_NAME,
            )
        )
    else:
        userdata_source = dict(
            host_path=k8s_client.V1HostPathVolumeSource(
                path=join_host_path(THREADS_HOST_PATH, thread_id, "user-data"),
                type="DirectoryOrCreate",
            )
        )

    return [
        k8s_client.V1Volume(name="skills", **skills_source),
        k8s_client.V1Volume(name="user-data", **userdata_source),
    ]
|
||||
|
||||
|
||||
def _build_volume_mounts(thread_id: str) -> list[k8s_client.V1VolumeMount]:
    """Mounts matching _build_volumes: /mnt/skills (ro) and /mnt/user-data (rw)."""
    skills_mount = k8s_client.V1VolumeMount(
        name="skills",
        mount_path="/mnt/skills",
        read_only=True,
    )
    data_mount = k8s_client.V1VolumeMount(
        name="user-data",
        mount_path="/mnt/user-data",
        read_only=False,
    )
    # PVC-backed user data lives under a per-thread subdirectory of the claim.
    if USERDATA_PVC_NAME:
        data_mount.sub_path = f"threads/{thread_id}/user-data"

    return [skills_mount, data_mount]
|
||||
|
||||
|
||||
def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
    """Construct a Pod manifest for a single sandbox.

    *thread_id* is passed through ``_validate_thread_id`` before any use,
    then drives the user-data volume/mount paths.
    """
    thread_id = _validate_thread_id(thread_id)
    return k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            name=_pod_name(sandbox_id),
            namespace=K8S_NAMESPACE,
            # "sandbox-id" is the label the Service selector keys on.
            labels={
                "app": "deer-flow-sandbox",
                "sandbox-id": sandbox_id,
                "app.kubernetes.io/name": "deer-flow",
                "app.kubernetes.io/component": "sandbox",
            },
        ),
        spec=k8s_client.V1PodSpec(
            containers=[
                k8s_client.V1Container(
                    name="sandbox",
                    image=SANDBOX_IMAGE,
                    image_pull_policy="IfNotPresent",
                    ports=[
                        k8s_client.V1ContainerPort(
                            name="http",
                            container_port=8080,
                            protocol="TCP",
                        )
                    ],
                    # Both probes hit the sandbox's own HTTP endpoint on 8080.
                    readiness_probe=k8s_client.V1Probe(
                        http_get=k8s_client.V1HTTPGetAction(
                            path="/v1/sandbox",
                            port=8080,
                        ),
                        initial_delay_seconds=5,
                        period_seconds=5,
                        timeout_seconds=3,
                        failure_threshold=3,
                    ),
                    liveness_probe=k8s_client.V1Probe(
                        http_get=k8s_client.V1HTTPGetAction(
                            path="/v1/sandbox",
                            port=8080,
                        ),
                        initial_delay_seconds=10,
                        period_seconds=10,
                        timeout_seconds=3,
                        failure_threshold=3,
                    ),
                    resources=k8s_client.V1ResourceRequirements(
                        requests={
                            "cpu": "100m",
                            "memory": "256Mi",
                            "ephemeral-storage": "500Mi",
                        },
                        limits={
                            "cpu": "1000m",
                            "memory": "1Gi",
                            "ephemeral-storage": "500Mi",
                        },
                    ),
                    volume_mounts=_build_volume_mounts(thread_id),
                    # NOTE(review): allow_privilege_escalation=True looks
                    # inconsistent with a hardened, non-privileged sandbox —
                    # confirm whether the sandbox image actually needs
                    # setuid/sudo; if not, this should be False (and
                    # run_as_non_root is worth considering too).
                    security_context=k8s_client.V1SecurityContext(
                        privileged=False,
                        allow_privilege_escalation=True,
                    ),
                )
            ],
            volumes=_build_volumes(thread_id),
            restart_policy="Always",
        ),
    )
|
||||
|
||||
|
||||
def _build_service(sandbox_id: str) -> k8s_client.V1Service:
    """Construct a NodePort Service manifest (port auto-allocated by K8s)."""
    return k8s_client.V1Service(
        metadata=k8s_client.V1ObjectMeta(
            name=_svc_name(sandbox_id),
            namespace=K8S_NAMESPACE,
            # The "sandbox-id" label is what list_sandboxes / _get_node_port
            # use to find this Service again later.
            labels={
                "app": "deer-flow-sandbox",
                "sandbox-id": sandbox_id,
                "app.kubernetes.io/name": "deer-flow",
                "app.kubernetes.io/component": "sandbox",
            },
        ),
        spec=k8s_client.V1ServiceSpec(
            type="NodePort",
            ports=[
                k8s_client.V1ServicePort(
                    name="http",
                    port=8080,
                    target_port=8080,
                    protocol="TCP",
                    # nodePort omitted → K8s auto-allocates from the range
                )
            ],
            # Matches the "sandbox-id" label on the Pod built by _build_pod.
            selector={
                "sandbox-id": sandbox_id,
            },
        ),
    )
|
||||
|
||||
|
||||
def _get_node_port(sandbox_id: str) -> int | None:
    """Return the K8s-allocated NodePort of the sandbox Service, or None."""
    try:
        svc = core_v1.read_namespaced_service(_svc_name(sandbox_id), K8S_NAMESPACE)
    except ApiException:
        # Service missing (or API error): treat as "not allocated".
        return None
    # First port named "http", if any.
    return next(
        (p.node_port for p in (svc.spec.ports or []) if p.name == "http"),
        None,
    )
|
||||
|
||||
|
||||
def _get_pod_phase(sandbox_id: str) -> str:
    """Return the Pod phase (Pending / Running / Succeeded / Failed / Unknown).

    Returns "NotFound" when the Pod cannot be read at all.
    """
    try:
        pod = core_v1.read_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
    except ApiException:
        return "NotFound"
    return pod.status.phase or "Unknown"
|
||||
|
||||
|
||||
# ── API endpoints ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""Provisioner health check."""
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.post("/api/sandboxes", response_model=SandboxResponse)
|
||||
async def create_sandbox(req: CreateSandboxRequest):
|
||||
"""Create a sandbox Pod + NodePort Service for *sandbox_id*.
|
||||
|
||||
If the sandbox already exists, returns the existing information
|
||||
(idempotent).
|
||||
"""
|
||||
sandbox_id = req.sandbox_id
|
||||
thread_id = req.thread_id
|
||||
|
||||
logger.info(
|
||||
f"Received request to create sandbox '{sandbox_id}' for thread '{thread_id}'"
|
||||
)
|
||||
|
||||
# ── Fast path: sandbox already exists ────────────────────────────
|
||||
existing_port = _get_node_port(sandbox_id)
|
||||
if existing_port:
|
||||
return SandboxResponse(
|
||||
sandbox_id=sandbox_id,
|
||||
sandbox_url=_sandbox_url(existing_port),
|
||||
status=_get_pod_phase(sandbox_id),
|
||||
)
|
||||
|
||||
# ── Create Pod ───────────────────────────────────────────────────
|
||||
try:
|
||||
core_v1.create_namespaced_pod(K8S_NAMESPACE, _build_pod(sandbox_id, thread_id))
|
||||
logger.info(f"Created Pod {_pod_name(sandbox_id)}")
|
||||
except ApiException as exc:
|
||||
if exc.status != 409: # 409 = AlreadyExists
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Pod creation failed: {exc.reason}"
|
||||
)
|
||||
|
||||
# ── Create Service ───────────────────────────────────────────────
|
||||
try:
|
||||
core_v1.create_namespaced_service(K8S_NAMESPACE, _build_service(sandbox_id))
|
||||
logger.info(f"Created Service {_svc_name(sandbox_id)}")
|
||||
except ApiException as exc:
|
||||
if exc.status != 409:
|
||||
# Roll back the Pod on failure
|
||||
try:
|
||||
core_v1.delete_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
|
||||
except ApiException:
|
||||
pass
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Service creation failed: {exc.reason}"
|
||||
)
|
||||
|
||||
# ── Read the auto-allocated NodePort ─────────────────────────────
|
||||
node_port: int | None = None
|
||||
for _ in range(20):
|
||||
node_port = _get_node_port(sandbox_id)
|
||||
if node_port:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
if not node_port:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="NodePort was not allocated in time"
|
||||
)
|
||||
|
||||
return SandboxResponse(
|
||||
sandbox_id=sandbox_id,
|
||||
sandbox_url=_sandbox_url(node_port),
|
||||
status=_get_pod_phase(sandbox_id),
|
||||
)
|
||||
|
||||
|
||||
@app.delete("/api/sandboxes/{sandbox_id}")
|
||||
async def destroy_sandbox(sandbox_id: str):
|
||||
"""Destroy a sandbox Pod + Service."""
|
||||
errors: list[str] = []
|
||||
|
||||
# Delete Service
|
||||
try:
|
||||
core_v1.delete_namespaced_service(_svc_name(sandbox_id), K8S_NAMESPACE)
|
||||
logger.info(f"Deleted Service {_svc_name(sandbox_id)}")
|
||||
except ApiException as exc:
|
||||
if exc.status != 404:
|
||||
errors.append(f"service: {exc.reason}")
|
||||
|
||||
# Delete Pod
|
||||
try:
|
||||
core_v1.delete_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
|
||||
logger.info(f"Deleted Pod {_pod_name(sandbox_id)}")
|
||||
except ApiException as exc:
|
||||
if exc.status != 404:
|
||||
errors.append(f"pod: {exc.reason}")
|
||||
|
||||
if errors:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Partial cleanup: {', '.join(errors)}"
|
||||
)
|
||||
|
||||
return {"ok": True, "sandbox_id": sandbox_id}
|
||||
|
||||
|
||||
@app.get("/api/sandboxes/{sandbox_id}", response_model=SandboxResponse)
|
||||
async def get_sandbox(sandbox_id: str):
|
||||
"""Return current status and URL for a sandbox."""
|
||||
node_port = _get_node_port(sandbox_id)
|
||||
if not node_port:
|
||||
raise HTTPException(status_code=404, detail=f"Sandbox '{sandbox_id}' not found")
|
||||
|
||||
return SandboxResponse(
|
||||
sandbox_id=sandbox_id,
|
||||
sandbox_url=_sandbox_url(node_port),
|
||||
status=_get_pod_phase(sandbox_id),
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/sandboxes")
|
||||
async def list_sandboxes():
|
||||
"""List every sandbox currently managed in the namespace."""
|
||||
try:
|
||||
services = core_v1.list_namespaced_service(
|
||||
K8S_NAMESPACE,
|
||||
label_selector="app=deer-flow-sandbox",
|
||||
)
|
||||
except ApiException as exc:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to list services: {exc.reason}"
|
||||
)
|
||||
|
||||
sandboxes: list[SandboxResponse] = []
|
||||
for svc in services.items:
|
||||
sid = (svc.metadata.labels or {}).get("sandbox-id")
|
||||
if not sid:
|
||||
continue
|
||||
node_port = None
|
||||
for port in svc.spec.ports or []:
|
||||
if port.name == "http":
|
||||
node_port = port.node_port
|
||||
break
|
||||
if node_port:
|
||||
sandboxes.append(
|
||||
SandboxResponse(
|
||||
sandbox_id=sid,
|
||||
sandbox_url=_sandbox_url(node_port),
|
||||
status=_get_pod_phase(sid),
|
||||
)
|
||||
)
|
||||
|
||||
return {"sandboxes": sandboxes, "count": len(sandboxes)}
|
||||
Reference in New Issue
Block a user