Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions
--- a/deer-flow/docker/docker-compose-dev.yaml
+++ b/deer-flow/docker/docker-compose-dev.yaml
@@ -0,0 +1,253 @@
+# DeerFlow Development Environment
+# Usage: docker-compose -f docker-compose-dev.yaml up --build
+#
+# Services:
+#   - nginx: Reverse proxy (port 2026)
+#   - frontend: Frontend Next.js dev server (port 3000)
+#   - gateway: Backend Gateway API (port 8001)
+#   - langgraph: LangGraph server (port 2024)
+#   - provisioner (optional): Sandbox provisioner (creates Pods in host Kubernetes)
+#
+# Prerequisites:
+#   - Kubernetes cluster + kubeconfig are only required when using provisioner mode.
+#
+# Access: http://localhost:2026
+
+services:
+  # ── Sandbox Provisioner ────────────────────────────────────────────────
+  # Manages per-sandbox Pod + Service lifecycle in the host Kubernetes
+  # cluster via the K8s API.
+  # Backend accesses sandboxes directly via host.docker.internal:{NodePort}.
+  provisioner:
+    build:
+      context: ./provisioner
+      dockerfile: Dockerfile
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+    container_name: deer-flow-provisioner
+    volumes:
+      - ~/.kube/config:/root/.kube/config:ro
+    environment:
+      - K8S_NAMESPACE=deer-flow
+      - SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+      # Host paths for K8s HostPath volumes (must be absolute paths accessible by K8s node)
+      # On Docker Desktop/OrbStack, use your actual host paths like /Users/username/...
+      # Set these in your shell before running docker-compose:
+      #   export DEER_FLOW_ROOT=/absolute/path/to/deer-flow
+      - SKILLS_HOST_PATH=${DEER_FLOW_ROOT}/skills
+      - THREADS_HOST_PATH=${DEER_FLOW_ROOT}/backend/.deer-flow/threads
+      # Production: use PVC instead of hostPath to avoid data loss on node failure.
+      # When set, hostPath vars above are ignored for the corresponding volume.
+      # USERDATA_PVC_NAME uses subPath (threads/{thread_id}/user-data) automatically.
+      # - SKILLS_PVC_NAME=deer-flow-skills-pvc
+      # - USERDATA_PVC_NAME=deer-flow-userdata-pvc
+      - KUBECONFIG_PATH=/root/.kube/config
+      - NODE_HOST=host.docker.internal
+      # Override K8S API server URL since kubeconfig uses 127.0.0.1
+      # which is unreachable from inside the container
+      - K8S_API_SERVER=https://host.docker.internal:26443
+    env_file:
+      - ../.env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow-dev
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 6
+      start_period: 15s
+
+  # ── Reverse Proxy ──────────────────────────────────────────────────────
+  # Routes API traffic to gateway/langgraph and (optionally) provisioner.
+  # LANGGRAPH_UPSTREAM and LANGGRAPH_REWRITE control gateway vs standard
+  # routing (processed by envsubst at container start).
+  nginx:
+    image: nginx:alpine
+    container_name: deer-flow-nginx
+    ports:
+      - "2026:2026"
+    volumes:
+      - ./nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
+    environment:
+      - LANGGRAPH_UPSTREAM=${LANGGRAPH_UPSTREAM:-langgraph:2024}
+      - LANGGRAPH_REWRITE=${LANGGRAPH_REWRITE:-/}
+    # Render nginx.conf from the mounted template, then run nginx in the
+    # foreground as PID 1. "$$" escapes "$" from Compose interpolation so that
+    # envsubst receives the literal variable names and substitutes only those.
+    # On hosts without IPv6 the "listen [::]:2026 ..." directive is removed;
+    # the pattern accepts trailing parameters such as "default_server".
+    command:
+      - sh
+      - -c
+      - |
+        set -e
+        envsubst '$$LANGGRAPH_UPSTREAM $$LANGGRAPH_REWRITE' \
+          < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf
+        test -e /proc/net/if_inet6 || sed -i '/^[[:space:]]*listen[[:space:]]\+\[::\]:2026[[:space:];]/d' /etc/nginx/nginx.conf
+        exec nginx -g 'daemon off;'
+    depends_on:
+      - frontend
+      - gateway
+    networks:
+      - deer-flow-dev
+    restart: unless-stopped
+
+  # Frontend - Next.js Development Server
+  frontend:
+    build:
+      context: ../
+      dockerfile: frontend/Dockerfile
+      target: dev
+      args:
+        PNPM_STORE_PATH: ${PNPM_STORE_PATH:-/root/.local/share/pnpm/store}
+        NPM_REGISTRY: ${NPM_REGISTRY:-}
+    container_name: deer-flow-frontend
+    command: sh -c "cd frontend && pnpm run dev > /app/logs/frontend.log 2>&1"
+    volumes:
+      - ../frontend/src:/app/frontend/src
+      - ../frontend/public:/app/frontend/public
+      - ../frontend/next.config.js:/app/frontend/next.config.js:ro
+      - ../logs:/app/logs
+      # Mount pnpm store for caching
+      - ${PNPM_STORE_PATH:-~/.local/share/pnpm/store}:/root/.local/share/pnpm/store
+    working_dir: /app
+    environment:
+      - NODE_ENV=development
+      - WATCHPACK_POLLING=true
+      - CI=true
+      - DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://gateway:8001
+      - DEER_FLOW_INTERNAL_LANGGRAPH_BASE_URL=http://langgraph:2024
+    env_file:
+      - ../frontend/.env
+    networks:
+      - deer-flow-dev
+    restart: unless-stopped
+
+  # Backend - Gateway API
+  gateway:
+    build:
+      context: ../
+      dockerfile: backend/Dockerfile
+      target: dev
+      # cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-gateway
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+        UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
+        UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+    container_name: deer-flow-gateway
+    # Sync dependencies (recreating .venv once if the first sync fails), then
+    # start uvicorn with auto-reload; all output goes to logs/gateway.log.
+    # uvicorn takes one glob per --reload-include flag, so the YAML and .env
+    # patterns are passed as two separate options.
+    command: sh -c "{ cd backend && (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --reload --reload-include='*.yaml' --reload-include='.env'; } > /app/logs/gateway.log 2>&1"
+    volumes:
+      - ../backend/:/app/backend/
+      # Preserve the .venv built during Docker image build — mounting the full backend/
+      # directory above would otherwise shadow it with the (empty) host directory.
+      - gateway-venv:/app/backend/.venv
+      - ../config.yaml:/app/config.yaml
+      - ../extensions_config.json:/app/extensions_config.json
+      - ../skills:/app/skills
+      - ../logs:/app/logs
+      # Use a Docker-managed uv cache volume instead of a host bind mount.
+      # On macOS/Docker Desktop, uv may fail to create symlinks inside shared
+      # host directories, which causes startup-time `uv sync` to crash.
+      - gateway-uv-cache:/root/.cache/uv
+      # DooD: AioSandboxProvider starts sandbox containers via the host Docker daemon.
+      - /var/run/docker.sock:/var/run/docker.sock
+      # CLI auth directories for auto-auth (Claude Code + Codex CLI)
+      - type: bind
+        source: ${HOME:?HOME must be set}/.claude
+        target: /root/.claude
+        read_only: true
+        bind:
+          create_host_path: true
+      - type: bind
+        source: ${HOME:?HOME must be set}/.codex
+        target: /root/.codex
+        read_only: true
+        bind:
+          create_host_path: true
+    working_dir: /app
+    environment:
+      - CI=true
+      - DEER_FLOW_HOME=/app/backend/.deer-flow
+      - DEER_FLOW_CHANNELS_LANGGRAPH_URL=${DEER_FLOW_CHANNELS_LANGGRAPH_URL:-http://langgraph:2024}
+      - DEER_FLOW_CHANNELS_GATEWAY_URL=${DEER_FLOW_CHANNELS_GATEWAY_URL:-http://gateway:8001}
+      - DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
+      - DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
+      - DEER_FLOW_SANDBOX_HOST=host.docker.internal
+    env_file:
+      - ../.env
+    extra_hosts:
+      # For Linux: map host.docker.internal to host gateway
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow-dev
+    restart: unless-stopped
+
+  # Backend - LangGraph Server
+  langgraph:
+    build:
+      context: ../
+      dockerfile: backend/Dockerfile
+      target: dev
+      # cache_from disabled - requires manual setup: mkdir -p /tmp/docker-cache-langgraph
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+        UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
+        UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+    container_name: deer-flow-langgraph
+    # Sync dependencies (recreating .venv once if the first sync fails), then
+    # start `langgraph dev`; all output goes to logs/langgraph.log.
+    # "$$" keeps Compose from interpolating the shell variables at parse time,
+    # so LANGGRAPH_ALLOW_BLOCKING / LANGGRAPH_JOBS_PER_WORKER are read from the
+    # container environment and the local allow_blocking variable survives.
+    command: sh -c "cd backend && { (uv sync || (echo '[startup] uv sync failed; recreating .venv and retrying once' && uv venv --allow-existing .venv && uv sync)) && allow_blocking='' && if [ \"$${LANGGRAPH_ALLOW_BLOCKING:-0}\" = '1' ]; then allow_blocking='--allow-blocking'; fi && uv run langgraph dev --no-browser $${allow_blocking} --host 0.0.0.0 --port 2024 --n-jobs-per-worker $${LANGGRAPH_JOBS_PER_WORKER:-10}; } > /app/logs/langgraph.log 2>&1"
+    volumes:
+      - ../backend/:/app/backend/
+      # Preserve the .venv built during Docker image build — mounting the full backend/
+      # directory above would otherwise shadow it with the (empty) host directory.
+      - langgraph-venv:/app/backend/.venv
+      - ../config.yaml:/app/config.yaml
+      - ../extensions_config.json:/app/extensions_config.json
+      - ../skills:/app/skills
+      - ../logs:/app/logs
+      # Use a Docker-managed uv cache volume instead of a host bind mount.
+      # On macOS/Docker Desktop, uv may fail to create symlinks inside shared
+      # host directories, which causes startup-time `uv sync` to crash.
+      - langgraph-uv-cache:/root/.cache/uv
+      # DooD: same as gateway — AioSandboxProvider runs inside LangGraph process.
+      - /var/run/docker.sock:/var/run/docker.sock
+      # CLI auth directories for auto-auth (Claude Code + Codex CLI)
+      - type: bind
+        source: ${HOME:?HOME must be set}/.claude
+        target: /root/.claude
+        read_only: true
+        bind:
+          create_host_path: true
+      - type: bind
+        source: ${HOME:?HOME must be set}/.codex
+        target: /root/.codex
+        read_only: true
+        bind:
+          create_host_path: true
+    working_dir: /app
+    environment:
+      - CI=true
+      - DEER_FLOW_HOME=/app/backend/.deer-flow
+      - DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_ROOT}/backend/.deer-flow
+      - DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_ROOT}/skills
+      - DEER_FLOW_SANDBOX_HOST=host.docker.internal
+    env_file:
+      - ../.env
+    extra_hosts:
+      # For Linux: map host.docker.internal to host gateway
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow-dev
+    restart: unless-stopped
+
+volumes:
+  # Persist .venv across container restarts so dependencies installed during
+  # image build are not shadowed by the host backend/ directory mount.
+  gateway-venv:
+  langgraph-venv:
+  gateway-uv-cache:
+  langgraph-uv-cache:
+
+networks:
+  deer-flow-dev:
+    driver: bridge
+    ipam:
+      config:
+        - subnet: 192.168.200.0/24
--- a/deer-flow/docker/docker-compose.yaml
+++ b/deer-flow/docker/docker-compose.yaml
@@ -0,0 +1,202 @@
+# DeerFlow Production Environment
+# Usage: make up
+#
+# Services:
+#   - nginx:       Reverse proxy (port 2026, configurable via PORT env var)
+#   - frontend:    Next.js production server
+#   - gateway:     FastAPI Gateway API
+#   - langgraph:   LangGraph server (`langgraph dev` on the standard backend image)
+#   - provisioner: (optional) Sandbox provisioner for Kubernetes mode
+#
+# Key environment variables (set via environment/.env or scripts/deploy.sh):
+#   DEER_FLOW_HOME                   — runtime data dir, default $REPO_ROOT/backend/.deer-flow
+#   DEER_FLOW_CONFIG_PATH            — path to config.yaml
+#   DEER_FLOW_EXTENSIONS_CONFIG_PATH — path to extensions_config.json
+#   DEER_FLOW_DOCKER_SOCKET          — Docker socket path, default /var/run/docker.sock
+#   DEER_FLOW_REPO_ROOT              — repo root (used for skills host path in DooD)
+#   BETTER_AUTH_SECRET               — required for frontend auth/session security
+#
+# LangSmith tracing is disabled by default (LANGSMITH_TRACING=false).
+# Set LANGSMITH_TRACING=true and LANGSMITH_API_KEY in .env to enable it.
+#
+# Access: http://localhost:${PORT:-2026}
+
+services:
+  # ── Reverse Proxy ──────────────────────────────────────────────────────────
+  nginx:
+    image: nginx:alpine
+    container_name: deer-flow-nginx
+    ports:
+      - "${PORT:-2026}:2026"
+    volumes:
+      - ./nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
+    environment:
+      - LANGGRAPH_UPSTREAM=${LANGGRAPH_UPSTREAM:-langgraph:2024}
+      - LANGGRAPH_REWRITE=${LANGGRAPH_REWRITE:-/}
+    # Render nginx.conf from the mounted template, then run nginx in the
+    # foreground. `exec` makes nginx PID 1 so it receives stop signals
+    # directly. "$$" escapes "$" from Compose interpolation so that envsubst
+    # substitutes only the two named variables. On hosts without IPv6 the
+    # "listen [::]:2026 ..." directive is removed before nginx starts.
+    command:
+      - sh
+      - -c
+      - |
+        set -e
+        envsubst '$$LANGGRAPH_UPSTREAM $$LANGGRAPH_REWRITE' \
+          < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf
+        test -e /proc/net/if_inet6 || sed -i '/^[[:space:]]*listen[[:space:]]\+\[::\]:2026[[:space:];]/d' /etc/nginx/nginx.conf
+        exec nginx -g 'daemon off;'
+    depends_on:
+      - frontend
+      - gateway
+    networks:
+      - deer-flow
+    restart: unless-stopped
+
+  # ── Frontend: Next.js Production ───────────────────────────────────────────
+  frontend:
+    build:
+      context: ../
+      dockerfile: frontend/Dockerfile
+      target: prod
+      args:
+        PNPM_STORE_PATH: ${PNPM_STORE_PATH:-/root/.local/share/pnpm/store}
+        NPM_REGISTRY: ${NPM_REGISTRY:-}
+    container_name: deer-flow-frontend
+    environment:
+      - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET}
+      - DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://gateway:8001
+      - DEER_FLOW_INTERNAL_LANGGRAPH_BASE_URL=http://langgraph:2024
+    env_file:
+      - ../frontend/.env
+    networks:
+      - deer-flow
+    restart: unless-stopped
+
+  # ── Gateway API ────────────────────────────────────────────────────────────
+  gateway:
+    build:
+      context: ../
+      dockerfile: backend/Dockerfile
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+        UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
+        UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+    container_name: deer-flow-gateway
+    command: sh -c "cd backend && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --workers ${GATEWAY_WORKERS:-4}"
+    volumes:
+      - ${DEER_FLOW_CONFIG_PATH}:/app/backend/config.yaml:ro
+      - ${DEER_FLOW_EXTENSIONS_CONFIG_PATH}:/app/backend/extensions_config.json:ro
+      - ../skills:/app/skills:ro
+      - ${DEER_FLOW_HOME}:/app/backend/.deer-flow
+      # DooD: AioSandboxProvider starts sandbox containers via host Docker daemon
+      - ${DEER_FLOW_DOCKER_SOCKET}:/var/run/docker.sock
+      # CLI auth directories for auto-auth (Claude Code + Codex CLI)
+      - type: bind
+        source: ${HOME:?HOME must be set}/.claude
+        target: /root/.claude
+        read_only: true
+        bind:
+          create_host_path: true
+      - type: bind
+        source: ${HOME:?HOME must be set}/.codex
+        target: /root/.codex
+        read_only: true
+        bind:
+          create_host_path: true
+    working_dir: /app
+    environment:
+      - CI=true
+      - DEER_FLOW_HOME=/app/backend/.deer-flow
+      - DEER_FLOW_CONFIG_PATH=/app/backend/config.yaml
+      - DEER_FLOW_EXTENSIONS_CONFIG_PATH=/app/backend/extensions_config.json
+      - DEER_FLOW_CHANNELS_LANGGRAPH_URL=${DEER_FLOW_CHANNELS_LANGGRAPH_URL:-http://langgraph:2024}
+      - DEER_FLOW_CHANNELS_GATEWAY_URL=${DEER_FLOW_CHANNELS_GATEWAY_URL:-http://gateway:8001}
+      # DooD path/network translation
+      - DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_HOME}
+      - DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_REPO_ROOT}/skills
+      - DEER_FLOW_SANDBOX_HOST=host.docker.internal
+    env_file:
+      - ../.env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow
+    restart: unless-stopped
+
+  # ── LangGraph Server ───────────────────────────────────────────────────────
+  # TODO: switch to langchain/langgraph-api (licensed) once a license key is available.
+  # For now, use `langgraph dev` (no license required) with the standard backend image.
+  langgraph:
+    build:
+      context: ../
+      dockerfile: backend/Dockerfile
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+        UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
+        UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+    container_name: deer-flow-langgraph
+    command: sh -c 'cd /app/backend && args="--no-browser --no-reload --host 0.0.0.0 --port 2024 --n-jobs-per-worker $${LANGGRAPH_JOBS_PER_WORKER:-10}" && if [ "$${LANGGRAPH_ALLOW_BLOCKING:-0}" = "1" ]; then args="$$args --allow-blocking"; fi && uv run langgraph dev $$args'
+    volumes:
+      - ${DEER_FLOW_CONFIG_PATH}:/app/backend/config.yaml:ro
+      - ${DEER_FLOW_EXTENSIONS_CONFIG_PATH}:/app/backend/extensions_config.json:ro
+      - ${DEER_FLOW_HOME}:/app/backend/.deer-flow
+      - ../skills:/app/skills:ro
+      - ../backend/.langgraph_api:/app/backend/.langgraph_api
+      # DooD: same as gateway
+      - ${DEER_FLOW_DOCKER_SOCKET}:/var/run/docker.sock
+      # CLI auth directories for auto-auth (Claude Code + Codex CLI)
+      - type: bind
+        source: ${HOME:?HOME must be set}/.claude
+        target: /root/.claude
+        read_only: true
+        bind:
+          create_host_path: true
+      - type: bind
+        source: ${HOME:?HOME must be set}/.codex
+        target: /root/.codex
+        read_only: true
+        bind:
+          create_host_path: true
+    environment:
+      - CI=true
+      - DEER_FLOW_HOME=/app/backend/.deer-flow
+      - DEER_FLOW_CONFIG_PATH=/app/backend/config.yaml
+      - DEER_FLOW_EXTENSIONS_CONFIG_PATH=/app/backend/extensions_config.json
+      - DEER_FLOW_HOST_BASE_DIR=${DEER_FLOW_HOME}
+      - DEER_FLOW_HOST_SKILLS_PATH=${DEER_FLOW_REPO_ROOT}/skills
+      - DEER_FLOW_SANDBOX_HOST=host.docker.internal
+      # LangSmith tracing: set LANGSMITH_TRACING=true and LANGSMITH_API_KEY in .env to enable.
+    env_file:
+      - ../.env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow
+    restart: unless-stopped
+
+  # ── Sandbox Provisioner (optional, Kubernetes mode) ────────────────────────
+  provisioner:
+    build:
+      context: ./provisioner
+      dockerfile: Dockerfile
+      args:
+        APT_MIRROR: ${APT_MIRROR:-}
+        PIP_INDEX_URL: ${PIP_INDEX_URL:-}
+    container_name: deer-flow-provisioner
+    volumes:
+      - ~/.kube/config:/root/.kube/config:ro
+    environment:
+      - K8S_NAMESPACE=deer-flow
+      - SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+      - SKILLS_HOST_PATH=${DEER_FLOW_REPO_ROOT}/skills
+      - THREADS_HOST_PATH=${DEER_FLOW_HOME}/threads
+      - KUBECONFIG_PATH=/root/.kube/config
+      - NODE_HOST=host.docker.internal
+      - K8S_API_SERVER=https://host.docker.internal:26443
+    env_file:
+      - ../.env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    networks:
+      - deer-flow
+    restart: unless-stopped
+    # Probe the provisioner's /health endpoint. Failures during the first 15s
+    # after container start do not count against the retry budget.
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 6
+      start_period: 15s
+networks:
+  deer-flow:
+    driver: bridge
--- a/deer-flow/docker/nginx/nginx.conf
+++ b/deer-flow/docker/nginx/nginx.conf
@@ -0,0 +1,236 @@
+events {
+    worker_connections 1024;
+}
+pid /tmp/nginx.pid;
+http {
+    # Basic settings
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    # Logging
+    access_log /dev/stdout;
+    error_log /dev/stderr;
+
+    # Docker internal DNS (for resolving k3s hostname)
+    resolver 127.0.0.11 valid=10s ipv6=off;
+
+    # Upstream servers (using Docker service names)
+    # NOTE: `zone` and `resolve` are nginx Plus-only features and are not
+    # available in the standard nginx:alpine image. Docker's internal DNS
+    # (127.0.0.11) handles service discovery; upstreams are resolved at
+    # nginx startup and remain valid for the lifetime of the deployment.
+    upstream gateway {
+        server gateway:8001;
+    }
+
+    upstream langgraph {
+        server ${LANGGRAPH_UPSTREAM};
+    }
+
+    upstream frontend {
+        server frontend:3000;
+    }
+
+    # ── Main server (path-based routing) ─────────────────────────────────
+    server {
+        listen 2026 default_server;
+        listen [::]:2026 default_server;
+        server_name _;
+
+        # Hide CORS headers from upstream to prevent duplicates
+        proxy_hide_header 'Access-Control-Allow-Origin';
+        proxy_hide_header 'Access-Control-Allow-Methods';
+        proxy_hide_header 'Access-Control-Allow-Headers';
+        proxy_hide_header 'Access-Control-Allow-Credentials';
+
+        # CORS headers for all responses (nginx handles CORS centrally)
+        add_header 'Access-Control-Allow-Origin' '*' always;
+        add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
+        add_header 'Access-Control-Allow-Headers' '*' always;
+
+        # Handle OPTIONS requests (CORS preflight)
+        if ($request_method = 'OPTIONS') {
+            return 204;
+        }
+
+        # LangGraph API routes
+        # In standard mode: /api/langgraph/* → langgraph:2024 (rewrite to /*)
+        # In gateway mode:  /api/langgraph/* → gateway:8001  (rewrite to /api/*)
+        # Controlled by LANGGRAPH_UPSTREAM and LANGGRAPH_REWRITE env vars.
+        location /api/langgraph/ {
+            rewrite ^/api/langgraph/(.*) ${LANGGRAPH_REWRITE}$1 break;
+            proxy_pass http://langgraph;
+            proxy_http_version 1.1;
+
+            # Headers
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header Connection '';
+
+            # SSE/Streaming support
+            proxy_buffering off;
+            proxy_cache off;
+            proxy_set_header X-Accel-Buffering no;
+
+            # Timeouts for long-running requests
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
+            # Chunked transfer encoding
+            chunked_transfer_encoding on;
+        }
+
+        # Custom API: Models endpoint
+        location /api/models {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Memory endpoint
+        location /api/memory {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: MCP configuration endpoint
+        location /api/mcp {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Skills configuration endpoint
+        location /api/skills {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Agents endpoint
+        location /api/agents {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Uploads endpoint
+        location ~ ^/api/threads/[^/]+/uploads {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+
+            # Large file upload support
+            client_max_body_size 100M;
+            proxy_request_buffering off;
+        }
+
+        # Custom API: Other endpoints under /api/threads
+        location ~ ^/api/threads {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: Swagger UI
+        location /docs {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: ReDoc
+        location /redoc {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: OpenAPI Schema
+        location /openapi.json {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Health check endpoint (gateway)
+        location /health {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # ── Provisioner API (sandbox management) ────────────────────────
+        # Use a variable so nginx resolves provisioner at request time (not startup).
+        # This allows nginx to start even when provisioner container is not running.
+        location /api/sandboxes {
+            set $provisioner_upstream provisioner:8002;
+            proxy_pass http://$provisioner_upstream;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # All other requests go to frontend
+        location / {
+            proxy_pass http://frontend;
+            proxy_http_version 1.1;
+
+            # Headers
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection 'upgrade';
+            proxy_cache_bypass $http_upgrade;
+
+            # Timeouts
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+        }
+    }
+}
--- a/deer-flow/docker/nginx/nginx.local.conf
+++ b/deer-flow/docker/nginx/nginx.local.conf
@@ -0,0 +1,241 @@
+events {
+    worker_connections 1024;
+}
+pid logs/nginx.pid;
+http {
+    # Basic settings
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    # Logging
+    access_log logs/nginx-access.log;
+    error_log logs/nginx-error.log;
+
+    # Upstream servers (using 127.0.0.1 for local development)
+    upstream gateway {
+        server 127.0.0.1:8001;
+    }
+
+    upstream langgraph {
+        server 127.0.0.1:2024;
+    }
+
+    upstream frontend {
+        server 127.0.0.1:3000;
+    }
+
+    server {
+        listen 2026;
+        listen [::]:2026;
+        server_name _;
+
+        # Hide CORS headers from upstream to prevent duplicates
+        proxy_hide_header 'Access-Control-Allow-Origin';
+        proxy_hide_header 'Access-Control-Allow-Methods';
+        proxy_hide_header 'Access-Control-Allow-Headers';
+        proxy_hide_header 'Access-Control-Allow-Credentials';
+
+        # CORS headers for all responses (nginx handles CORS centrally)
+        add_header 'Access-Control-Allow-Origin' '*' always;
+        add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
+        add_header 'Access-Control-Allow-Headers' '*' always;
+
+        # Handle OPTIONS requests (CORS preflight)
+        if ($request_method = 'OPTIONS') {
+            return 204;
+        }
+
+        # LangGraph API routes (served by langgraph dev)
+        # Rewrites /api/langgraph/* to /* before proxying to LangGraph server
+        location /api/langgraph/ {
+            rewrite ^/api/langgraph/(.*) /$1 break;
+            proxy_pass http://langgraph;
+            proxy_http_version 1.1;
+
+            # Headers
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header Connection '';
+
+            # SSE/Streaming support
+            proxy_buffering off;
+            proxy_cache off;
+            proxy_set_header X-Accel-Buffering no;
+
+            # Timeouts for long-running requests
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
+            # Chunked transfer encoding
+            chunked_transfer_encoding on;
+        }
+
+        # Experimental: Gateway-backed LangGraph-compatible API
+        # Frontend can opt-in via NEXT_PUBLIC_LANGGRAPH_BASE_URL=/api/langgraph-compat
+        location /api/langgraph-compat/ {
+            rewrite ^/api/langgraph-compat/(.*) /api/$1 break;
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+
+            # Headers
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header Connection '';
+
+            # SSE/Streaming support
+            proxy_buffering off;
+            proxy_cache off;
+            proxy_set_header X-Accel-Buffering no;
+
+            # Timeouts for long-running requests
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
+            # Chunked transfer encoding
+            chunked_transfer_encoding on;
+        }
+
+        # Custom API: Models endpoint
+        location /api/models {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Memory endpoint
+        location /api/memory {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: MCP configuration endpoint
+        location /api/mcp {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Skills configuration endpoint
+        location /api/skills {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Agents endpoint
+        location /api/agents {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Custom API: Uploads endpoint
+        location ~ ^/api/threads/[^/]+/uploads {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+
+            # Large file upload support
+            client_max_body_size 100M;
+            proxy_request_buffering off;
+        }
+
+        # Custom API: Other endpoints under /api/threads
+        location ~ ^/api/threads {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: Swagger UI
+        location /api/docs {
+            # proxy_pass carries a URI part (/docs), so nginx replaces the
+            # matched /api/docs prefix with /docs before forwarding.
+            proxy_pass http://gateway/docs;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: ReDoc
+        location /api/redoc {
+            proxy_pass http://gateway/redoc;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # API Documentation: OpenAPI Schema
+        location /openapi.json {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Health check endpoint (gateway)
+        location /health {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # All other requests go to frontend
+        location / {
+            proxy_pass http://frontend;
+            proxy_http_version 1.1;
+
+            # Headers
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection 'upgrade';
+            proxy_cache_bypass $http_upgrade;
+
+            # Timeouts
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+        }
+    }
+}
--- a/deer-flow/docker/provisioner/Dockerfile
+++ b/deer-flow/docker/provisioner/Dockerfile
@@ -0,0 +1,29 @@
+# Provisioner image: installs FastAPI, uvicorn and the Kubernetes client,
+# then serves app.py with uvicorn on port 8002.
+FROM python:3.12-slim-bookworm
+
+# Optional build-time arguments; both are empty unless supplied at build time.
+#   APT_MIRROR    - hostname substituted for deb.debian.org in the apt sources
+#   PIP_INDEX_URL - package index passed to pip via --index-url
+ARG APT_MIRROR
+ARG PIP_INDEX_URL
+
+# Optionally override apt mirror for restricted networks (e.g. APT_MIRROR=mirrors.aliyun.com)
+# Both candidate sources files are rewritten; errors from sed (e.g. a file
+# that does not exist) are discarded and `|| true` keeps the build going.
+RUN if [ -n "${APT_MIRROR}" ]; then \
+      sed -i "s|deb.debian.org|${APT_MIRROR}|g" /etc/apt/sources.list.d/debian.sources 2>/dev/null || true; \
+      sed -i "s|deb.debian.org|${APT_MIRROR}|g" /etc/apt/sources.list 2>/dev/null || true; \
+    fi
+
+# Install system dependencies
+# The apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+# ${PIP_INDEX_URL:+...} expands to the --index-url option only when
+# PIP_INDEX_URL is set and non-empty; otherwise pip uses its default index.
+RUN pip install --no-cache-dir \
+    ${PIP_INDEX_URL:+--index-url "$PIP_INDEX_URL"} \
+    fastapi \
+    "uvicorn[standard]" \
+    kubernetes
+
+WORKDIR /app
+COPY app.py .
+
+EXPOSE 8002
+
+# Listen on all interfaces on the same port that EXPOSE declares above.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8002"]
--- a/deer-flow/docker/provisioner/README.md
+++ b/deer-flow/docker/provisioner/README.md
@@ -0,0 +1,332 @@
+# DeerFlow Sandbox Provisioner
+
+The **Sandbox Provisioner** is a FastAPI service that dynamically manages sandbox Pods in Kubernetes. It provides a REST API for the DeerFlow backend to create, monitor, and destroy isolated sandbox environments for code execution.
+
+## Architecture
+
+```
+┌────────────┐  HTTP  ┌─────────────┐  K8s API  ┌──────────────┐
+│  Backend   │ ─────▸ │ Provisioner │ ────────▸ │  Host K8s    │
+│  (gateway/ │        │   :8002     │           │  API Server  │
+│ langgraph) │        └─────────────┘           └──────┬───────┘
+└────────────┘                                          │ creates
+                                                        │
+                          ┌─────────────┐         ┌────▼─────┐
+                          │   Backend   │ ──────▸ │  Sandbox │
+                          │ (via Docker │ NodePort│  Pod(s)  │
+                          │   network)  │         └──────────┘
+                          └─────────────┘
+```
+
+### How It Works
+
+1. **Backend Request**: When the backend needs to execute code, it sends a `POST /api/sandboxes` request with a `sandbox_id` and `thread_id`.
+
+2. **Pod Creation**: The provisioner creates a dedicated Pod in the `deer-flow` namespace with:
+   - The sandbox container image (all-in-one-sandbox)
+   - HostPath volumes mounted for:
+     - `/mnt/skills` → Read-only access to public skills
+     - `/mnt/user-data` → Read-write access to thread-specific data
+   - Resource limits (CPU, memory, ephemeral storage)
+   - Readiness/liveness probes
+
+3. **Service Creation**: A NodePort Service is created to expose the Pod, with Kubernetes auto-allocating a port from the NodePort range (typically 30000-32767).
+
+4. **Access URL**: The provisioner returns `http://host.docker.internal:{NodePort}` to the backend, which the backend containers can reach directly.
+
+5. **Cleanup**: When the session ends, `DELETE /api/sandboxes/{sandbox_id}` removes both the Pod and Service.
+
+## Requirements
+
+Host machine with a running Kubernetes cluster (Docker Desktop K8s, OrbStack, minikube, kind, etc.)
+
+### Enable Kubernetes in Docker Desktop
+1. Open Docker Desktop settings
+2. Go to "Kubernetes" tab
+3. Check "Enable Kubernetes"
+4. Click "Apply & Restart"
+
+### Enable Kubernetes in OrbStack
+1. Open OrbStack settings
+2. Go to "Kubernetes" tab
+3. Check "Enable Kubernetes"
+
+## API Endpoints
+
+### `GET /health`
+Health check endpoint.
+
+**Response**:
+```json
+{
+  "status": "ok"
+}
+```
+
+### `POST /api/sandboxes`
+Create a new sandbox Pod + Service.
+
+**Request**:
+```json
+{
+  "sandbox_id": "abc-123",
+  "thread_id": "thread-456"
+}
+```
+
+**Response**:
+```json
+{
+  "sandbox_id": "abc-123",
+  "sandbox_url": "http://host.docker.internal:32123",
+  "status": "Pending"
+}
+```
+
+**Idempotent**: Calling with the same `sandbox_id` returns the existing sandbox info.
+
+### `GET /api/sandboxes/{sandbox_id}`
+Get status and URL of a specific sandbox.
+
+**Response**:
+```json
+{
+  "sandbox_id": "abc-123",
+  "sandbox_url": "http://host.docker.internal:32123",
+  "status": "Running"
+}
+```
+
+**Status Values**: `Pending`, `Running`, `Succeeded`, `Failed`, `Unknown`, `NotFound`
+
+### `DELETE /api/sandboxes/{sandbox_id}`
+Destroy a sandbox Pod + Service.
+
+**Response**:
+```json
+{
+  "ok": true,
+  "sandbox_id": "abc-123"
+}
+```
+
+### `GET /api/sandboxes`
+List all sandboxes currently managed.
+
+**Response**:
+```json
+{
+  "sandboxes": [
+    {
+      "sandbox_id": "abc-123",
+      "sandbox_url": "http://host.docker.internal:32123",
+      "status": "Running"
+    }
+  ],
+  "count": 1
+}
+```
+
+## Configuration
+
+The provisioner is configured via environment variables (set in [docker-compose-dev.yaml](../docker-compose-dev.yaml)):
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `K8S_NAMESPACE` | `deer-flow` | Kubernetes namespace for sandbox resources |
+| `SANDBOX_IMAGE` | `enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest` | Container image for sandbox Pods |
+| `SKILLS_HOST_PATH` | `/skills` | **Host machine** path to skills directory (must be absolute) |
+| `THREADS_HOST_PATH` | `/.deer-flow/threads` | **Host machine** path to threads data directory (must be absolute) |
+| `SKILLS_PVC_NAME` | empty (use hostPath) | PVC name for skills volume; when set, sandbox Pods use PVC instead of hostPath |
+| `USERDATA_PVC_NAME` | empty (use hostPath) | PVC name for user-data volume; when set, uses PVC with `subPath: threads/{thread_id}/user-data` |
+| `KUBECONFIG_PATH` | `/root/.kube/config` | Path to kubeconfig **inside** the provisioner container |
+| `NODE_HOST` | `host.docker.internal` | Hostname that backend containers use to reach host NodePorts |
+| `K8S_API_SERVER` | (from kubeconfig) | Override K8s API server URL (e.g., `https://host.docker.internal:26443`) |
+
+### Important: K8S_API_SERVER Override
+
+If your kubeconfig uses `localhost`, `127.0.0.1`, or `0.0.0.0` as the API server address (common with OrbStack, minikube, kind), the provisioner **cannot** reach it from inside the Docker container, because those addresses refer to the container itself rather than to the host.
+
+**Solution**: Set `K8S_API_SERVER` to use `host.docker.internal`:
+
+```yaml
+# docker-compose-dev.yaml
+provisioner:
+  environment:
+    - K8S_API_SERVER=https://host.docker.internal:26443  # Replace 26443 with your API port
+```
+
+Check your kubeconfig API server:
+```bash
+kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
+```
+
+## Prerequisites
+
+### Host Machine Requirements
+
+1. **Kubernetes Cluster**: 
+   - Docker Desktop with Kubernetes enabled, or
+   - OrbStack (built-in K8s), or
+   - minikube, kind, k3s, etc.
+
+2. **kubectl Configured**:
+   - `~/.kube/config` must exist and be valid
+   - Current context should point to your local cluster
+
+3. **Kubernetes Access**:
+   - The provisioner needs permissions to:
+     - Create/read/delete Pods in the `deer-flow` namespace
+     - Create/read/delete Services in the `deer-flow` namespace
+     - Read Namespaces (to create `deer-flow` if missing)
+
+4. **Host Paths**:
+   - The `SKILLS_HOST_PATH` and `THREADS_HOST_PATH` must be **absolute paths on the host machine**
+   - These paths are mounted into sandbox Pods via K8s HostPath volumes
+   - The paths must exist and be readable by the K8s node
+
+### Docker Compose Setup
+
+The provisioner runs as part of the docker-compose-dev stack:
+
+```bash
+# Start Docker services (provisioner starts only when config.yaml enables provisioner mode)
+make docker-start
+
+# Or start just the provisioner
+docker compose -p deer-flow-dev -f docker/docker-compose-dev.yaml up -d provisioner
+```
+
+The compose file:
+- Mounts your host's `~/.kube/config` into the container
+- Adds `extra_hosts` entry for `host.docker.internal` (required on Linux)
+- Configures environment variables for K8s access
+
+## Testing
+
+### Manual API Testing
+
+```bash
+# Health check
+curl http://localhost:8002/health
+
+# Create a sandbox (via provisioner container for internal DNS)
+docker exec deer-flow-provisioner curl -X POST http://localhost:8002/api/sandboxes \
+  -H "Content-Type: application/json" \
+  -d '{"sandbox_id":"test-001","thread_id":"thread-001"}'
+
+# Check sandbox status
+docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes/test-001
+
+# List all sandboxes
+docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes
+
+# Verify Pod and Service in K8s
+kubectl get pod,svc -n deer-flow -l sandbox-id=test-001
+
+# Delete sandbox
+docker exec deer-flow-provisioner curl -X DELETE http://localhost:8002/api/sandboxes/test-001
+```
+
+### Verify from Backend Containers
+
+Once a sandbox is created, the backend containers (gateway, langgraph) can access it:
+
+```bash
+# Get sandbox URL from provisioner
+SANDBOX_URL=$(docker exec deer-flow-provisioner curl -s http://localhost:8002/api/sandboxes/test-001 | jq -r .sandbox_url)
+
+# Test from gateway container
+docker exec deer-flow-gateway curl -s $SANDBOX_URL/v1/sandbox
+```
+
+## Troubleshooting
+
+### Issue: "Kubeconfig not found"
+
+**Cause**: The kubeconfig file doesn't exist at the mounted path.
+
+**Solution**: 
+- Ensure `~/.kube/config` exists on your host machine
+- Run `kubectl config view` to verify
+- Check the volume mount in docker-compose-dev.yaml
+
+### Issue: "Kubeconfig path is a directory"
+
+**Cause**: The mounted `KUBECONFIG_PATH` points to a directory instead of a file.
+
+**Solution**:
+- Ensure the compose mount source is a file (e.g., `~/.kube/config`) not a directory
+- Verify inside container:
+  ```bash
+  docker exec deer-flow-provisioner ls -ld /root/.kube/config
+  ```
+- Expected output should indicate a regular file (`-`), not a directory (`d`)
+
+### Issue: "Connection refused" to K8s API
+
+**Cause**: The provisioner can't reach the K8s API server.
+
+**Solution**:
+1. Check your kubeconfig server address:
+   ```bash
+   kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
+   ```
+2. If it's `localhost` or `127.0.0.1`, set `K8S_API_SERVER`:
+   ```yaml
+   environment:
+     - K8S_API_SERVER=https://host.docker.internal:PORT
+   ```
+
+### Issue: "Unprocessable Entity" when creating Pod
+
+**Cause**: HostPath volumes contain invalid paths (e.g., relative paths with `..`).
+
+**Solution**: 
+- Use absolute paths for `SKILLS_HOST_PATH` and `THREADS_HOST_PATH`
+- Verify the paths exist on your host machine:
+  ```bash
+  ls -la /path/to/skills
+  ls -la /path/to/backend/.deer-flow/threads
+  ```
+
+### Issue: Pod stuck in "ContainerCreating"
+
+**Cause**: Usually pulling the sandbox image from the registry.
+
+**Solution**:
+- Pre-pull the image: `make docker-init`
+- Check Pod events: `kubectl describe pod sandbox-XXX -n deer-flow`
+- Check node: `kubectl get nodes`
+
+### Issue: Cannot access sandbox URL from backend
+
+**Cause**: NodePort not reachable or `NODE_HOST` misconfigured.
+
+**Solution**:
+- Verify the Service exists: `kubectl get svc -n deer-flow`
+- Test from host: `curl http://localhost:NODE_PORT/v1/sandbox`
+- Ensure `extra_hosts` is set in docker-compose (Linux)
+- Check `NODE_HOST` env var matches how backend reaches host
+
+## Security Considerations
+
+1. **HostPath Volumes**: The provisioner mounts host directories into sandbox Pods by default. Ensure these paths contain only trusted data. For production, prefer PVC-based volumes (set `SKILLS_PVC_NAME` and `USERDATA_PVC_NAME`) to avoid node-specific data loss risks.
+
+2. **Resource Limits**: Each sandbox Pod has CPU, memory, and storage limits to prevent resource exhaustion.
+
+3. **Network Isolation**: Sandbox Pods run in the `deer-flow` namespace and are exposed through NodePort Services, so each sandbox is reachable on a port of the Kubernetes node (and therefore from the host network). Consider NetworkPolicies for stricter isolation.
+
+4. **kubeconfig Access**: The provisioner has full access to your Kubernetes cluster via the mounted kubeconfig. Run it only in trusted environments.
+
+5. **Image Trust**: The sandbox image should come from a trusted registry. Review and audit the image contents.
+
+## Future Enhancements
+
+- [ ] Support for custom resource requests/limits per sandbox
+- [x] PersistentVolume support for larger data requirements
+- [ ] Automatic cleanup of stale sandboxes (timeout-based)
+- [ ] Metrics and monitoring (Prometheus integration)
+- [ ] Multi-cluster support (route to different K8s clusters)
+- [ ] Pod affinity/anti-affinity rules for better placement
+- [ ] NetworkPolicy templates for sandbox isolation
--- a/deer-flow/docker/provisioner/app.py
+++ b/deer-flow/docker/provisioner/app.py
@@ -0,0 +1,582 @@
+"""DeerFlow Sandbox Provisioner Service.
+
+Dynamically creates and manages per-sandbox Pods in Kubernetes.
+Each ``sandbox_id`` gets its own Pod + NodePort Service.  The backend
+accesses sandboxes directly via ``{NODE_HOST}:{NodePort}``.
+
+The provisioner connects to the host machine's Kubernetes cluster via a
+mounted kubeconfig (``~/.kube/config``).  Sandbox Pods run on the host
+K8s and are accessed by the backend via ``{NODE_HOST}:{NodePort}``.
+
+Endpoints:
+    POST   /api/sandboxes              — Create a sandbox Pod + Service
+    DELETE /api/sandboxes/{sandbox_id} — Destroy a sandbox Pod + Service
+    GET    /api/sandboxes/{sandbox_id} — Get sandbox status & URL
+    GET    /api/sandboxes              — List all sandboxes
+    GET    /health                     — Provisioner health check
+
+Architecture (docker-compose-dev):
+    ┌────────────┐  HTTP  ┌─────────────┐  K8s API  ┌──────────────┐
+    │ remote     │ ─────▸ │ provisioner │ ────────▸ │  host K8s    │
+    │ _backend   │        │ :8002       │           │  API server  │
+    └────────────┘        └─────────────┘           └──────┬───────┘
+                                                           │ creates
+                          ┌─────────────┐           ┌──────▼───────┐
+                          │   backend   │ ────────▸ │   sandbox    │
+                          │             │  direct   │   Pod(s)     │
+                          └─────────────┘ NodePort  └──────────────┘
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import time
+from contextlib import asynccontextmanager
+
+import urllib3
+from fastapi import FastAPI, HTTPException
+from kubernetes import client as k8s_client
+from kubernetes import config as k8s_config
+from kubernetes.client.rest import ApiException
+from pydantic import BaseModel, Field
+
+# Suppress only the InsecureRequestWarning from urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+)
+
+# ── Configuration (all tuneable via environment variables) ───────────────
+
+K8S_NAMESPACE = os.environ.get("K8S_NAMESPACE", "deer-flow")
+SANDBOX_IMAGE = os.environ.get(
+    "SANDBOX_IMAGE",
+    "enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest",
+)
+SKILLS_HOST_PATH = os.environ.get("SKILLS_HOST_PATH", "/skills")
+THREADS_HOST_PATH = os.environ.get("THREADS_HOST_PATH", "/.deer-flow/threads")
+SKILLS_PVC_NAME = os.environ.get("SKILLS_PVC_NAME", "")
+USERDATA_PVC_NAME = os.environ.get("USERDATA_PVC_NAME", "")
+SAFE_THREAD_ID_PATTERN = r"^[A-Za-z0-9_\-]+$"
+
+# Path to the kubeconfig *inside* the provisioner container.
+# Typically the host's ~/.kube/config is mounted here.
+KUBECONFIG_PATH = os.environ.get("KUBECONFIG_PATH", "/root/.kube/config")
+
+# The hostname / IP that the *backend container* uses to reach NodePort
+# services on the host Kubernetes node.  On Docker Desktop for macOS this
+# is ``host.docker.internal``; on Linux it may be the host's LAN IP.
+NODE_HOST = os.environ.get("NODE_HOST", "host.docker.internal")
+
+
+def join_host_path(base: str, *parts: str) -> str:
+    """Join host filesystem path segments while preserving native style."""
+    if not parts:
+        return base
+
+    if re.match(r"^[A-Za-z]:[\\/]", base) or base.startswith("\\\\") or "\\" in base:
+        from pathlib import PureWindowsPath
+
+        result = PureWindowsPath(base)
+        for part in parts:
+            result /= part
+        return str(result)
+
+    from pathlib import Path
+
+    result = Path(base)
+    for part in parts:
+        result /= part
+    return str(result)
+
+
+def _validate_thread_id(thread_id: str) -> str:
+    if not re.match(SAFE_THREAD_ID_PATTERN, thread_id):
+        raise ValueError(
+            "Invalid thread_id: only alphanumeric characters, hyphens, and underscores are allowed."
+        )
+    return thread_id
+
+
+# ── K8s client setup ────────────────────────────────────────────────────
+
+core_v1: k8s_client.CoreV1Api | None = None
+
+
+def _init_k8s_client() -> k8s_client.CoreV1Api:
+    """Load Kubernetes configuration and return a ``CoreV1Api`` client.
+
+    Tries the mounted kubeconfig at ``KUBECONFIG_PATH`` first, then falls
+    back to in-cluster config (useful if the provisioner itself runs
+    inside K8s).
+
+    If the ``K8S_API_SERVER`` environment variable is set to a non-empty
+    value, the returned client is built from a copy of the default
+    configuration whose ``host`` is replaced by that value and whose TLS
+    certificate verification is disabled.
+
+    Returns:
+        A ``CoreV1Api`` using either the overridden configuration or the
+        default configuration loaded above.
+
+    Raises:
+        RuntimeError: If ``KUBECONFIG_PATH`` is a directory, if the
+            kubeconfig cannot be loaded, or if no kubeconfig exists and
+            in-cluster config is unavailable.
+    """
+    if os.path.exists(KUBECONFIG_PATH):
+        # Fail with a clear message when a directory sits at the file path.
+        if os.path.isdir(KUBECONFIG_PATH):
+            raise RuntimeError(
+                f"KUBECONFIG_PATH points to a directory, expected a file: {KUBECONFIG_PATH}"
+            )
+        try:
+            k8s_config.load_kube_config(config_file=KUBECONFIG_PATH)
+            logger.info(f"Loaded kubeconfig from {KUBECONFIG_PATH}")
+        except Exception as exc:
+            raise RuntimeError(
+                f"Failed to load kubeconfig from {KUBECONFIG_PATH}: {exc}"
+            ) from exc
+    else:
+        logger.warning(
+            f"Kubeconfig not found at {KUBECONFIG_PATH}; trying in-cluster config"
+        )
+        try:
+            k8s_config.load_incluster_config()
+        except Exception as exc:
+            raise RuntimeError(
+                "Failed to initialize Kubernetes client. "
+                f"No kubeconfig at {KUBECONFIG_PATH}, and in-cluster config is unavailable: {exc}"
+            ) from exc
+
+    # When connecting from inside Docker to the host's K8s API, the
+    # kubeconfig may reference ``localhost`` or ``127.0.0.1``.  We
+    # optionally rewrite the server address so it reaches the host.
+    k8s_api_server = os.environ.get("K8S_API_SERVER")
+    if k8s_api_server:
+        configuration = k8s_client.Configuration.get_default_copy()
+        configuration.host = k8s_api_server
+        # Self-signed certs are common for local clusters
+        # NOTE(review): this disables certificate verification for every
+        # request made through this client whenever K8S_API_SERVER is set.
+        configuration.verify_ssl = False
+        api_client = k8s_client.ApiClient(configuration)
+        return k8s_client.CoreV1Api(api_client)
+
+    # No override: use the default configuration loaded above.
+    return k8s_client.CoreV1Api()
+
+
+def _wait_for_kubeconfig(timeout: int = 30) -> None:
+    """Wait for kubeconfig file if configured, then continue with fallback support."""
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        if os.path.exists(KUBECONFIG_PATH):
+            if os.path.isfile(KUBECONFIG_PATH):
+                logger.info(f"Found kubeconfig file at {KUBECONFIG_PATH}")
+                return
+            if os.path.isdir(KUBECONFIG_PATH):
+                raise RuntimeError(
+                    "Kubeconfig path is a directory. "
+                    f"Please mount a kubeconfig file at {KUBECONFIG_PATH}."
+                )
+            raise RuntimeError(
+                f"Kubeconfig path exists but is not a regular file: {KUBECONFIG_PATH}"
+            )
+        logger.info(f"Waiting for kubeconfig at {KUBECONFIG_PATH} …")
+        time.sleep(2)
+    logger.warning(
+        f"Kubeconfig not found at {KUBECONFIG_PATH} after {timeout}s; "
+        "will attempt in-cluster Kubernetes config"
+    )
+
+
+def _ensure_namespace() -> None:
+    """Create the K8s namespace if it does not yet exist.
+
+    Uses the module-level ``core_v1`` client, which must already have been
+    initialised (it is ``None`` until the lifespan hook assigns it).
+
+    Raises:
+        ApiException: Re-raised when reading the namespace fails with any
+            status other than 404; errors from the create call propagate
+            as well.
+    """
+    try:
+        core_v1.read_namespace(K8S_NAMESPACE)
+        logger.info(f"Namespace '{K8S_NAMESPACE}' already exists")
+    except ApiException as exc:
+        # Only "not found" means the namespace has to be created.
+        if exc.status == 404:
+            ns = k8s_client.V1Namespace(
+                metadata=k8s_client.V1ObjectMeta(
+                    name=K8S_NAMESPACE,
+                    labels={
+                        "app.kubernetes.io/name": "deer-flow",
+                        "app.kubernetes.io/component": "sandbox",
+                    },
+                )
+            )
+            core_v1.create_namespace(ns)
+            logger.info(f"Created namespace '{K8S_NAMESPACE}'")
+        else:
+            raise
+
+
+# ── FastAPI lifespan ─────────────────────────────────────────────────────
+
+
+@asynccontextmanager
+async def lifespan(_app: FastAPI):
+    """FastAPI lifespan hook that prepares Kubernetes access before serving.
+
+    Before yielding it waits for the kubeconfig, initialises the
+    module-level ``core_v1`` client and makes sure the sandbox namespace
+    exists.  Nothing runs after the ``yield``, so there is no shutdown
+    cleanup.
+    """
+    global core_v1
+    # These helpers are synchronous (the wait uses time.sleep), so they run
+    # to completion before control is yielded back to the application.
+    _wait_for_kubeconfig()
+    core_v1 = _init_k8s_client()
+    _ensure_namespace()
+    logger.info("Provisioner is ready (using host Kubernetes)")
+    yield
+
+
+app = FastAPI(title="DeerFlow Sandbox Provisioner", lifespan=lifespan)
+
+
+# ── Request / Response models ───────────────────────────────────────────
+
+
+class CreateSandboxRequest(BaseModel):
+    """Request body carrying the identifiers needed to create a sandbox."""
+
+    # Used to derive the Pod and Service names and the ``sandbox-id`` label.
+    # No pattern is enforced on this field by the model.
+    sandbox_id: str
+    # Must match SAFE_THREAD_ID_PATTERN; the value ends up in the user-data
+    # hostPath / PVC subPath.
+    thread_id: str = Field(pattern=SAFE_THREAD_ID_PATTERN)
+
+
+class SandboxResponse(BaseModel):
+    """Response body describing one sandbox: its ID, access URL and status."""
+
+    sandbox_id: str
+    sandbox_url: str  # Direct access URL, e.g. http://host.docker.internal:{NodePort}
+    # Free-form status string; the model does not restrict its values.
+    status: str
+
+
+# ── K8s resource helpers ─────────────────────────────────────────────────
+
+
+def _pod_name(sandbox_id: str) -> str:
+    """Return the Pod name for a sandbox: ``sandbox-<sandbox_id>``."""
+    return f"sandbox-{sandbox_id}"
+
+
+def _svc_name(sandbox_id: str) -> str:
+    """Return the Service name for a sandbox: ``sandbox-<sandbox_id>-svc``."""
+    return f"sandbox-{sandbox_id}-svc"
+
+
+def _sandbox_url(node_port: int) -> str:
+    """Build the sandbox URL using the configured NODE_HOST.
+
+    Args:
+        node_port: Port number to place after the host.
+
+    Returns:
+        A plain-HTTP URL of the form ``http://<NODE_HOST>:<node_port>``.
+    """
+    return f"http://{NODE_HOST}:{node_port}"
+
+
+def _build_volumes(thread_id: str) -> list[k8s_client.V1Volume]:
+    """Build volume list: PVC when configured, otherwise hostPath.
+
+    The two volumes are chosen independently: ``skills`` depends on
+    ``SKILLS_PVC_NAME`` and ``user-data`` on ``USERDATA_PVC_NAME``.
+
+    Args:
+        thread_id: Thread whose directory backs the ``user-data`` hostPath.
+            It is not used here when a user-data PVC is configured.
+
+    Returns:
+        A two-element list: the ``skills`` volume followed by the
+        ``user-data`` volume.
+    """
+    if SKILLS_PVC_NAME:
+        skills_vol = k8s_client.V1Volume(
+            name="skills",
+            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
+                claim_name=SKILLS_PVC_NAME,
+                read_only=True,
+            ),
+        )
+    else:
+        # hostPath type "Directory": the directory must already exist.
+        skills_vol = k8s_client.V1Volume(
+            name="skills",
+            host_path=k8s_client.V1HostPathVolumeSource(
+                path=SKILLS_HOST_PATH,
+                type="Directory",
+            ),
+        )
+
+    if USERDATA_PVC_NAME:
+        # The whole claim is referenced here; no per-thread path is set on
+        # the volume itself.
+        userdata_vol = k8s_client.V1Volume(
+            name="user-data",
+            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
+                claim_name=USERDATA_PVC_NAME,
+            ),
+        )
+    else:
+        # Per-thread directory <THREADS_HOST_PATH>/<thread_id>/user-data;
+        # "DirectoryOrCreate" lets Kubernetes create it when missing.
+        userdata_vol = k8s_client.V1Volume(
+            name="user-data",
+            host_path=k8s_client.V1HostPathVolumeSource(
+                path=join_host_path(THREADS_HOST_PATH, thread_id, "user-data"),
+                type="DirectoryOrCreate",
+            ),
+        )
+
+    return [skills_vol, userdata_vol]
+
+
+def _build_volume_mounts(thread_id: str) -> list[k8s_client.V1VolumeMount]:
+    """Build volume mount list, using subPath for PVC user-data.
+
+    Args:
+        thread_id: Thread whose sub-directory of the user-data PVC is
+            mounted.  It is only used when ``USERDATA_PVC_NAME`` is set.
+
+    Returns:
+        A two-element list: the read-only ``skills`` mount at
+        ``/mnt/skills`` followed by the writable ``user-data`` mount at
+        ``/mnt/user-data``.
+    """
+    userdata_mount = k8s_client.V1VolumeMount(
+        name="user-data",
+        mount_path="/mnt/user-data",
+        read_only=False,
+    )
+    if USERDATA_PVC_NAME:
+        # With a PVC, restrict the mount to this thread's directory within
+        # the claim.
+        userdata_mount.sub_path = f"threads/{thread_id}/user-data"
+
+    return [
+        k8s_client.V1VolumeMount(
+            name="skills",
+            mount_path="/mnt/skills",
+            read_only=True,
+        ),
+        userdata_mount,
+    ]
+
+
+def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
+    """Assemble the V1Pod manifest for one sandbox.
+
+    *thread_id* is passed through ``_validate_thread_id`` before it is used
+    to build the volumes and mounts.  The Pod runs a single ``sandbox``
+    container from SANDBOX_IMAGE that exposes TCP port 8080 and is probed
+    over HTTP on ``/v1/sandbox``.
+    """
+    thread_id = _validate_thread_id(thread_id)
+
+    def _http_probe(initial_delay: int, period: int) -> k8s_client.V1Probe:
+        # Readiness and liveness probes differ only in their timings.
+        return k8s_client.V1Probe(
+            http_get=k8s_client.V1HTTPGetAction(
+                path="/v1/sandbox",
+                port=8080,
+            ),
+            initial_delay_seconds=initial_delay,
+            period_seconds=period,
+            timeout_seconds=3,
+            failure_threshold=3,
+        )
+
+    metadata = k8s_client.V1ObjectMeta(
+        name=_pod_name(sandbox_id),
+        namespace=K8S_NAMESPACE,
+        labels={
+            "app": "deer-flow-sandbox",
+            "sandbox-id": sandbox_id,
+            "app.kubernetes.io/name": "deer-flow",
+            "app.kubernetes.io/component": "sandbox",
+        },
+    )
+
+    resources = k8s_client.V1ResourceRequirements(
+        requests={
+            "cpu": "100m",
+            "memory": "256Mi",
+            "ephemeral-storage": "500Mi",
+        },
+        limits={
+            "cpu": "1000m",
+            "memory": "1Gi",
+            "ephemeral-storage": "500Mi",
+        },
+    )
+
+    # NOTE(review): allow_privilege_escalation=True is kept as-is; confirm the
+    # sandbox image actually needs it, since privileged is already False.
+    security = k8s_client.V1SecurityContext(
+        privileged=False,
+        allow_privilege_escalation=True,
+    )
+
+    container = k8s_client.V1Container(
+        name="sandbox",
+        image=SANDBOX_IMAGE,
+        image_pull_policy="IfNotPresent",
+        ports=[
+            k8s_client.V1ContainerPort(
+                name="http",
+                container_port=8080,
+                protocol="TCP",
+            )
+        ],
+        readiness_probe=_http_probe(initial_delay=5, period=5),
+        liveness_probe=_http_probe(initial_delay=10, period=10),
+        resources=resources,
+        volume_mounts=_build_volume_mounts(thread_id),
+        security_context=security,
+    )
+
+    return k8s_client.V1Pod(
+        metadata=metadata,
+        spec=k8s_client.V1PodSpec(
+            containers=[container],
+            volumes=_build_volumes(thread_id),
+            restart_policy="Always",
+        ),
+    )
+
+
+def _build_service(sandbox_id: str) -> k8s_client.V1Service:
+    """Assemble the NodePort V1Service manifest that fronts one sandbox Pod.
+
+    The Service selects Pods by their ``sandbox-id`` label and forwards TCP
+    port 8080 to target port 8080.  No ``node_port`` is set on the port.
+    """
+    labels = {
+        "app": "deer-flow-sandbox",
+        "sandbox-id": sandbox_id,
+        "app.kubernetes.io/name": "deer-flow",
+        "app.kubernetes.io/component": "sandbox",
+    }
+
+    # nodePort omitted → K8s auto-allocates from the range
+    http_port = k8s_client.V1ServicePort(
+        name="http",
+        port=8080,
+        target_port=8080,
+        protocol="TCP",
+    )
+
+    spec = k8s_client.V1ServiceSpec(
+        type="NodePort",
+        ports=[http_port],
+        selector={"sandbox-id": sandbox_id},
+    )
+    metadata = k8s_client.V1ObjectMeta(
+        name=_svc_name(sandbox_id),
+        namespace=K8S_NAMESPACE,
+        labels=labels,
+    )
+    return k8s_client.V1Service(metadata=metadata, spec=spec)
+
+
+def _get_node_port(sandbox_id: str) -> int | None:
+    """Read the K8s-allocated NodePort from the sandbox's Service.
+
+    Returns:
+        The ``node_port`` of the Service port named ``"http"``, or ``None``
+        when the Service cannot be read or has no such port.
+
+    A 404 is the expected "Service does not exist" answer and stays silent.
+    Any other API error is logged, so that permission or connectivity
+    problems are not silently mistaken for a missing Service.
+    """
+    try:
+        svc = core_v1.read_namespaced_service(_svc_name(sandbox_id), K8S_NAMESPACE)
+    except ApiException as exc:
+        if exc.status != 404:
+            logger.warning(
+                f"Failed to read Service {_svc_name(sandbox_id)}: {exc.reason}"
+            )
+        return None
+
+    for port in svc.spec.ports or []:
+        if port.name == "http":
+            return port.node_port
+    return None
+
+
+def _get_pod_phase(sandbox_id: str) -> str:
+    """Return the Pod phase (Pending / Running / Succeeded / Failed / Unknown).
+
+    Returns:
+        The phase reported for the sandbox's Pod; ``"Unknown"`` when the Pod
+        carries no phase or the API call fails with anything other than 404;
+        ``"NotFound"`` only when the API answers 404 for the Pod.
+    """
+    try:
+        pod = core_v1.read_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
+    except ApiException as exc:
+        if exc.status == 404:
+            return "NotFound"
+        # A failed lookup is not proof of absence: report it and say "Unknown".
+        logger.warning(f"Failed to read Pod {_pod_name(sandbox_id)}: {exc.reason}")
+        return "Unknown"
+
+    phase = pod.status.phase if pod.status else None
+    return phase or "Unknown"
+
+
+# ── API endpoints ────────────────────────────────────────────────────────
+
+
+@app.get("/health")
+async def health():
+    """Health-check endpoint for the provisioner; always answers ok."""
+    return dict(status="ok")
+
+
+@app.post("/api/sandboxes", response_model=SandboxResponse)
+async def create_sandbox(req: CreateSandboxRequest):
+    """Create a sandbox Pod + NodePort Service for *sandbox_id*.
+
+    If the sandbox already exists, returns the existing information
+    (idempotent).
+
+    Steps: return early when a Service with a NodePort already exists;
+    otherwise create the Pod, then the Service (deleting the Pod again if
+    Service creation fails), then poll for the allocated NodePort.
+
+    Raises:
+        HTTPException: 500 when Pod or Service creation fails with anything
+            other than 409 (AlreadyExists), or when no NodePort shows up
+            after 20 polls spaced 0.5 s apart.
+    """
+    # Function-scope import so the non-blocking sleep below is always in scope.
+    import asyncio
+
+    sandbox_id = req.sandbox_id
+    thread_id = req.thread_id
+
+    logger.info(
+        f"Received request to create sandbox '{sandbox_id}' for thread '{thread_id}'"
+    )
+
+    # ── Fast path: sandbox already exists ────────────────────────────
+    existing_port = _get_node_port(sandbox_id)
+    if existing_port:
+        return SandboxResponse(
+            sandbox_id=sandbox_id,
+            sandbox_url=_sandbox_url(existing_port),
+            status=_get_pod_phase(sandbox_id),
+        )
+
+    # ── Create Pod ───────────────────────────────────────────────────
+    try:
+        core_v1.create_namespaced_pod(K8S_NAMESPACE, _build_pod(sandbox_id, thread_id))
+        logger.info(f"Created Pod {_pod_name(sandbox_id)}")
+    except ApiException as exc:
+        if exc.status != 409:  # 409 = AlreadyExists
+            raise HTTPException(
+                status_code=500, detail=f"Pod creation failed: {exc.reason}"
+            ) from exc
+
+    # ── Create Service ───────────────────────────────────────────────
+    try:
+        core_v1.create_namespaced_service(K8S_NAMESPACE, _build_service(sandbox_id))
+        logger.info(f"Created Service {_svc_name(sandbox_id)}")
+    except ApiException as exc:
+        if exc.status != 409:
+            # Roll back the Pod on failure (best effort: the Service error is
+            # what gets reported to the caller either way).
+            try:
+                core_v1.delete_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
+            except ApiException as rollback_exc:
+                if rollback_exc.status != 404:
+                    logger.warning(
+                        f"Rollback of Pod {_pod_name(sandbox_id)} failed: "
+                        f"{rollback_exc.reason}"
+                    )
+            raise HTTPException(
+                status_code=500, detail=f"Service creation failed: {exc.reason}"
+            ) from exc
+
+    # ── Read the auto-allocated NodePort ─────────────────────────────
+    node_port: int | None = None
+    for _ in range(20):
+        node_port = _get_node_port(sandbox_id)
+        if node_port:
+            break
+        # This handler is a coroutine: time.sleep() would stall the event
+        # loop (and every other request) for the whole wait.
+        await asyncio.sleep(0.5)
+
+    if not node_port:
+        raise HTTPException(
+            status_code=500, detail="NodePort was not allocated in time"
+        )
+
+    return SandboxResponse(
+        sandbox_id=sandbox_id,
+        sandbox_url=_sandbox_url(node_port),
+        status=_get_pod_phase(sandbox_id),
+    )
+
+
+@app.delete("/api/sandboxes/{sandbox_id}")
+async def destroy_sandbox(sandbox_id: str):
+    """Delete the Service and then the Pod belonging to *sandbox_id*.
+
+    A 404 from either delete is treated as "already gone".  Any other API
+    error is collected, and once both deletes have been attempted a 500
+    HTTPException listing the failures is raised.  On success the response
+    is ``{"ok": True, "sandbox_id": sandbox_id}``.
+    """
+    # (log label, error prefix, delete call, resource name) — Service first.
+    steps = (
+        ("Service", "service", core_v1.delete_namespaced_service, _svc_name(sandbox_id)),
+        ("Pod", "pod", core_v1.delete_namespaced_pod, _pod_name(sandbox_id)),
+    )
+
+    failures: list[str] = []
+    for label, prefix, delete, resource in steps:
+        try:
+            delete(resource, K8S_NAMESPACE)
+            logger.info(f"Deleted {label} {resource}")
+        except ApiException as exc:
+            if exc.status != 404:
+                failures.append(f"{prefix}: {exc.reason}")
+
+    if failures:
+        raise HTTPException(
+            status_code=500, detail=f"Partial cleanup: {', '.join(failures)}"
+        )
+
+    return {"ok": True, "sandbox_id": sandbox_id}
+
+
+@app.get("/api/sandboxes/{sandbox_id}", response_model=SandboxResponse)
+async def get_sandbox(sandbox_id: str):
+    """Look up one sandbox and report its URL and Pod phase.
+
+    Raises:
+        HTTPException: 404 when no NodePort can be read for *sandbox_id*.
+    """
+    port = _get_node_port(sandbox_id)
+    if port:
+        return SandboxResponse(
+            sandbox_id=sandbox_id,
+            sandbox_url=_sandbox_url(port),
+            status=_get_pod_phase(sandbox_id),
+        )
+
+    raise HTTPException(status_code=404, detail=f"Sandbox '{sandbox_id}' not found")
+
+
+@app.get("/api/sandboxes")
+async def list_sandboxes():
+    """Enumerate the sandboxes found via Services labelled ``app=deer-flow-sandbox``.
+
+    Services without a ``sandbox-id`` label, or without a NodePort on their
+    ``http`` port, are left out.  The response is
+    ``{"sandboxes": [...], "count": <number of entries>}``.
+
+    Raises:
+        HTTPException: 500 when listing the Services fails.
+    """
+    try:
+        listing = core_v1.list_namespaced_service(
+            K8S_NAMESPACE,
+            label_selector="app=deer-flow-sandbox",
+        )
+    except ApiException as exc:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to list services: {exc.reason}"
+        )
+
+    found: list[SandboxResponse] = []
+    for service in listing.items:
+        labels = service.metadata.labels or {}
+        sid = labels.get("sandbox-id")
+        if not sid:
+            continue
+
+        # NodePort of the first port named "http", if the Service has one.
+        http_node_port = next(
+            (p.node_port for p in service.spec.ports or [] if p.name == "http"),
+            None,
+        )
+        if not http_node_port:
+            continue
+
+        found.append(
+            SandboxResponse(
+                sandbox_id=sid,
+                sandbox_url=_sandbox_url(http_node_port),
+                status=_get_pod_phase(sid),
+            )
+        )
+
+    return {"sandboxes": found, "count": len(found)}