Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
This commit is contained in:
234
deer-flow/skills/public/claude-to-deerflow/scripts/chat.sh
Executable file
234
deer-flow/skills/public/claude-to-deerflow/scripts/chat.sh
Executable file
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/env bash
# chat.sh — Send a message to DeerFlow and collect the streaming response.
#
# Usage:
#   bash chat.sh "Your question here"
#   bash chat.sh "Your question" <thread_id>          # continue conversation
#   bash chat.sh "Your question" "" pro               # specify mode
#   DEERFLOW_URL=http://host:2026 bash chat.sh "hi"   # custom endpoint
#
# Environment variables:
#   DEERFLOW_URL — Unified proxy base URL (default: http://localhost:2026)
#   DEERFLOW_GATEWAY_URL — Gateway API base URL (default: $DEERFLOW_URL)
#   DEERFLOW_LANGGRAPH_URL — LangGraph API base URL (default: $DEERFLOW_URL/api/langgraph)
#
# Modes: flash, standard, pro (default), ultra

set -euo pipefail

# Endpoint configuration — every URL can be overridden from the environment;
# the gateway and LangGraph endpoints fall back to the unified proxy.
DEERFLOW_URL="${DEERFLOW_URL:-http://localhost:2026}"
GATEWAY_URL="${DEERFLOW_GATEWAY_URL:-$DEERFLOW_URL}"
LANGGRAPH_URL="${DEERFLOW_LANGGRAPH_URL:-$DEERFLOW_URL/api/langgraph}"

# Positional arguments: the message is mandatory; thread id and mode are optional.
MESSAGE="${1:?Usage: chat.sh <message> [thread_id] [mode]}"
THREAD_ID="${2:-}"
MODE="${3:-pro}"
|
||||
|
||||
# --- Health check ---
# A curl failure (server down) yields "000"; any 4xx/5xx also counts as down.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${GATEWAY_URL}/health" 2>/dev/null || echo "000")
if [ "$HTTP_CODE" = "000" ] || [ "$HTTP_CODE" -ge 400 ]; then
  echo "ERROR: DeerFlow is not reachable at ${GATEWAY_URL} (HTTP ${HTTP_CODE})" >&2
  echo "Make sure DeerFlow is running. Start it with: cd <deerflow-dir> && make dev" >&2
  exit 1
fi

# --- Create or reuse thread ---
if [ -z "$THREAD_ID" ]; then
  # '|| true' keeps set -e/pipefail from aborting the script silently when
  # curl fails or the response is not the expected JSON, so the friendly
  # error message below can actually fire.
  THREAD_RESP=$(curl -s -X POST "${LANGGRAPH_URL}/threads" \
    -H "Content-Type: application/json" \
    -d '{}' || true)
  THREAD_ID=$(echo "$THREAD_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['thread_id'])" 2>/dev/null || true)
  if [ -z "$THREAD_ID" ]; then
    echo "ERROR: Failed to create thread. Response: ${THREAD_RESP}" >&2
    exit 1
  fi
  echo "Thread: ${THREAD_ID}" >&2
fi
|
||||
|
||||
# --- Build context based on mode ---
# Each mode is just a combination of three agent feature flags; resolve the
# flags first, then assemble the context JSON in one place.
case "$MODE" in
  flash)    THINKING=false; PLAN=false; SUBAGENT=false ;;
  standard) THINKING=true;  PLAN=false; SUBAGENT=false ;;
  pro)      THINKING=true;  PLAN=true;  SUBAGENT=false ;;
  ultra)    THINKING=true;  PLAN=true;  SUBAGENT=true ;;
  *)
    echo "ERROR: Unknown mode '${MODE}'. Use: flash, standard, pro, ultra" >&2
    exit 1
    ;;
esac
CONTEXT='{"thinking_enabled":'"$THINKING"',"is_plan_mode":'"$PLAN"',"subagent_enabled":'"$SUBAGENT"',"thread_id":"'"$THREAD_ID"'"}'
|
||||
|
||||
# --- Escape message for JSON ---
# json.dumps both quotes and escapes the raw message, so it can be embedded
# verbatim inside the request body below.
MSG_JSON=$(python3 -c 'import json,sys; print(json.dumps(sys.argv[1]))' "$MESSAGE")

# --- Build request body ---
BODY=$(cat <<REQUEST
{
  "assistant_id": "lead_agent",
  "input": {
    "messages": [
      {
        "type": "human",
        "content": [{"type": "text", "text": ${MSG_JSON}}]
      }
    ]
  },
  "stream_mode": ["values", "messages-tuple"],
  "stream_subgraphs": true,
  "config": {
    "recursion_limit": 1000
  },
  "context": ${CONTEXT}
}
REQUEST
)
|
||||
|
||||
# --- Stream the run and extract final response ---
# We collect the full SSE output, then parse the last values event to get the AI response.
TMPFILE=$(mktemp)
# Single quotes defer expansion until the trap actually fires; the inner
# double quotes protect the path. (The old double-quoted form expanded
# $TMPFILE at trap-definition time.)
trap 'rm -f "$TMPFILE"' EXIT

curl -s -N -X POST "${LANGGRAPH_URL}/threads/${THREAD_ID}/runs/stream" \
  -H "Content-Type: application/json" \
  -d "$BODY" > "$TMPFILE"

# Parse the SSE output: extract the last "event: values" data block and get the final AI message
python3 - "$TMPFILE" "$GATEWAY_URL" "$THREAD_ID" << 'PYEOF'
|
||||
import json
import sys


def parse_sse(raw):
    """Split raw SSE text into a list of (event_type, data) tuples.

    Multi-line data blocks are joined with newlines. A data block is emitted
    when the next "event:" line starts, on the blank line ending an event, or
    at end of input.
    """
    events = []
    current_event = None
    data_lines = []
    for line in raw.split("\n"):
        if line.startswith("event:"):
            # A new event starts; flush the previous one if it carried data.
            if current_event and data_lines:
                events.append((current_event, "\n".join(data_lines)))
            current_event = line[len("event:"):].strip()
            data_lines = []
        elif line.startswith("data:"):
            data_lines.append(line[len("data:"):].strip())
        elif line == "" and current_event:
            # Blank line terminates the current event.
            if data_lines:
                events.append((current_event, "\n".join(data_lines)))
            current_event = None
            data_lines = []
    # Flush a trailing event that was not followed by a blank line.
    if current_event and data_lines:
        events.append((current_event, "\n".join(data_lines)))
    return events


def extract_response_text(messages):
    """Mirror manager.py _extract_response_text: handles ask_clarification interrupt + regular AI."""
    for msg in reversed(messages):
        if not isinstance(msg, dict):
            continue
        msg_type = msg.get("type")
        # ask_clarification interrupt: tool message with name ask_clarification
        if msg_type == "tool" and msg.get("name") == "ask_clarification":
            content = msg.get("content", "")
            if isinstance(content, str) and content:
                return content
        # Regular AI message: content may be a plain string or a list of blocks.
        if msg_type == "ai":
            content = msg.get("content", "")
            if isinstance(content, str) and content:
                return content
            if isinstance(content, list):
                parts = []
                for block in content:
                    if isinstance(block, dict) and block.get("type") == "text":
                        parts.append(block.get("text", ""))
                    elif isinstance(block, str):
                        parts.append(block)
                text = "".join(parts)
                if text:
                    return text
    return ""


def extract_artifacts(messages):
    """Mirror manager.py _extract_artifacts: only artifacts from the last response cycle."""
    artifacts = []
    for msg in reversed(messages):
        if not isinstance(msg, dict):
            continue
        if msg.get("type") == "human":
            # Stop at the previous user turn — earlier artifacts are stale.
            break
        if msg.get("type") == "ai":
            for tc in msg.get("tool_calls", []):
                if isinstance(tc, dict) and tc.get("name") == "present_files":
                    paths = tc.get("args", {}).get("filepaths", [])
                    if isinstance(paths, list):
                        artifacts.extend(p for p in paths if isinstance(p, str))
    return artifacts


def artifact_url(gateway_url, thread_id, virtual_path):
    """Map a virtual path like /mnt/user-data/outputs/file.md to its API URL.

    API endpoint: {gateway}/api/threads/{thread_id}/artifacts/{path without leading slash}
    """
    path = virtual_path.lstrip("/")
    return f"{gateway_url}/api/threads/{thread_id}/artifacts/{path}"


def format_artifact_text(gateway_url, thread_id, artifacts):
    """Render artifact paths as a human-readable 'Created File(s)' blurb."""
    urls = [artifact_url(gateway_url, thread_id, p) for p in artifacts]
    if len(urls) == 1:
        return f"Created File: {urls[0]}"
    return "Created Files:\n" + "\n".join(urls)


def main(argv):
    """Read the SSE capture file, print the final AI response, return exit status."""
    sse_file = argv[1] if len(argv) > 1 else None
    gateway_url = argv[2].rstrip("/") if len(argv) > 2 else "http://localhost:2026"
    thread_id = argv[3] if len(argv) > 3 else ""
    if not sse_file:
        return 1

    with open(sse_file, "r") as f:
        raw = f.read()

    events = parse_sse(raw)

    # Find the last "values" event with messages.
    result_messages = None
    for event_type, data_str in reversed(events):
        if event_type != "values":
            continue
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            continue
        if "messages" in data:
            result_messages = data["messages"]
            break

    if result_messages is not None:
        response_text = extract_response_text(result_messages)
        artifacts = extract_artifacts(result_messages)
        if artifacts:
            artifact_text = format_artifact_text(gateway_url, thread_id, artifacts)
            response_text = (response_text + "\n\n" + artifact_text) if response_text else artifact_text
        if response_text:
            print(response_text)
        else:
            print("(No response from agent)", file=sys.stderr)
            return 1
    else:
        # Check for error events before giving up.
        for event_type, data_str in events:
            if event_type == "error":
                print(f"ERROR from DeerFlow: {data_str}", file=sys.stderr)
                return 1
        print("No AI response found in the stream.", file=sys.stderr)
        if len(raw) < 2000:
            print(f"Raw SSE output:\n{raw}", file=sys.stderr)
        return 1
    return 0


# When fed to `python3 -` via the shell heredoc, __name__ is "__main__",
# so behavior is unchanged; importing the module for tests skips main().
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
PYEOF
|
||||
|
||||
# Trailer: blank line + separator on stdout, thread id on stderr so it can be
# reused for follow-up calls without polluting captured output.
printf '\n'
printf '%s\n' "---"
printf '%s\n' "Thread ID: ${THREAD_ID}" >&2
|
||||
98
deer-flow/skills/public/claude-to-deerflow/scripts/status.sh
Executable file
98
deer-flow/skills/public/claude-to-deerflow/scripts/status.sh
Executable file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env bash
# status.sh — Check DeerFlow status and list available resources.
#
# Usage:
#   bash status.sh                 # health + summary
#   bash status.sh models          # list models
#   bash status.sh skills          # list skills
#   bash status.sh agents          # list agents
#   bash status.sh threads         # list recent threads
#   bash status.sh memory          # show memory
#   bash status.sh thread <id>     # show thread history
#
# Environment variables:
#   DEERFLOW_URL — Unified proxy base URL (default: http://localhost:2026)
#   DEERFLOW_GATEWAY_URL — Gateway API base URL (default: $DEERFLOW_URL)
#   DEERFLOW_LANGGRAPH_URL — LangGraph API base URL (default: $DEERFLOW_URL/api/langgraph)

set -euo pipefail

# Endpoint configuration — same override scheme as chat.sh.
DEERFLOW_URL="${DEERFLOW_URL:-http://localhost:2026}"
GATEWAY_URL="${DEERFLOW_GATEWAY_URL:-$DEERFLOW_URL}"
LANGGRAPH_URL="${DEERFLOW_LANGGRAPH_URL:-$DEERFLOW_URL/api/langgraph}"

# Subcommand (defaults to a health check) and its optional argument.
CMD="${1:-health}"
ARG="${2:-}"
|
||||
|
||||
# Dispatch on the subcommand. Each branch talks to one API endpoint and
# pretty-prints the result; unknown commands exit with usage.
case "$CMD" in
  health)
    echo "Checking DeerFlow at ${GATEWAY_URL}..."
    # curl failure (server down) yields "000"; 4xx/5xx means the server is
    # up but unhealthy.
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${GATEWAY_URL}/health" 2>/dev/null || echo "000")
    if [ "$HTTP_CODE" = "000" ]; then
      echo "UNREACHABLE — DeerFlow is not running at ${GATEWAY_URL}"
      exit 1
    elif [ "$HTTP_CODE" -ge 400 ]; then
      echo "ERROR — Health check returned HTTP ${HTTP_CODE}"
      exit 1
    else
      echo "OK — DeerFlow is running (HTTP ${HTTP_CODE})"
    fi
    ;;
  models)
    curl -s "${GATEWAY_URL}/api/models" | python3 -m json.tool
    ;;
  skills)
    curl -s "${GATEWAY_URL}/api/skills" | python3 -m json.tool
    ;;
  agents)
    curl -s "${GATEWAY_URL}/api/agents" | python3 -m json.tool
    ;;
  threads)
    # List the 20 most recently updated threads as "id updated title" lines.
    curl -s -X POST "${LANGGRAPH_URL}/threads/search" \
      -H "Content-Type: application/json" \
      -d '{"limit": 20, "sort_by": "updated_at", "sort_order": "desc", "select": ["thread_id", "updated_at", "values"]}' \
      | python3 -c "
import json, sys
threads = json.load(sys.stdin)
if not threads:
    print('No threads found.')
    sys.exit(0)
for t in threads:
    tid = t.get('thread_id', '?')
    updated = t.get('updated_at', '?')
    title = (t.get('values') or {}).get('title', '(untitled)')
    print(f'{tid} {updated} {title}')
"
    ;;
  memory)
    curl -s "${GATEWAY_URL}/api/memory" | python3 -m json.tool
    ;;
  thread)
    if [ -z "$ARG" ]; then
      echo "Usage: status.sh thread <thread_id>" >&2
      exit 1
    fi
    # Show a short preview of the last few messages in the most recent states.
    curl -s "${LANGGRAPH_URL}/threads/${ARG}/history" | python3 -c "
import json, sys
data = json.load(sys.stdin)
if isinstance(data, list):
    for state in data[:5]:
        # 'values'/'messages' may be present but null; use the same
        # defensive 'or' fallback as the threads listing above instead
        # of crashing on None.
        values = state.get('values') or {}
        msgs = values.get('messages') or []
        for m in msgs[-5:]:
            role = m.get('type', '?')
            content = m.get('content', '')
            if isinstance(content, list):
                content = ' '.join(p.get('text','') for p in content if isinstance(p, dict))
            preview = content[:200] if content else '(empty)'
            print(f'[{role}] {preview}')
        print('---')
else:
    print(json.dumps(data, indent=2))
"
    ;;
  *)
    echo "Unknown command: ${CMD}" >&2
    echo "Usage: status.sh [health|models|skills|agents|threads|memory|thread <id>]" >&2
    exit 1
    ;;
esac
|
||||
Reference in New Issue
Block a user