Files
deerflow-factory/deer-flow/backend/packages/harness/deerflow/skills/manager.py
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

160 lines
5.5 KiB
Python

"""Utilities for managing custom skills and their history."""
from __future__ import annotations
import json
import re
import tempfile
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from deerflow.config import get_app_config
from deerflow.skills.loader import load_skills
from deerflow.skills.validation import _validate_skill_frontmatter
SKILL_FILE_NAME = "SKILL.md"
HISTORY_FILE_NAME = "HISTORY.jsonl"
HISTORY_DIR_NAME = ".history"
ALLOWED_SUPPORT_SUBDIRS = {"references", "templates", "scripts", "assets"}
_SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
def get_skills_root_dir() -> Path:
return get_app_config().skills.get_skills_path()
def get_public_skills_dir() -> Path:
return get_skills_root_dir() / "public"
def get_custom_skills_dir() -> Path:
path = get_skills_root_dir() / "custom"
path.mkdir(parents=True, exist_ok=True)
return path
def validate_skill_name(name: str) -> str:
normalized = name.strip()
if not _SKILL_NAME_PATTERN.fullmatch(normalized):
raise ValueError("Skill name must be hyphen-case using lowercase letters, digits, and hyphens only.")
if len(normalized) > 64:
raise ValueError("Skill name must be 64 characters or fewer.")
return normalized
def get_custom_skill_dir(name: str) -> Path:
return get_custom_skills_dir() / validate_skill_name(name)
def get_custom_skill_file(name: str) -> Path:
return get_custom_skill_dir(name) / SKILL_FILE_NAME
def get_custom_skill_history_dir() -> Path:
path = get_custom_skills_dir() / HISTORY_DIR_NAME
path.mkdir(parents=True, exist_ok=True)
return path
def get_skill_history_file(name: str) -> Path:
return get_custom_skill_history_dir() / f"{validate_skill_name(name)}.jsonl"
def get_public_skill_dir(name: str) -> Path:
return get_public_skills_dir() / validate_skill_name(name)
def custom_skill_exists(name: str) -> bool:
return get_custom_skill_file(name).exists()
def public_skill_exists(name: str) -> bool:
return (get_public_skill_dir(name) / SKILL_FILE_NAME).exists()
def ensure_custom_skill_is_editable(name: str) -> None:
if custom_skill_exists(name):
return
if public_skill_exists(name):
raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.")
raise FileNotFoundError(f"Custom skill '{name}' not found.")
def ensure_safe_support_path(name: str, relative_path: str) -> Path:
skill_dir = get_custom_skill_dir(name).resolve()
if not relative_path or relative_path.endswith("/"):
raise ValueError("Supporting file path must include a filename.")
relative = Path(relative_path)
if relative.is_absolute():
raise ValueError("Supporting file path must be relative.")
if any(part in {"..", ""} for part in relative.parts):
raise ValueError("Supporting file path must not contain parent-directory traversal.")
top_level = relative.parts[0] if relative.parts else ""
if top_level not in ALLOWED_SUPPORT_SUBDIRS:
raise ValueError(f"Supporting files must live under one of: {', '.join(sorted(ALLOWED_SUPPORT_SUBDIRS))}.")
target = (skill_dir / relative).resolve()
allowed_root = (skill_dir / top_level).resolve()
try:
target.relative_to(allowed_root)
except ValueError as exc:
raise ValueError("Supporting file path must stay within the selected support directory.") from exc
return target
def validate_skill_markdown_content(name: str, content: str) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
temp_skill_dir = Path(tmp_dir) / validate_skill_name(name)
temp_skill_dir.mkdir(parents=True, exist_ok=True)
(temp_skill_dir / SKILL_FILE_NAME).write_text(content, encoding="utf-8")
is_valid, message, parsed_name = _validate_skill_frontmatter(temp_skill_dir)
if not is_valid:
raise ValueError(message)
if parsed_name != name:
raise ValueError(f"Frontmatter name '{parsed_name}' must match requested skill name '{name}'.")
def atomic_write(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False, dir=str(path.parent)) as tmp_file:
tmp_file.write(content)
tmp_path = Path(tmp_file.name)
tmp_path.replace(path)
def append_history(name: str, record: dict[str, Any]) -> None:
history_path = get_skill_history_file(name)
history_path.parent.mkdir(parents=True, exist_ok=True)
payload = {
"ts": datetime.now(UTC).isoformat(),
**record,
}
with history_path.open("a", encoding="utf-8") as f:
f.write(json.dumps(payload, ensure_ascii=False))
f.write("\n")
def read_history(name: str) -> list[dict[str, Any]]:
history_path = get_skill_history_file(name)
if not history_path.exists():
return []
records: list[dict[str, Any]] = []
for line in history_path.read_text(encoding="utf-8").splitlines():
if not line.strip():
continue
records.append(json.loads(line))
return records
def list_custom_skills() -> list:
return [skill for skill in load_skills(enabled_only=False) if skill.category == "custom"]
def read_custom_skill_content(name: str) -> str:
skill_file = get_custom_skill_file(name)
if not skill_file.exists():
raise FileNotFoundError(f"Custom skill '{name}' not found.")
return skill_file.read_text(encoding="utf-8")