Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions
--- a/deer-flow/skills/public/image-generation/scripts/generate.py
+++ b/deer-flow/skills/public/image-generation/scripts/generate.py
@@ -0,0 +1,132 @@
+import base64
+import os
+
+import requests
+from PIL import Image
+
+
+def validate_image(image_path: str) -> bool:
+    """
+    Check that an image file opens cleanly and that its data can be loaded.
+
+    Args:
+        image_path: Path to the image file
+
+    Returns:
+        True when both the verify pass and the full-load pass succeed;
+        False (after printing a warning) when either of them raises
+    """
+    try:
+        # First pass: integrity check only.
+        with Image.open(image_path) as probe:
+            probe.verify()
+        # Second pass on a freshly opened handle: force the image data to
+        # load, because verify() may not catch every problem.
+        with Image.open(image_path) as decoded:
+            decoded.load()
+    except Exception as err:
+        print(f"Warning: Image '{image_path}' is invalid or corrupted: {err}")
+        return False
+    else:
+        return True
+
+
+def generate_image(
+    prompt_file: str,
+    reference_images: list[str],
+    output_file: str,
+    aspect_ratio: str = "16:9",
+    timeout: float = 300.0,
+) -> str:
+    """
+    Generate an image with the Gemini API and write it to ``output_file``.
+
+    Args:
+        prompt_file: Path to a UTF-8 text file; its whole content is sent as the prompt
+        reference_images: Paths to reference images; entries rejected by
+            validate_image() are skipped with a printed notice
+        output_file: Path the decoded image bytes are written to
+        aspect_ratio: Value sent as generationConfig.imageConfig.aspectRatio
+        timeout: Timeout in seconds handed to requests.post()
+
+    Returns:
+        A success message naming ``output_file``, or the string
+        "GEMINI_API_KEY is not set" when that environment variable is
+        missing or empty
+
+    Raises:
+        requests.HTTPError: If the API answers with an error status
+        requests.Timeout: If the request exceeds ``timeout``
+        Exception: If the response does not contain exactly one image part
+    """
+    import mimetypes
+
+    with open(prompt_file, "r", encoding="utf-8") as f:
+        prompt = f.read()
+
+    # Filter out invalid reference images
+    valid_reference_images = []
+    for ref_img in reference_images:
+        if validate_image(ref_img):
+            valid_reference_images.append(ref_img)
+        else:
+            print(f"Skipping invalid reference image: {ref_img}")
+
+    skipped = len(reference_images) - len(valid_reference_images)
+    if skipped:
+        print(f"Note: {skipped} reference image(s) were skipped due to validation failure.")
+
+    # Check the key before reading and base64-encoding the images so a
+    # missing key does not cost any file I/O.
+    api_key = os.getenv("GEMINI_API_KEY")
+    if not api_key:
+        return "GEMINI_API_KEY is not set"
+
+    request_parts: list[dict] = []
+    for reference_image in valid_reference_images:
+        # Declare the real image type instead of always claiming JPEG; fall
+        # back to the previous hard-coded value when the extension is unknown
+        # or does not map to an image type.
+        mime_type, _ = mimetypes.guess_type(reference_image)
+        if not mime_type or not mime_type.startswith("image/"):
+            mime_type = "image/jpeg"
+        with open(reference_image, "rb") as f:
+            image_b64 = base64.b64encode(f.read()).decode("utf-8")
+        request_parts.append(
+            {
+                "inlineData": {
+                    "mimeType": mime_type,
+                    "data": image_b64,
+                }
+            }
+        )
+    # The text prompt goes last, after all reference images.
+    request_parts.append({"text": prompt})
+
+    response = requests.post(
+        "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
+        headers={
+            "x-goog-api-key": api_key,
+            "Content-Type": "application/json",
+        },
+        json={
+            "generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}},
+            "contents": [{"parts": request_parts}],
+        },
+        # Without a timeout requests waits forever on a stalled connection.
+        timeout=timeout,
+    )
+    response.raise_for_status()
+    payload = response.json()
+
+    # A response may carry no candidates, or a candidate without content;
+    # treat both as "no image" rather than failing with a bare
+    # KeyError/IndexError.
+    candidates = payload.get("candidates") or []
+    first_content = (candidates[0].get("content") or {}) if candidates else {}
+    response_parts: list[dict] = first_content.get("parts") or []
+    image_parts = [part for part in response_parts if part.get("inlineData")]
+    if len(image_parts) != 1:
+        raise Exception("Failed to generate image")
+
+    base64_image = image_parts[0]["inlineData"]["data"]
+    # Save the image to a file
+    with open(output_file, "wb") as f:
+        f.write(base64.b64decode(base64_image))
+    return f"Successfully generated image to {output_file}"
+
+
+if __name__ == "__main__":
+    import argparse
+
+    # Command-line entry point: parse the options, run generate_image() and
+    # print either its result message or the error it raised.
+    cli = argparse.ArgumentParser(description="Generate images using Gemini API")
+    option_table = [
+        (
+            "--prompt-file",
+            {"required": True, "help": "Absolute path to JSON prompt file"},
+        ),
+        (
+            "--reference-images",
+            {
+                "nargs": "*",
+                "default": [],
+                "help": "Absolute paths to reference images (space-separated)",
+            },
+        ),
+        (
+            "--output-file",
+            {"required": True, "help": "Output path for generated image"},
+        ),
+        (
+            "--aspect-ratio",
+            {
+                "required": False,
+                "default": "16:9",
+                "help": "Aspect ratio of the generated image",
+            },
+        ),
+    ]
+    for flag, settings in option_table:
+        cli.add_argument(flag, **settings)
+
+    options = cli.parse_args()
+
+    try:
+        outcome = generate_image(
+            prompt_file=options.prompt_file,
+            reference_images=options.reference_images,
+            output_file=options.output_file,
+            aspect_ratio=options.aspect_ratio,
+        )
+        print(outcome)
+    except Exception as err:
+        # Any failure is reported on stdout; the script does not re-raise.
+        print(f"Error while generating image: {err}")