Files
DATA 6de0bf9f5b Initial commit: hardened DeerFlow factory
Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection
hardening:

- New deerflow.security package: content_delimiter, html_cleaner,
  sanitizer (8 layers — invisible chars, control chars, symbols, NFC,
  PUA, tag chars, horizontal whitespace collapse with newline/tab
  preservation, length cap)
- New deerflow.community.searx package: web_search, web_fetch,
  image_search backed by a private SearX instance, every external
  string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>>
  delimiters
- All native community web providers (ddg_search, tavily, exa,
  firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail
  stubs that raise NativeWebToolDisabledError at import time, so a
  misconfigured tool.use path fails loud rather than silently falling
  back to unsanitized output
- Native client back-doors (jina_client.py, infoquest_client.py)
  stubbed too
- Native-tool tests quarantined under tests/_disabled_native/
  (collect_ignore_glob via local conftest.py)
- Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve
  newlines and tabs so list/table structure survives
- Hardened runtime config.yaml references only the searx-backed tools
- Factory overlay (backend/) kept in sync with deer-flow tree as a
  reference / source

See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00

117 lines
3.1 KiB
Python

import base64
import mimetypes
import os
import time

import requests
# Per-request network timeout, in seconds, so a stalled connection cannot hang forever.
_REQUEST_TIMEOUT_SECONDS = 60
# Delay between two polls of the long-running operation, in seconds.
_POLL_INTERVAL_SECONDS = 3
# Upper bound on the total time spent waiting for the operation to finish.
_MAX_WAIT_SECONDS = 30 * 60


def _decode_api_response(response) -> dict:
    """Return the JSON body of *response*, raising ``RuntimeError`` on an HTTP error."""
    if not response.ok:
        raise RuntimeError(
            f"Gemini API request failed with HTTP {response.status_code}: {response.text}"
        )
    return response.json()


def _guess_image_mime_type(path: str) -> str:
    """Guess the image MIME type from the file name, defaulting to ``image/jpeg``."""
    mime_type, _ = mimetypes.guess_type(path)
    if mime_type and mime_type.startswith("image/"):
        return mime_type
    return "image/jpeg"


def generate_video(
    prompt_file: str,
    reference_images: list[str],
    output_file: str,
    aspect_ratio: str = "16:9",
) -> str:
    """Generate a video with the Gemini API and save it to *output_file*.

    The prompt is read from *prompt_file* and submitted, together with the
    optional reference images, as a long-running operation. The operation is
    polled until it reports ``done`` and the first generated sample is then
    downloaded.

    Args:
        prompt_file: Path to a UTF-8 text file whose content is the prompt.
        reference_images: Paths to image files sent as ``asset`` references.
            May be empty.
        output_file: Path the downloaded video is written to.
        aspect_ratio: Aspect ratio requested from the API (e.g. ``"16:9"``).
            An empty value omits the parameter from the request.

    Returns:
        A success message, or ``"GEMINI_API_KEY is not set"`` when the
        ``GEMINI_API_KEY`` environment variable is missing or empty.

    Raises:
        OSError: If the prompt file or a reference image cannot be read.
        RuntimeError: If the API answers with an HTTP error, the operation
            finishes with an error, or it finishes without any video sample.
        TimeoutError: If the operation is not done within the maximum wait.
    """
    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt = f.read()

    instance = {"prompt": prompt}
    encoded_images = []
    for reference_image in reference_images:
        with open(reference_image, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode("utf-8")
        encoded_images.append(
            {
                "image": {
                    "mimeType": _guess_image_mime_type(reference_image),
                    "bytesBase64Encoded": image_b64,
                },
                "referenceType": "asset",
            }
        )
    if encoded_images:
        instance["referenceImages"] = encoded_images

    payload = {"instances": [instance]}
    if aspect_ratio:
        # Previously the argument was accepted but never forwarded to the API.
        payload["parameters"] = {"aspectRatio": aspect_ratio}

    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return "GEMINI_API_KEY is not set"

    response = requests.post(
        "https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-generate-preview:predictLongRunning",
        headers={
            "x-goog-api-key": api_key,
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    operation = _decode_api_response(response)
    operation_name = operation.get("name")
    if not operation_name:
        raise RuntimeError(f"Gemini API did not return an operation name: {operation}")

    deadline = time.monotonic() + _MAX_WAIT_SECONDS
    while True:
        response = requests.get(
            f"https://generativelanguage.googleapis.com/v1beta/{operation_name}",
            headers={
                "x-goog-api-key": api_key,
            },
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        operation = _decode_api_response(response)
        if operation.get("done", False):
            break
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Video generation did not finish within {_MAX_WAIT_SECONDS} seconds"
            )
        time.sleep(_POLL_INTERVAL_SECONDS)

    # A finished operation can carry an error instead of a response.
    if "error" in operation:
        raise RuntimeError(f"Video generation failed: {operation['error']}")

    samples = (
        operation.get("response", {})
        .get("generateVideoResponse", {})
        .get("generatedSamples")
    ) or []
    if not samples:
        raise RuntimeError(
            f"Video generation finished without producing a video: {operation}"
        )

    url = samples[0]["video"]["uri"]
    # download() returns an error message on failure; do not report success then.
    failure = download(url, output_file)
    if failure:
        return failure
    return f"The video has been generated successfully to {output_file}"
def download(url: str, output_file: str):
    """Download *url* with the Gemini API key and write the body to *output_file*.

    Args:
        url: Address of the resource to fetch.
        output_file: Path of the file the response body is written to.

    Returns:
        ``"GEMINI_API_KEY is not set"`` when the ``GEMINI_API_KEY`` environment
        variable is missing or empty (nothing is downloaded in that case),
        otherwise ``None``.

    Raises:
        RuntimeError: If the server answers with an HTTP error status. The
            output file is not created or touched in that case.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return "GEMINI_API_KEY is not set"

    # Stream the body so the whole file is never held in memory at once.
    with requests.get(
        url,
        headers={
            "x-goog-api-key": api_key,
        },
        stream=True,
        timeout=60,
    ) as response:
        # Check the status before opening the file, so an error page is never
        # saved to disk as if it were the requested content.
        if not response.ok:
            raise RuntimeError(
                f"Download failed with HTTP {response.status_code}: {response.text}"
            )
        with open(output_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return None
# Command-line entry point: parse the arguments, run the generation and print
# either the resulting status message or the error that occurred.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate videos using Gemini API")
    parser.add_argument(
        "--prompt-file",
        required=True,
        help="Absolute path to JSON prompt file",
    )
    parser.add_argument(
        "--reference-images",
        nargs="*",
        default=[],
        help="Absolute paths to reference images (space-separated)",
    )
    parser.add_argument(
        "--output-file",
        required=True,
        # Help text previously said "image" although this tool produces a video.
        help="Output path for generated video",
    )
    parser.add_argument(
        "--aspect-ratio",
        required=False,
        default="16:9",
        help="Aspect ratio of the generated video",
    )
    args = parser.parse_args()

    # Top-level boundary: every failure is reported as a printed message
    # rather than a traceback.
    try:
        print(
            generate_video(
                args.prompt_file,
                args.reference_images,
                args.output_file,
                args.aspect_ratio,
            )
        )
    except Exception as e:
        print(f"Error while generating video: {e}")