# deerflow-factory/backend/tests/test_security_sanitizer.py

"""Tests for prompt injection sanitizer."""

import pytest
from deerflow.security.sanitizer import PromptInjectionSanitizer


class TestPromptInjectionSanitizer:
    """Checks for ``PromptInjectionSanitizer.sanitize``, based on OpenClaw patterns.

    Each test builds a fresh sanitizer, feeds it one crafted string and
    inspects the returned text.
    """

    def test_removes_zero_width_spaces(self):
        """Zero-width characters (a common steganography vector) are stripped."""
        raw = "Hello\u200bWorld\u200c"  # ZWSP between the words, ZWNJ at the end
        cleaned = PromptInjectionSanitizer().sanitize(raw)
        # Neither invisible code point may survive ...
        assert "\u200b" not in cleaned
        assert "\u200c" not in cleaned
        # ... and the visible text must be left exactly as it was.
        assert cleaned == "HelloWorld"

    def test_removes_control_chars(self):
        """Control characters, which can disrupt prompt parsing, are stripped."""
        raw = "Hello\x00World\x01Test"  # NUL and SOH embedded in the text
        cleaned = PromptInjectionSanitizer().sanitize(raw)
        assert "\x00" not in cleaned
        assert "\x01" not in cleaned
        # Only the leading word is checked; what replaces the removed
        # characters is deliberately left unspecified by this test.
        assert "Hello" in cleaned

    def test_preserves_newlines_and_tabs(self):
        """Structural whitespace (newline, tab) passes through."""
        raw = "Line1\nLine2\tTabbed"
        cleaned = PromptInjectionSanitizer().sanitize(raw)
        assert "\n" in cleaned
        assert "\t" in cleaned

    def test_truncates_long_content(self):
        """Over-long input is cut to ``max_length`` to prevent context overflow."""
        oversized = "A" * 1000
        cleaned = PromptInjectionSanitizer().sanitize(oversized, max_length=100)
        # The limit is on the total length, ellipsis included.
        assert len(cleaned) == 100
        assert cleaned.endswith("...")

    def test_handles_pua_characters(self):
        """Private Use Area characters, which can encode hidden data, are stripped."""
        raw = "Hello\uE000World"  # U+E000 is the first PUA code point
        cleaned = PromptInjectionSanitizer().sanitize(raw)
        assert "\uE000" not in cleaned


class TestContentDelimiter:
    """Tests that ``wrap_untrusted_content`` wraps its input in delimiters.

    ``wrap_untrusted_content`` is imported inside each test method rather
    than at module level.
    """

    # Marker strings expected around wrapped content. Shared by both tests so
    # the dict and string cases are held to the same contract.
    START_MARKER = "<<<EXTERNAL_UNTRUSTED_CONTENT>>>"
    END_MARKER = "<<<END_EXTERNAL_UNTRUSTED_CONTENT>>>"

    def test_wraps_dict_content(self):
        """A dict payload is wrapped in both markers and keeps its values."""
        from deerflow.security.content_delimiter import wrap_untrusted_content

        content = {"title": "Test", "url": "http://example.com"}
        result = wrap_untrusted_content(content)

        assert self.START_MARKER in result
        assert self.END_MARKER in result
        assert "Test" in result

    def test_wraps_string_content(self):
        """A plain string is wrapped in both markers and keeps its text."""
        from deerflow.security.content_delimiter import wrap_untrusted_content

        content = "Raw text from web"
        result = wrap_untrusted_content(content)

        assert self.START_MARKER in result
        # The closing marker matters as much as the opening one: without it
        # the untrusted region would be unterminated.
        assert self.END_MARKER in result
        assert "Raw text from web" in result