"""Tests for prompt injection sanitizer."""

import pytest

from deerflow.security.sanitizer import PromptInjectionSanitizer


class TestPromptInjectionSanitizer:
    """Test cases based on OpenClaw patterns.

    Each test builds a fresh ``PromptInjectionSanitizer`` and checks one
    property of the string returned by ``sanitize``.
    """

    def test_removes_zero_width_spaces(self):
        """Zero-width characters are common steganography vectors."""
        sanitizer = PromptInjectionSanitizer()
        text = "Hello\u200bWorld\u200c"  # ZWSP and ZWNJ
        result = sanitizer.sanitize(text)
        assert "\u200b" not in result
        assert "\u200c" not in result
        # Stripping must not disturb the surrounding visible text.
        assert result == "HelloWorld"

    def test_removes_control_chars(self):
        """Control chars can disrupt prompt parsing."""
        sanitizer = PromptInjectionSanitizer()
        text = "Hello\x00World\x01Test"
        result = sanitizer.sanitize(text)
        assert "\x00" not in result
        assert "\x01" not in result
        assert "Hello" in result

    def test_preserves_newlines_and_tabs(self):
        """Structural characters should be preserved."""
        sanitizer = PromptInjectionSanitizer()
        text = "Line1\nLine2\tTabbed"
        result = sanitizer.sanitize(text)
        assert "\n" in result
        assert "\t" in result

    def test_truncates_long_content(self):
        """Length limiting prevents context overflow."""
        sanitizer = PromptInjectionSanitizer()
        text = "A" * 1000
        result = sanitizer.sanitize(text, max_length=100)
        # The "..." suffix is expected to count towards max_length.
        assert len(result) == 100
        assert result.endswith("...")

    def test_handles_pua_characters(self):
        """Private Use Area chars can encode hidden data."""
        sanitizer = PromptInjectionSanitizer()
        text = "Hello\uE000World"  # PUA start
        result = sanitizer.sanitize(text)
        assert "\uE000" not in result


class TestContentDelimiter:
    """Test delimiter wrapping."""

    def test_wraps_dict_content(self):
        """Dict content is wrapped and its values appear in the output."""
        from deerflow.security.content_delimiter import wrap_untrusted_content

        content = {"title": "Test", "url": "http://example.com"}
        result = wrap_untrusted_content(content)
        # NOTE(review): this literal was asserted twice in a row; if distinct
        # opening and closing delimiters were intended, assert each marker
        # explicitly here.
        assert "<<>>" in result
        assert "Test" in result

    def test_wraps_string_content(self):
        """String content is wrapped and the raw text is kept."""
        from deerflow.security.content_delimiter import wrap_untrusted_content

        content = "Raw text from web"
        result = wrap_untrusted_content(content)
        assert "<<>>" in result
        assert "Raw text from web" in result