Initial commit: hardened DeerFlow factory

Vendored deer-flow upstream (bytedance/deer-flow) plus prompt-injection hardening: - New deerflow.security package: content_delimiter, html_cleaner, sanitizer (8 layers — invisible chars, control chars, symbols, NFC, PUA, tag chars, horizontal whitespace collapse with newline/tab preservation, length cap) - New deerflow.community.searx package: web_search, web_fetch, image_search backed by a private SearX instance, every external string sanitized and wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> delimiters - All native community web providers (ddg_search, tavily, exa, firecrawl, jina_ai, infoquest, image_search) replaced with hard-fail stubs that raise NativeWebToolDisabledError at import time, so a misconfigured tool.use path fails loud rather than silently falling back to unsanitized output - Native client back-doors (jina_client.py, infoquest_client.py) stubbed too - Native-tool tests quarantined under tests/_disabled_native/ (collect_ignore_glob via local conftest.py) - Sanitizer Layer 7 fix: only collapse horizontal whitespace, preserve newlines and tabs so list/table structure survives - Hardened runtime config.yaml references only the searx-backed tools - Factory overlay (backend/) kept in sync with deer-flow tree as a reference / source See HARDENING.md for the full audit trail and verification steps.
2026-04-12 14:23:57 +02:00
commit 6de0bf9f5b
889 changed files with 173052 additions and 0 deletions
--- a/deer-flow/backend/tests/test_client_e2e.py
+++ b/deer-flow/backend/tests/test_client_e2e.py
@@ -0,0 +1,769 @@
+"""End-to-end tests for DeerFlowClient.
+
+Middle tier of the test pyramid:
+- Top:    test_client_live.py  — real LLM, needs API key
+- Middle: test_client_e2e.py   — real LLM + real modules  ← THIS FILE
+- Bottom: test_client.py       — unit tests, mock everything
+
+Core principle: use the real LLM from config.yaml, let config, middleware
+chain, tool registration, file I/O, and event serialization all run for real.
+Only DEER_FLOW_HOME is redirected to tmp_path for filesystem isolation.
+
+Tests that call the LLM are marked ``requires_llm`` and skipped in CI.
+File-management tests (upload/list/delete) don't need LLM and run everywhere.
+"""
+
+import json
+import os
+import uuid
+import zipfile
+
+import pytest
+from dotenv import load_dotenv
+
+from deerflow.client import DeerFlowClient, StreamEvent
+from deerflow.config.app_config import AppConfig
+from deerflow.config.model_config import ModelConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+# Load .env from project root (for OPENAI_API_KEY etc.)
+load_dotenv(os.path.join(os.path.dirname(__file__), "../../.env"))
+
+# ---------------------------------------------------------------------------
+# Markers
+# ---------------------------------------------------------------------------
+
+requires_llm = pytest.mark.skipif(
+    os.getenv("CI", "").lower() in ("true", "1") or not os.getenv("OPENAI_API_KEY"),
+    reason="Requires LLM API key — skipped in CI or when OPENAI_API_KEY is unset",
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_e2e_config() -> AppConfig:
+    """Build a minimal AppConfig using real LLM credentials from environment.
+
+    All LLM connection details come from environment variables so that both
+    internal CI and external contributors can run the tests:
+
+    - ``E2E_MODEL_NAME``  (default: ``volcengine-ark``)
+    - ``E2E_MODEL_USE``   (default: ``langchain_openai:ChatOpenAI``)
+    - ``E2E_MODEL_ID``    (default: ``ep-20251211175242-llcmh``)
+    - ``E2E_BASE_URL``    (default: ``https://ark-cn-beijing.bytedance.net/api/v3``)
+    - ``OPENAI_API_KEY``  (required for LLM tests)
+    """
+    return AppConfig(
+        models=[
+            ModelConfig(
+                name=os.getenv("E2E_MODEL_NAME", "volcengine-ark"),
+                display_name="E2E Test Model",
+                use=os.getenv("E2E_MODEL_USE", "langchain_openai:ChatOpenAI"),
+                model=os.getenv("E2E_MODEL_ID", "ep-20251211175242-llcmh"),
+                base_url=os.getenv("E2E_BASE_URL", "https://ark-cn-beijing.bytedance.net/api/v3"),
+                api_key=os.getenv("OPENAI_API_KEY", ""),
+                max_tokens=512,
+                temperature=0.7,
+                supports_thinking=False,
+                supports_reasoning_effort=False,
+                supports_vision=False,
+            )
+        ],
+        sandbox=SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", allow_host_bash=True),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture()
+def e2e_env(tmp_path, monkeypatch):
+    """Isolated filesystem environment for E2E tests.
+
+    - DEER_FLOW_HOME → tmp_path (all thread data lands in a temp dir)
+    - Singletons reset so they pick up the new env
+    - Title/memory/summarization disabled to avoid extra LLM calls
+    - AppConfig built programmatically (avoids config.yaml param-name issues)
+    """
+    # 1. Filesystem isolation
+    monkeypatch.setenv("DEER_FLOW_HOME", str(tmp_path))
+    monkeypatch.setattr("deerflow.config.paths._paths", None)
+    monkeypatch.setattr("deerflow.sandbox.sandbox_provider._default_sandbox_provider", None)
+
+    # 2. Inject a clean AppConfig via the global singleton.
+    config = _make_e2e_config()
+    monkeypatch.setattr("deerflow.config.app_config._app_config", config)
+    monkeypatch.setattr("deerflow.config.app_config._app_config_is_custom", True)
+
+    # 3. Disable title generation (extra LLM call, non-deterministic)
+    from deerflow.config.title_config import TitleConfig
+
+    monkeypatch.setattr("deerflow.config.title_config._title_config", TitleConfig(enabled=False))
+
+    # 4. Disable memory queueing (avoids background threads & file writes)
+    from deerflow.config.memory_config import MemoryConfig
+
+    monkeypatch.setattr(
+        "deerflow.agents.middlewares.memory_middleware.get_memory_config",
+        lambda: MemoryConfig(enabled=False),
+    )
+
+    # 5. Ensure summarization is off (default, but be explicit)
+    from deerflow.config.summarization_config import SummarizationConfig
+
+    monkeypatch.setattr("deerflow.config.summarization_config._summarization_config", SummarizationConfig(enabled=False))
+
+    # 6. Exclude TitleMiddleware from the chain.
+    #    It triggers an extra LLM call to generate a thread title, which adds
+    #    non-determinism and cost to E2E tests (title generation is already
+    #    disabled via TitleConfig above, but the middleware still participates
+    #    in the chain and can interfere with event ordering).
+    from deerflow.agents.lead_agent.agent import _build_middlewares as _original_build_middlewares
+    from deerflow.agents.middlewares.title_middleware import TitleMiddleware
+
+    def _sync_safe_build_middlewares(*args, **kwargs):
+        mws = _original_build_middlewares(*args, **kwargs)
+        return [m for m in mws if not isinstance(m, TitleMiddleware)]
+
+    monkeypatch.setattr("deerflow.client._build_middlewares", _sync_safe_build_middlewares)
+
+    return {"tmp_path": tmp_path}
+
+
+@pytest.fixture()
+def client(e2e_env):
+    """A DeerFlowClient wired to the isolated e2e_env."""
+    return DeerFlowClient(checkpointer=None, thinking_enabled=False)
+
+
+# ---------------------------------------------------------------------------
+# Step 2: Basic streaming (requires LLM)
+# ---------------------------------------------------------------------------
+
+
+class TestBasicChat:
+    """Basic chat and streaming behavior with real LLM."""
+
+    @requires_llm
+    def test_basic_chat(self, client):
+        """chat() returns a non-empty text response."""
+        result = client.chat("Say exactly: pong")
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+    @requires_llm
+    def test_stream_event_sequence(self, client):
+        """stream() yields events: messages-tuple, values, and end."""
+        events = list(client.stream("Say hi"))
+
+        types = [e.type for e in events]
+        assert types[-1] == "end"
+        assert "messages-tuple" in types
+        assert "values" in types
+
+    @requires_llm
+    def test_stream_event_data_format(self, client):
+        """Each event type has the expected data structure."""
+        events = list(client.stream("Say hello"))
+
+        for event in events:
+            assert isinstance(event, StreamEvent)
+            assert isinstance(event.type, str)
+            assert isinstance(event.data, dict)
+
+            if event.type == "messages-tuple" and event.data.get("type") == "ai":
+                assert "content" in event.data
+                assert "id" in event.data
+            elif event.type == "values":
+                assert "messages" in event.data
+                assert "artifacts" in event.data
+            elif event.type == "end":
+                # end event may contain usage stats after token tracking was added
+                assert isinstance(event.data, dict)
+
+    @requires_llm
+    def test_multi_turn_stateless(self, client):
+        """Without checkpointer, two calls to the same thread_id are independent."""
+        tid = str(uuid.uuid4())
+
+        r1 = client.chat("Remember the number 42", thread_id=tid)
+        # Reset so agent is recreated (simulates no cross-turn state)
+        client.reset_agent()
+        r2 = client.chat("What number did I say?", thread_id=tid)
+
+        # Without a checkpointer the second call has no memory of the first.
+        # We can't assert exact content, but both should be non-empty.
+        assert isinstance(r1, str) and len(r1) > 0
+        assert isinstance(r2, str) and len(r2) > 0
+
+
+# ---------------------------------------------------------------------------
+# Step 3: Tool call flow (requires LLM)
+# ---------------------------------------------------------------------------
+
+
+class TestToolCallFlow:
+    """Verify the LLM actually invokes tools through the real agent pipeline."""
+
+    @requires_llm
+    def test_tool_call_produces_events(self, client):
+        """When the LLM decides to use a tool, we see tool call + result events."""
+        # Give a clear instruction that forces a tool call
+        events = list(client.stream("Use the bash tool to run: echo hello_e2e_test"))
+
+        types = [e.type for e in events]
+        assert types[-1] == "end"
+
+        # Should have at least one tool call event
+        tool_call_events = [e for e in events if e.type == "messages-tuple" and e.data.get("tool_calls")]
+        tool_result_events = [e for e in events if e.type == "messages-tuple" and e.data.get("type") == "tool"]
+        assert len(tool_call_events) >= 1, "Expected at least one tool_call event"
+        assert len(tool_result_events) >= 1, "Expected at least one tool result event"
+
+    @requires_llm
+    def test_tool_call_event_structure(self, client):
+        """Tool call events contain name, args, and id fields."""
+        events = list(client.stream("Use the read_file tool to read /mnt/user-data/workspace/nonexistent.txt"))
+
+        tc_events = [e for e in events if e.type == "messages-tuple" and e.data.get("tool_calls")]
+        if tc_events:
+            tc = tc_events[0].data["tool_calls"][0]
+            assert "name" in tc
+            assert "args" in tc
+            assert "id" in tc
+
+
+# ---------------------------------------------------------------------------
+# Step 4: File upload integration (no LLM needed for most)
+# ---------------------------------------------------------------------------
+
+
+class TestFileUploadIntegration:
+    """Upload, list, and delete files through the real client path."""
+
+    def test_upload_files(self, e2e_env, tmp_path):
+        """upload_files() copies files and returns metadata."""
+        test_file = tmp_path / "source" / "readme.txt"
+        test_file.parent.mkdir(parents=True, exist_ok=True)
+        test_file.write_text("Hello world")
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        result = c.upload_files(tid, [test_file])
+        assert result["success"] is True
+        assert len(result["files"]) == 1
+        assert result["files"][0]["filename"] == "readme.txt"
+
+        # Physically exists
+        from deerflow.config.paths import get_paths
+
+        assert (get_paths().sandbox_uploads_dir(tid) / "readme.txt").exists()
+
+    def test_upload_duplicate_rename(self, e2e_env, tmp_path):
+        """Uploading two files with the same name auto-renames the second."""
+        d1 = tmp_path / "dir1"
+        d2 = tmp_path / "dir2"
+        d1.mkdir()
+        d2.mkdir()
+        (d1 / "data.txt").write_text("content A")
+        (d2 / "data.txt").write_text("content B")
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        result = c.upload_files(tid, [d1 / "data.txt", d2 / "data.txt"])
+        assert result["success"] is True
+        assert len(result["files"]) == 2
+
+        filenames = {f["filename"] for f in result["files"]}
+        assert "data.txt" in filenames
+        assert "data_1.txt" in filenames
+
+    def test_upload_list_and_delete(self, e2e_env, tmp_path):
+        """Upload → list → delete → list lifecycle."""
+        test_file = tmp_path / "lifecycle.txt"
+        test_file.write_text("lifecycle test")
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        c.upload_files(tid, [test_file])
+
+        listing = c.list_uploads(tid)
+        assert listing["count"] == 1
+        assert listing["files"][0]["filename"] == "lifecycle.txt"
+
+        del_result = c.delete_upload(tid, "lifecycle.txt")
+        assert del_result["success"] is True
+
+        listing = c.list_uploads(tid)
+        assert listing["count"] == 0
+
+    @requires_llm
+    def test_upload_then_chat(self, e2e_env, tmp_path):
+        """Upload a file then ask the LLM about it — UploadsMiddleware injects file info."""
+        test_file = tmp_path / "source" / "notes.txt"
+        test_file.parent.mkdir(parents=True, exist_ok=True)
+        test_file.write_text("The secret code is 7749.")
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        c.upload_files(tid, [test_file])
+        # Chat — the middleware should inject <uploaded_files> context
+        response = c.chat("What files are available?", thread_id=tid)
+        assert isinstance(response, str) and len(response) > 0
+
+
+# ---------------------------------------------------------------------------
+# Step 5: Lifecycle and configuration (no LLM needed)
+# ---------------------------------------------------------------------------
+
+
+class TestLifecycleAndConfig:
+    """Agent recreation and configuration behavior."""
+
+    @requires_llm
+    def test_agent_recreation_on_config_change(self, client):
+        """Changing thinking_enabled triggers agent recreation (different config key)."""
+        list(client.stream("hi"))
+        key1 = client._agent_config_key
+
+        # Stream with a different config override
+        client.reset_agent()
+        list(client.stream("hi", thinking_enabled=True))
+        key2 = client._agent_config_key
+
+        # thinking_enabled changed: False → True → keys differ
+        assert key1 != key2
+
+    def test_reset_agent_clears_state(self, e2e_env):
+        """reset_agent() sets the internal agent to None."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        # Before any call, agent is None
+        assert c._agent is None
+
+        c.reset_agent()
+        assert c._agent is None
+        assert c._agent_config_key is None
+
+    def test_plan_mode_config_key(self, e2e_env):
+        """plan_mode is part of the config key tuple."""
+        c = DeerFlowClient(checkpointer=None, plan_mode=False)
+        cfg1 = c._get_runnable_config("test-thread")
+        key1 = (
+            cfg1["configurable"]["model_name"],
+            cfg1["configurable"]["thinking_enabled"],
+            cfg1["configurable"]["is_plan_mode"],
+            cfg1["configurable"]["subagent_enabled"],
+        )
+
+        c2 = DeerFlowClient(checkpointer=None, plan_mode=True)
+        cfg2 = c2._get_runnable_config("test-thread")
+        key2 = (
+            cfg2["configurable"]["model_name"],
+            cfg2["configurable"]["thinking_enabled"],
+            cfg2["configurable"]["is_plan_mode"],
+            cfg2["configurable"]["subagent_enabled"],
+        )
+
+        assert key1 != key2
+        assert key1[2] is False
+        assert key2[2] is True
+
+
+# ---------------------------------------------------------------------------
+# Step 6: Middleware chain verification (requires LLM)
+# ---------------------------------------------------------------------------
+
+
+class TestMiddlewareChain:
+    """Verify middleware side effects through real execution."""
+
+    @requires_llm
+    def test_thread_data_paths_in_state(self, client):
+        """After streaming, thread directory paths are computed correctly."""
+        tid = str(uuid.uuid4())
+        events = list(client.stream("hi", thread_id=tid))
+
+        # The values event should contain messages
+        values_events = [e for e in events if e.type == "values"]
+        assert len(values_events) >= 1
+
+        # ThreadDataMiddleware should have set paths in the state.
+        # We verify the paths singleton can resolve the thread dir.
+        from deerflow.config.paths import get_paths
+
+        thread_dir = get_paths().thread_dir(tid)
+        assert str(thread_dir).endswith(tid)
+
+    @requires_llm
+    def test_stream_completes_without_middleware_errors(self, client):
+        """Full middleware chain (ThreadData, Uploads, Sandbox, DanglingToolCall,
+        Memory, Clarification) executes without errors."""
+        events = list(client.stream("What is 1+1?"))
+
+        types = [e.type for e in events]
+        assert types[-1] == "end"
+        # Should have at least one AI response
+        ai_events = [e for e in events if e.type == "messages-tuple" and e.data.get("type") == "ai"]
+        assert len(ai_events) >= 1
+
+
+# ---------------------------------------------------------------------------
+# Step 7: Error and boundary conditions
+# ---------------------------------------------------------------------------
+
+
+class TestErrorAndBoundary:
+    """Error propagation and edge cases."""
+
+    def test_upload_nonexistent_file_raises(self, e2e_env):
+        """Uploading a file that doesn't exist raises FileNotFoundError."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(FileNotFoundError):
+            c.upload_files("test-thread", ["/nonexistent/file.txt"])
+
+    def test_delete_nonexistent_upload_raises(self, e2e_env):
+        """Deleting a file that doesn't exist raises FileNotFoundError."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+        # Ensure the uploads dir exists first
+        c.list_uploads(tid)
+        with pytest.raises(FileNotFoundError):
+            c.delete_upload(tid, "ghost.txt")
+
+    def test_artifact_path_traversal_blocked(self, e2e_env):
+        """get_artifact blocks path traversal attempts."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(ValueError):
+            c.get_artifact("test-thread", "../../etc/passwd")
+
+    def test_upload_directory_rejected(self, e2e_env, tmp_path):
+        """Uploading a directory (not a file) is rejected."""
+        d = tmp_path / "a_directory"
+        d.mkdir()
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(ValueError, match="not a file"):
+            c.upload_files("test-thread", [d])
+
+    @requires_llm
+    def test_empty_message_still_gets_response(self, client):
+        """Even an empty-ish message should produce a valid event stream."""
+        events = list(client.stream(" "))
+        types = [e.type for e in events]
+        assert types[-1] == "end"
+
+
+# ---------------------------------------------------------------------------
+# Step 8: Artifact access (no LLM needed)
+# ---------------------------------------------------------------------------
+
+
+class TestArtifactAccess:
+    """Read artifacts through get_artifact() with real filesystem."""
+
+    def test_get_artifact_happy_path(self, e2e_env):
+        """Write a file to outputs, then read it back via get_artifact()."""
+        from deerflow.config.paths import get_paths
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        # Create an output file in the thread's outputs directory
+        outputs_dir = get_paths().sandbox_outputs_dir(tid)
+        outputs_dir.mkdir(parents=True, exist_ok=True)
+        (outputs_dir / "result.txt").write_text("hello artifact")
+
+        data, mime = c.get_artifact(tid, "mnt/user-data/outputs/result.txt")
+        assert data == b"hello artifact"
+        assert "text" in mime
+
+    def test_get_artifact_nested_path(self, e2e_env):
+        """Artifacts in subdirectories are accessible."""
+        from deerflow.config.paths import get_paths
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        tid = str(uuid.uuid4())
+
+        outputs_dir = get_paths().sandbox_outputs_dir(tid)
+        sub = outputs_dir / "charts"
+        sub.mkdir(parents=True, exist_ok=True)
+        (sub / "data.json").write_text('{"x": 1}')
+
+        data, mime = c.get_artifact(tid, "mnt/user-data/outputs/charts/data.json")
+        assert b'"x"' in data
+        assert "json" in mime
+
+    def test_get_artifact_nonexistent_raises(self, e2e_env):
+        """Reading a nonexistent artifact raises FileNotFoundError."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(FileNotFoundError):
+            c.get_artifact("test-thread", "mnt/user-data/outputs/ghost.txt")
+
+    def test_get_artifact_traversal_within_prefix_blocked(self, e2e_env):
+        """Path traversal within the valid prefix is still blocked."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises((PermissionError, ValueError, FileNotFoundError)):
+            c.get_artifact("test-thread", "mnt/user-data/outputs/../../etc/passwd")
+
+
+# ---------------------------------------------------------------------------
+# Step 9: Skill installation (no LLM needed)
+# ---------------------------------------------------------------------------
+
+
+class TestSkillInstallation:
+    """install_skill() with real ZIP handling and filesystem."""
+
+    @pytest.fixture(autouse=True)
+    def _isolate_skills_dir(self, tmp_path, monkeypatch):
+        """Redirect skill installation to a temp directory."""
+        skills_root = tmp_path / "skills"
+        (skills_root / "public").mkdir(parents=True)
+        (skills_root / "custom").mkdir(parents=True)
+        monkeypatch.setattr(
+            "deerflow.skills.installer.get_skills_root_path",
+            lambda: skills_root,
+        )
+        self._skills_root = skills_root
+
+    @staticmethod
+    def _make_skill_zip(tmp_path, skill_name="test-e2e-skill"):
+        """Create a minimal valid .skill archive."""
+        skill_dir = tmp_path / "build" / skill_name
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(f"---\nname: {skill_name}\ndescription: E2E test skill\n---\n\nTest content.\n")
+        archive_path = tmp_path / f"{skill_name}.skill"
+        with zipfile.ZipFile(archive_path, "w") as zf:
+            for file in skill_dir.rglob("*"):
+                zf.write(file, file.relative_to(tmp_path / "build"))
+        return archive_path
+
+    def test_install_skill_success(self, e2e_env, tmp_path):
+        """A valid .skill archive installs to the custom skills directory."""
+        archive = self._make_skill_zip(tmp_path)
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+
+        result = c.install_skill(archive)
+        assert result["success"] is True
+        assert result["skill_name"] == "test-e2e-skill"
+        assert (self._skills_root / "custom" / "test-e2e-skill" / "SKILL.md").exists()
+
+    def test_install_skill_duplicate_rejected(self, e2e_env, tmp_path):
+        """Installing the same skill twice raises ValueError."""
+        archive = self._make_skill_zip(tmp_path)
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+
+        c.install_skill(archive)
+        with pytest.raises(ValueError, match="already exists"):
+            c.install_skill(archive)
+
+    def test_install_skill_invalid_extension(self, e2e_env, tmp_path):
+        """A file without .skill extension is rejected."""
+        bad_file = tmp_path / "not_a_skill.zip"
+        bad_file.write_bytes(b"PK\x03\x04")  # ZIP magic bytes
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(ValueError, match=".skill extension"):
+            c.install_skill(bad_file)
+
+    def test_install_skill_missing_frontmatter(self, e2e_env, tmp_path):
+        """A .skill archive without valid SKILL.md frontmatter is rejected."""
+        skill_dir = tmp_path / "build" / "bad-skill"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text("No frontmatter here.")
+
+        archive = tmp_path / "bad-skill.skill"
+        with zipfile.ZipFile(archive, "w") as zf:
+            for file in skill_dir.rglob("*"):
+                zf.write(file, file.relative_to(tmp_path / "build"))
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(ValueError, match="Invalid skill"):
+            c.install_skill(archive)
+
+    def test_install_skill_nonexistent_file(self, e2e_env):
+        """Installing from a nonexistent path raises FileNotFoundError."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(FileNotFoundError):
+            c.install_skill("/nonexistent/skill.skill")
+
+
+# ---------------------------------------------------------------------------
+# Step 10: Configuration management (no LLM needed)
+# ---------------------------------------------------------------------------
+
+
+class TestConfigManagement:
+    """Config queries and updates through real code paths."""
+
+    def test_list_models_returns_injected_config(self, e2e_env):
+        """list_models() returns the model from the injected AppConfig."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.list_models()
+        assert "models" in result
+        assert len(result["models"]) == 1
+        assert result["models"][0]["name"] == "volcengine-ark"
+        assert result["models"][0]["display_name"] == "E2E Test Model"
+
+    def test_get_model_found(self, e2e_env):
+        """get_model() returns the model when it exists."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        model = c.get_model("volcengine-ark")
+        assert model is not None
+        assert model["name"] == "volcengine-ark"
+        assert model["supports_thinking"] is False
+
+    def test_get_model_not_found(self, e2e_env):
+        """get_model() returns None for nonexistent model."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        assert c.get_model("nonexistent-model") is None
+
+    def test_list_skills_returns_list(self, e2e_env):
+        """list_skills() returns a dict with 'skills' key from real directory scan."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.list_skills()
+        assert "skills" in result
+        assert isinstance(result["skills"], list)
+        # The real skills/ directory should have some public skills
+        assert len(result["skills"]) > 0
+
+    def test_get_skill_found(self, e2e_env):
+        """get_skill() returns skill info for a known public skill."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        # 'deep-research' is a built-in public skill
+        skill = c.get_skill("deep-research")
+        if skill is not None:
+            assert skill["name"] == "deep-research"
+            assert "description" in skill
+            assert "enabled" in skill
+
+    def test_get_skill_not_found(self, e2e_env):
+        """get_skill() returns None for nonexistent skill."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        assert c.get_skill("nonexistent-skill-xyz") is None
+
+    def test_get_mcp_config_returns_dict(self, e2e_env):
+        """get_mcp_config() returns a dict with 'mcp_servers' key."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.get_mcp_config()
+        assert "mcp_servers" in result
+        assert isinstance(result["mcp_servers"], dict)
+
+    def test_update_mcp_config_writes_and_invalidates(self, e2e_env, tmp_path, monkeypatch):
+        """update_mcp_config() writes extensions_config.json and invalidates the agent."""
+        # Set up a writable extensions_config.json
+        config_file = tmp_path / "extensions_config.json"
+        config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
+        monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
+
+        # Force reload so the singleton picks up our test file
+        from deerflow.config.extensions_config import reload_extensions_config
+
+        reload_extensions_config()
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        # Simulate a cached agent
+        c._agent = "fake-agent-placeholder"
+        c._agent_config_key = ("a", "b", "c", "d")
+
+        result = c.update_mcp_config({"test-server": {"enabled": True, "type": "stdio", "command": "echo"}})
+        assert "mcp_servers" in result
+
+        # Agent should be invalidated
+        assert c._agent is None
+        assert c._agent_config_key is None
+
+        # File should be written
+        written = json.loads(config_file.read_text())
+        assert "test-server" in written["mcpServers"]
+
+    def test_update_skill_writes_and_invalidates(self, e2e_env, tmp_path, monkeypatch):
+        """update_skill() writes extensions_config.json and invalidates the agent."""
+        config_file = tmp_path / "extensions_config.json"
+        config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
+        monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
+
+        from deerflow.config.extensions_config import reload_extensions_config
+
+        reload_extensions_config()
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        c._agent = "fake-agent-placeholder"
+        c._agent_config_key = ("a", "b", "c", "d")
+
+        # Use a real skill name from the public skills directory
+        skills = c.list_skills()
+        if not skills["skills"]:
+            pytest.skip("No skills available for testing")
+        skill_name = skills["skills"][0]["name"]
+
+        result = c.update_skill(skill_name, enabled=False)
+        assert result["name"] == skill_name
+        assert result["enabled"] is False
+
+        # Agent should be invalidated
+        assert c._agent is None
+        assert c._agent_config_key is None
+
+    def test_update_skill_nonexistent_raises(self, e2e_env, tmp_path, monkeypatch):
+        """update_skill() raises ValueError for nonexistent skill."""
+        config_file = tmp_path / "extensions_config.json"
+        config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
+        monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
+
+        from deerflow.config.extensions_config import reload_extensions_config
+
+        reload_extensions_config()
+
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        with pytest.raises(ValueError, match="not found"):
+            c.update_skill("nonexistent-skill-xyz", enabled=True)
+
+
+# ---------------------------------------------------------------------------
+# Step 11: Memory access (no LLM needed)
+# ---------------------------------------------------------------------------
+
+
+class TestMemoryAccess:
+    """Memory system queries through real code paths."""
+
+    def test_get_memory_returns_dict(self, e2e_env):
+        """get_memory() returns a dict (may be empty initial state)."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.get_memory()
+        assert isinstance(result, dict)
+
+    def test_reload_memory_returns_dict(self, e2e_env):
+        """reload_memory() forces reload and returns a dict."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.reload_memory()
+        assert isinstance(result, dict)
+
+    def test_get_memory_config_fields(self, e2e_env):
+        """get_memory_config() returns expected config fields."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.get_memory_config()
+        assert "enabled" in result
+        assert "storage_path" in result
+        assert "debounce_seconds" in result
+        assert "max_facts" in result
+        assert "fact_confidence_threshold" in result
+        assert "injection_enabled" in result
+        assert "max_injection_tokens" in result
+
+    def test_get_memory_status_combines_config_and_data(self, e2e_env):
+        """get_memory_status() returns both 'config' and 'data' keys."""
+        c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
+        result = c.get_memory_status()
+        assert "config" in result
+        assert "data" in result
+        assert "enabled" in result["config"]
+        assert isinstance(result["data"], dict)