cppalliance · bradjin8 · May 29, 2026 · May 29, 2026 · May 29, 2026 · Jun 2, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+- **Summary disk cache (Phase 3)** — project list and tab summaries cached under
+  `~/.cache/cursor-chat-browser/`, invalidated when global or per-workspace DB
+  mtimes change; bypass with `?nocache=1` or `CURSOR_CHAT_BROWSER_NOCACHE=1` (#84)
+- **Lazy-load workspace UI** — workspace sidebar renders from a lightweight summary
+  payload; full bubble content is fetched per-conversation when the user selects it,
+  reducing first-paint time from 1–2 min to < 3 s on large local fixtures (#84)
+- **`GET /api/workspaces/<id>/tabs?summary=1`** — new summary-only variant returns
+  `id`, `title`, `timestamp`, `messageCount`, and optional `metadata.modelsUsed`
+  without loading any bubble data (#84)
+- **`GET /api/workspaces/<id>/tabs/<composer_id>`** — new single-conversation
+  endpoint loads only scoped `bubbleId:{id}:%`, `messageRequestContext:{id}:%`,
+  and `codeBlockDiff:{id}:%` KV rows, avoiding a full global bubble scan (#84)
+- **Scoped KV loaders** in `services/workspace_db.py`:
+  `load_bubbles_for_composer`, `load_message_request_context_for_composer`,
+  `load_code_block_diffs_for_composer` — used by the single-tab path (#84)
+
+### Changed
+- **List-path performance** — skip full `messageRequestContext` scan unless
+  invalid workspace aliases are needed; filter `composerData` in SQL; skip
+  `Composer.from_dict` on list/summary paths; cache `composer_id_to_ws` mapping (#84)
+- **`GET /api/workspaces`** (`list_workspace_projects`) no longer performs a
+  global `bubbleId:%` scan; conversation presence is determined from
+  `fullConversationHeadersOnly` headers alone, and workspace assignment relies
+  on `composer_id_to_ws` (primary) plus `projectLayouts` from MRC (#84)
+- **`assemble_workspace_tabs`** inner per-composer loop refactored into a shared
+  `_assemble_tab_from_composer_data` helper reused by `assemble_single_tab`; full
+  path behaviour is unchanged (#84)
+
+### Deprecated
+- Direct use of `GET /api/workspaces/<id>/tabs` (no `?summary=1`) from the workspace
+  UI on page load; the UI now calls `?summary=1` for first paint and lazy-fetches
+  individual tabs. The full-assembly endpoint remains available for export,
+  search, and backward-compatible consumers (planned removal: post-1.0) (#84)
+
+### Added
 - **Web UI** — browse and search all Cursor AI workspaces; conversation view with syntax-highlighted code blocks, dark/light mode, and bookmarkable chat URLs (#63)
 - **Export formats** — one-click export of chats as Markdown, HTML, PDF, JSON, and CSV from the web UI (#63)
 - **CLI export** (`cursor-chat-export` / `scripts/export.py`) — zip archive or individual Markdown files with YAML frontmatter; incremental mode (`--since last`) preserves state across runs (#63, #42, #61)

diff --git a/api/workspaces.py b/api/workspaces.py
@@ -11,7 +11,7 @@
 import os
 from datetime import datetime, timezone
 
-from flask import Blueprint, jsonify
+from flask import Blueprint, jsonify, request
 
 from api.flask_config import exclusion_rules
 
@@ -29,7 +29,11 @@
 )
 from services.cli_tabs import get_cli_workspace_tabs
 from services.workspace_listing import list_workspace_projects
-from services.workspace_tabs import assemble_workspace_tabs
+from services.workspace_tabs import (
+    assemble_single_tab,
+    assemble_workspace_tabs,
+    list_workspace_tab_summaries,
+)
 
 # Re-exported for tests/test_models_wired_at_read_sites.py — the typed-model
 # spy harness patches `workspaces_mod.Bubble` / `.Composer` / `.Workspace` to
@@ -46,12 +50,18 @@
 # GET /api/workspaces
 # ---------------------------------------------------------------------------
 
+def _request_nocache() -> bool:  # True only when the query string has nocache=1 or nocache=true
+    return request.args.get("nocache", "") in {"1", "true"}
+
+
 @bp.route("/api/workspaces")
 def list_workspaces():
     try:
         workspace_path = resolve_workspace_path()
         rules = exclusion_rules()
-        projects, warnings = list_workspace_projects(workspace_path, rules)
+        projects, warnings = list_workspace_projects(
+            workspace_path, rules, nocache=_request_nocache(),
+        )
         payload: dict = {"projects": projects}
         if warnings:
             payload["warnings"] = warnings
@@ -154,9 +164,33 @@ def get_workspace_tabs(workspace_id):
     try:
         workspace_path = resolve_workspace_path()
         rules = exclusion_rules()
-        payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules)
+        summary = request.args.get("summary") in ("1", "true")
+        if summary:
+            payload, status = list_workspace_tab_summaries(
+                workspace_id, workspace_path, rules, nocache=_request_nocache(),
+            )
+        else:
+            payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules)
         return jsonify(payload), status
     except Exception:
         _logger.exception("Failed to get workspace tabs")
         return jsonify({"error": "Failed to get workspace tabs"}), 500
 
+
+# ---------------------------------------------------------------------------
+# GET /api/workspaces/<id>/tabs/<composer_id>
+# ---------------------------------------------------------------------------
+
+@bp.route("/api/workspaces/<workspace_id>/tabs/<composer_id>")
+def get_workspace_tab(workspace_id, composer_id):
+    """Return ``assemble_single_tab``'s payload as JSON; ``cli:`` ids get 400, errors a logged 500."""
+    if workspace_id.startswith("cli:"):
+        return jsonify({"error": "Per-tab lazy load is not supported for CLI workspaces"}), 400
+    try:
+        root = resolve_workspace_path()
+        body, code = assemble_single_tab(workspace_id, composer_id, root, exclusion_rules())
+        return jsonify(body), code
+    except Exception:
+        _logger.exception("Failed to get workspace tab")
+        return jsonify({"error": "Failed to get workspace tab"}), 500
+
diff --git a/services/summary_cache.py b/services/summary_cache.py
@@ -0,0 +1,226 @@
+"""Disk cache for derived workspace summaries (issue #84 Phase 3).
+
+Caches project lists and per-workspace tab summaries keyed by storage mtimes
+so repeat page loads avoid re-scanning Cursor's global KV index.
+
+Bypass: set env ``CURSOR_CHAT_BROWSER_NOCACHE=1`` or pass ``?nocache=1`` on API
+requests. Cache files live under ``~/.cache/cursor-chat-browser/``.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Any
+
+_logger = logging.getLogger(__name__)
+
+CACHE_VERSION = 1  # embedded in every fingerprint, so changing it invalidates existing caches
+CACHE_DIR = Path.home() / ".cache" / "cursor-chat-browser"
+PROJECTS_CACHE_FILE = CACHE_DIR / "projects.json"
+COMPOSER_MAP_CACHE_FILE = CACHE_DIR / "composer-id-to-ws.json"
+TAB_SUMMARIES_PREFIX = "tab-summaries-"  # followed by a hash of the workspace id and ".json"
+
+
+def nocache_enabled(*, request_nocache: bool = False) -> bool:
+    """Return True when the summary disk cache must be bypassed.
+
+    The per-request flag wins; otherwise ``CURSOR_CHAT_BROWSER_NOCACHE`` is honoured
+    when it is ``1``, ``true`` or ``yes`` (case-insensitive, surrounding whitespace ignored).
+    """
+    env_flag = os.environ.get("CURSOR_CHAT_BROWSER_NOCACHE", "").strip().lower()
+    return bool(request_nocache) or env_flag in ("1", "true", "yes")
+
+
+def _rules_digest(rules: list) -> str:  # first 16 hex chars of SHA-256 over *rules*
+    try:
+        serialized = json.dumps(rules, ensure_ascii=False, sort_keys=True)
+    except (TypeError, ValueError):  # not JSON-serialisable: hash the repr instead
+        serialized = repr(rules)
+    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()[:16]
+
+
+def _file_mtime_ns(path: str | None) -> int | None:
+    """Return ``st_mtime_ns`` of *path*; None when it is unset, not a regular file, or stat fails."""
+    try:
+        is_file = bool(path) and os.path.isfile(path)
+        return os.stat(path).st_mtime_ns if is_file else None
+    except OSError:
+        return None
+
+
+def fingerprint_workspace_storage(
+    workspace_path: str,
+    workspace_entries: list[dict],
+    *,
+    global_db_path: str | None,
+    rules: list,
+    cli_chats_path: str | None = None,
+) -> dict[str, Any]:
+    """Build the fingerprint dict that cached summaries are validated against.
+
+    Records the mtime (ns) of every existing ``state.vscdb`` / ``workspace.json``
+    under each named entry, plus the global DB and CLI chats mtimes, a digest of
+    *rules*, the normalised workspace path and ``CACHE_VERSION``.
+    """
+    tracked = ("state.vscdb", "workspace.json")
+    names = [e.get("name") for e in workspace_entries]
+    stamps = [
+        (f"{name}/{rel}", _file_mtime_ns(os.path.join(workspace_path, name, rel)))
+        for name in names if isinstance(name, str) for rel in tracked
+    ]
+    # Rows are sorted by label so the fingerprint does not depend on listing order.
+    ws_files = sorted(([label, ns] for label, ns in stamps if ns is not None), key=lambda r: r[0])
+    return {
+        "version": CACHE_VERSION,
+        "workspace_path": os.path.normpath(workspace_path),
+        "global_db_mtime_ns": _file_mtime_ns(global_db_path),
+        "workspace_files": ws_files,
+        "rules_digest": _rules_digest(rules),
+        "cli_chats_mtime_ns": _file_mtime_ns(cli_chats_path),
+    }
+
+
+def _normalize_fingerprint(fp: dict[str, Any]) -> dict[str, Any]:
+    """Return a copy of *fp* with two-item ``workspace_files`` rows rebuilt as lists.
+
+    JSON turns tuples into lists; this makes fresh and reloaded fingerprints comparable.
+    """
+    rows = fp.get("workspace_files")
+    if not isinstance(rows, list):
+        return dict(fp)
+    is_pair = [isinstance(r, (list, tuple)) and len(r) == 2 for r in rows]
+    return {**fp, "workspace_files": [[r[0], r[1]] if ok else r for r, ok in zip(rows, is_pair)]}
+
+
+def _fingerprint_equal(a: object, b: dict[str, Any]) -> bool:
+    """Return True when *a* is a dict whose normalised form equals that of *b*."""
+    stored_ok = isinstance(a, dict)
+    return stored_ok and _normalize_fingerprint(a) == _normalize_fingerprint(b)
+
+
+def _read_cache_file(path: Path | str) -> dict[str, Any] | None:
+    """Return the JSON object stored at *path*, or None on any miss.
+
+    A missing file, an I/O error, undecodable bytes, malformed JSON or a non-object
+    top level all count as a miss, so a corrupt cache file never breaks the caller.
+    """
+    p = Path(path)
+    try:
+        data = json.loads(p.read_text(encoding="utf-8")) if p.is_file() else None
+    except (OSError, ValueError) as e:  # ValueError covers JSONDecodeError and UnicodeDecodeError
+        _logger.debug("Summary cache read failed for %s: %s", path, e)
+        return None
+    return data if isinstance(data, dict) else None
+
+
+def _write_cache_file(path: Path | str, payload: dict[str, Any]) -> None:
+    """Best-effort write of *payload* as JSON to *path* via a temp file; errors are only logged."""
+    p = Path(path)
+    try:
+        p.parent.mkdir(parents=True, exist_ok=True)
+        tmp = p.with_suffix(p.suffix + ".tmp")
+        tmp.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
+        tmp.replace(p)  # rename into place only after the payload serialised and wrote fully
+    except (OSError, TypeError, ValueError) as e:  # includes payloads json cannot serialise
+        _logger.warning("Summary cache write failed for %s: %s", path, e)
+
+
+def get_cached_projects(fingerprint: dict[str, Any]) -> tuple[list[dict], list[dict]] | None:
+    """Return the cached ``(projects, warnings)`` pair, or None on a cache miss.
+
+    A miss is an absent/empty cache file, a fingerprint that differs from
+    *fingerprint*, or a ``projects`` value that is not a list.
+    """
+    cached = _read_cache_file(PROJECTS_CACHE_FILE)
+    if not cached or not _fingerprint_equal(cached.get("fingerprint"), fingerprint):
+        return None
+    projects, warnings = cached.get("projects"), cached.get("warnings")
+    if not isinstance(projects, list):
+        return None
+    return projects, (warnings if isinstance(warnings, list) else [])  # bad warnings -> []
+
+
+def set_cached_projects(
+    fingerprint: dict[str, Any],
+    projects: list[dict],
+    warnings: list[dict],
+) -> None:
+    """Write the project list and its warnings to the projects cache file.
+
+    *fingerprint* is stored next to the data so a later read can compare it.
+    """
+    record: dict[str, Any] = {"fingerprint": fingerprint}
+    record["projects"] = projects
+    record["warnings"] = warnings
+    _write_cache_file(PROJECTS_CACHE_FILE, record)
+
+
+def get_cached_composer_id_to_ws(
+    fingerprint: dict[str, Any],
+) -> dict[str, str] | None:
+    """Return the cached ``composer_id_to_ws`` mapping, or None on a cache miss.
+
+    Keys and values are coerced to ``str``; an absent/empty cache file, a
+    fingerprint mismatch or a non-dict mapping counts as a miss.
+    """
+    cached = _read_cache_file(COMPOSER_MAP_CACHE_FILE)
+    fresh = bool(cached) and _fingerprint_equal(cached.get("fingerprint"), fingerprint)
+    stored = cached.get("composer_id_to_ws") if fresh else None
+    return {str(k): str(v) for k, v in stored.items()} if isinstance(stored, dict) else None
+
+
+def set_cached_composer_id_to_ws(
+    fingerprint: dict[str, Any],
+    mapping: dict[str, str],
+) -> None:
+    """Write *mapping* to the composer-map cache file together with *fingerprint*.
+
+    The record uses the keys ``fingerprint`` and ``composer_id_to_ws``; the actual
+    file write is delegated to ``_write_cache_file``.
+    """
+    record = {"fingerprint": fingerprint, "composer_id_to_ws": mapping}
+    _write_cache_file(COMPOSER_MAP_CACHE_FILE, record)
+
+
+def _tab_summaries_path(workspace_id: str) -> Path:  # file name embeds 16 hex chars of the id's SHA-256
+    digest = hashlib.sha256(workspace_id.encode("utf-8"))
+    return CACHE_DIR / f"{TAB_SUMMARIES_PREFIX}{digest.hexdigest()[:16]}.json"
+
+
+def get_cached_tab_summaries(
+    fingerprint: dict[str, Any],
+    workspace_id: str,
+) -> tuple[dict, int] | None:
+    """Return the cached ``(payload, status)`` for *workspace_id*, or None on a miss.
+
+    The stored ``workspace_id`` is checked in addition to the fingerprint (the cache
+    file name only carries a truncated hash of the id). ``status`` defaults to 200.
+    """
+    cached = _read_cache_file(_tab_summaries_path(workspace_id))
+    if not cached or cached.get("workspace_id") != workspace_id:
+        return None
+    if not _fingerprint_equal(cached.get("fingerprint"), fingerprint):
+        return None
+    payload, status = cached.get("payload"), cached.get("status", 200)
+    return (payload, status) if isinstance(payload, dict) and isinstance(status, int) else None
+
+
+def set_cached_tab_summaries(
+    fingerprint: dict[str, Any],
+    workspace_id: str,
+    payload: dict,
+    status: int,
+) -> None:
+    """Write one workspace's tab-summary response to its cache file.
+
+    The record keeps *workspace_id*, *fingerprint*, *payload* and *status* under
+    the keys ``workspace_id``, ``fingerprint``, ``payload`` and ``status``.
+    """
+    record: dict[str, Any] = {"workspace_id": workspace_id, "fingerprint": fingerprint}
+    record["payload"] = payload
+    record["status"] = status
+    _write_cache_file(_tab_summaries_path(workspace_id), record)