Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- **Summary disk cache (Phase 3)** — project list and tab summaries cached under
`~/.cache/cursor-chat-browser/`, invalidated when global or per-workspace DB
mtimes change; bypass with `?nocache=1` or `CURSOR_CHAT_BROWSER_NOCACHE=1` (#84)
- **Lazy-load workspace UI** — workspace sidebar renders from a lightweight summary
payload; full bubble content is fetched per-conversation when the user selects it,
reducing first-paint time from 1–2 min to < 3 s on large local fixtures (#84)
- **`GET /api/workspaces/<id>/tabs?summary=1`** — new summary-only variant returns
`id`, `title`, `timestamp`, `messageCount`, and optional `metadata.modelsUsed`
without loading any bubble data (#84)
- **`GET /api/workspaces/<id>/tabs/<composer_id>`** — new single-conversation
endpoint loads only scoped `bubbleId:{id}:%`, `messageRequestContext:{id}:%`,
and `codeBlockDiff:{id}:%` KV rows, avoiding a full global bubble scan (#84)
- **Scoped KV loaders** in `services/workspace_db.py`:
`load_bubbles_for_composer`, `load_message_request_context_for_composer`,
`load_code_block_diffs_for_composer` — used by the single-tab path (#84)

### Changed
- **List-path performance** — skip full `messageRequestContext` scan unless
invalid workspace aliases are needed; filter `composerData` in SQL; skip
`Composer.from_dict` on list/summary paths; cache `composer_id_to_ws` mapping (#84)
- **`GET /api/workspaces`** (`list_workspace_projects`) no longer performs a
global `bubbleId:%` scan; conversation presence is determined from
`fullConversationHeadersOnly` headers alone, and workspace assignment relies
on `composer_id_to_ws` (primary) plus `projectLayouts` from `messageRequestContext` (#84)
- **`assemble_workspace_tabs`** inner per-composer loop refactored into a shared
`_assemble_tab_from_composer_data` helper reused by `assemble_single_tab`; full
path behaviour is unchanged (#84)

### Deprecated
- Direct use of `GET /api/workspaces/<id>/tabs` (no `?summary=1`) from the workspace
UI on page load; the UI now calls `?summary=1` for first paint and lazy-fetches
individual tabs. The full-assembly endpoint remains available for export,
search, and backward-compatible consumers (planned removal: post-1.0) (#84)


### Added
- **Web UI** — browse and search all Cursor AI workspaces; conversation view with syntax-highlighted code blocks, dark/light mode, and bookmarkable chat URLs (#63)
- **Export formats** — one-click export of chats as Markdown, HTML, PDF, JSON, and CSV from the web UI (#63)
- **CLI export** (`cursor-chat-export` / `scripts/export.py`) — zip archive or individual Markdown files with YAML frontmatter; incremental mode (`--since last`) preserves state across runs (#63, #42, #61)
Expand Down
42 changes: 38 additions & 4 deletions api/workspaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import os
from datetime import datetime, timezone

from flask import Blueprint, jsonify
from flask import Blueprint, jsonify, request

from api.flask_config import exclusion_rules

Expand All @@ -29,7 +29,11 @@
)
from services.cli_tabs import get_cli_workspace_tabs
from services.workspace_listing import list_workspace_projects
from services.workspace_tabs import assemble_workspace_tabs
from services.workspace_tabs import (
assemble_single_tab,
assemble_workspace_tabs,
list_workspace_tab_summaries,
)

# Re-exported for tests/test_models_wired_at_read_sites.py — the typed-model
# spy harness patches `workspaces_mod.Bubble` / `.Composer` / `.Workspace` to
Expand All @@ -46,12 +50,18 @@
# GET /api/workspaces
# ---------------------------------------------------------------------------

def _request_nocache() -> bool:
    """Return True when the current request asks to bypass the summary cache.

    Reads the ``nocache`` query parameter and treats ``1``, ``true`` and
    ``yes`` as enabled, ignoring case and surrounding whitespace, so the
    query flag accepts the same spellings as the
    ``CURSOR_CHAT_BROWSER_NOCACHE`` environment variable. A missing or empty
    parameter means "use the cache".
    """
    raw = request.args.get("nocache") or ""
    return raw.strip().lower() in ("1", "true", "yes")


@bp.route("/api/workspaces")
def list_workspaces():
try:
workspace_path = resolve_workspace_path()
rules = exclusion_rules()
projects, warnings = list_workspace_projects(workspace_path, rules)
projects, warnings = list_workspace_projects(
workspace_path, rules, nocache=_request_nocache(),
)
payload: dict = {"projects": projects}
if warnings:
payload["warnings"] = warnings
Expand Down Expand Up @@ -154,9 +164,33 @@ def get_workspace_tabs(workspace_id):
try:
workspace_path = resolve_workspace_path()
rules = exclusion_rules()
payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules)
summary = request.args.get("summary") in ("1", "true")
if summary:
payload, status = list_workspace_tab_summaries(
workspace_id, workspace_path, rules, nocache=_request_nocache(),
)
else:
payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules)
return jsonify(payload), status
except Exception:
_logger.exception("Failed to get workspace tabs")
return jsonify({"error": "Failed to get workspace tabs"}), 500


# ---------------------------------------------------------------------------
# GET /api/workspaces/<id>/tabs/<composer_id>
# ---------------------------------------------------------------------------

@bp.route("/api/workspaces/<workspace_id>/tabs/<composer_id>")
def get_workspace_tab(workspace_id, composer_id):
    """Serve a single conversation (tab) of a workspace as JSON.

    CLI workspaces (ids starting with ``cli:``) are rejected with HTTP 400.
    Otherwise the payload and status code produced by ``assemble_single_tab``
    are returned; any exception is logged and reported as HTTP 500.
    """
    is_cli_workspace = workspace_id.startswith("cli:")
    if is_cli_workspace:
        unsupported = {"error": "Per-tab lazy load is not supported for CLI workspaces"}
        return jsonify(unsupported), 400

    try:
        # Arguments are evaluated left to right: path first, then exclusion rules.
        payload, status = assemble_single_tab(
            workspace_id,
            composer_id,
            resolve_workspace_path(),
            exclusion_rules(),
        )
        return jsonify(payload), status
    except Exception:
        _logger.exception("Failed to get workspace tab")
        failure = {"error": "Failed to get workspace tab"}
        return jsonify(failure), 500

226 changes: 226 additions & 0 deletions services/summary_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
"""Disk cache for derived workspace summaries (issue #84 Phase 3).

Caches project lists and per-workspace tab summaries keyed by storage mtimes
so repeat page loads avoid re-scanning Cursor's global KV index.

Bypass: set env ``CURSOR_CHAT_BROWSER_NOCACHE=1`` or pass ``?nocache=1`` on API
requests. Cache files live under ``~/.cache/cursor-chat-browser/``.
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Any

# Module-level logger; cache read/write failures in this module are reported through it.
_logger = logging.getLogger(__name__)

# Schema version stored in every fingerprint under the "version" key; changing
# it makes fingerprints saved with the previous value stop comparing equal.
CACHE_VERSION = 1
# Directory under the user's home that holds all summary cache files.
CACHE_DIR = Path.home() / ".cache" / "cursor-chat-browser"
# Cache file for the project list (and its warnings).
PROJECTS_CACHE_FILE = CACHE_DIR / "projects.json"
# Cache file for the composer-id -> workspace mapping.
COMPOSER_MAP_CACHE_FILE = CACHE_DIR / "composer-id-to-ws.json"
# Filename prefix of per-workspace tab-summary cache files; a hash of the
# workspace id and ".json" are appended to it.
TAB_SUMMARIES_PREFIX = "tab-summaries-"


def nocache_enabled(*, request_nocache: bool = False) -> bool:
    """Report whether the summary disk cache should be bypassed.

    The cache is bypassed when the caller requests it via *request_nocache*,
    or when the ``CURSOR_CHAT_BROWSER_NOCACHE`` environment variable is set to
    ``1``, ``true`` or ``yes`` (case-insensitive, surrounding whitespace
    ignored).
    """
    if not request_nocache:
        env_flag = os.environ.get("CURSOR_CHAT_BROWSER_NOCACHE", "")
        return env_flag.strip().lower() in ("1", "true", "yes")
    return True


def _rules_digest(rules: list) -> str:
    """Return a 16-hex-character digest identifying the exclusion *rules*.

    The rules are serialized as JSON with sorted keys so that dictionaries
    differing only in key order hash identically. If they cannot be
    serialized, their ``repr`` is hashed instead.
    """
    try:
        serialized = json.dumps(rules, sort_keys=True, ensure_ascii=False)
    except (TypeError, ValueError):
        serialized = repr(rules)
    hasher = hashlib.sha256()
    hasher.update(serialized.encode("utf-8"))
    return hasher.hexdigest()[:16]


def _file_mtime_ns(path: str | None) -> int | None:
    """Return the modification time of *path* in nanoseconds.

    Returns ``None`` when *path* is empty/``None``, does not name a regular
    file, or cannot be stat'ed.
    """
    if path and os.path.isfile(path):
        try:
            return os.stat(path).st_mtime_ns
        except OSError:
            # The file disappeared or became unreadable after the isfile check.
            return None
    return None


def fingerprint_workspace_storage(
    workspace_path: str,
    workspace_entries: list[dict],
    *,
    global_db_path: str | None,
    rules: list,
    cli_chats_path: str | None = None,
) -> dict[str, Any]:
    """Describe the current on-disk state used to validate cached summaries.

    For every entry in *workspace_entries* that has a string ``name``, the
    modification times of ``state.vscdb`` and ``workspace.json`` inside
    ``workspace_path/<name>`` are recorded (files that are missing are
    skipped). The result also carries the cache version, the normalized
    workspace path, the modification times of the global DB and the CLI chats
    file (``None`` when unavailable) and a digest of *rules*.

    Returns:
        A JSON-serializable dict; two fingerprints compare equal only when
        none of the recorded inputs changed.
    """
    tracked_files = ("state.vscdb", "workspace.json")
    stamped: list[list[str | int]] = []
    for entry in workspace_entries:
        folder = entry.get("name")
        if isinstance(folder, str):
            for filename in tracked_files:
                stamp = _file_mtime_ns(os.path.join(workspace_path, folder, filename))
                if stamp is not None:
                    stamped.append([f"{folder}/{filename}", stamp])

    # Sort by relative path so the fingerprint does not depend on listing order.
    workspace_files = sorted(stamped, key=lambda pair: pair[0])

    fingerprint: dict[str, Any] = {}
    fingerprint["version"] = CACHE_VERSION
    fingerprint["workspace_path"] = os.path.normpath(workspace_path)
    fingerprint["global_db_mtime_ns"] = _file_mtime_ns(global_db_path)
    fingerprint["workspace_files"] = workspace_files
    fingerprint["rules_digest"] = _rules_digest(rules)
    fingerprint["cli_chats_mtime_ns"] = _file_mtime_ns(cli_chats_path)
    return fingerprint


def _normalize_fingerprint(fp: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of *fp* with ``workspace_files`` rows as lists.

    A fingerprint loaded from JSON holds lists where an in-memory one may hold
    tuples; converting every two-item row to a list makes both forms compare
    equal. Rows of any other shape, and a non-list ``workspace_files`` value,
    are left untouched. *fp* itself is not modified.
    """
    result = dict(fp)
    rows = fp.get("workspace_files")
    if not isinstance(rows, list):
        return result

    def as_pair(row):
        # Only well-formed (path, mtime) rows are rewritten.
        if isinstance(row, (list, tuple)) and len(row) == 2:
            return [row[0], row[1]]
        return row

    result["workspace_files"] = [as_pair(row) for row in rows]
    return result


def _fingerprint_equal(a: object, b: dict[str, Any]) -> bool:
    """Tell whether stored fingerprint *a* matches freshly computed *b*.

    *a* comes from a cache file and may be anything; a non-dict never matches.
    Both sides are normalized first so list/tuple differences are ignored.
    """
    return isinstance(a, dict) and _normalize_fingerprint(a) == _normalize_fingerprint(b)


def _read_cache_file(path: Path | str) -> dict[str, Any] | None:
    """Load a cache file and return its top-level JSON object.

    Args:
        path: Location of the cache file.

    Returns:
        The decoded dict, or ``None`` when the file does not exist, cannot be
        read, is not valid UTF-8 / JSON, or its top-level value is not an
        object. Failures are logged at debug level and never raised, so a
        corrupt cache simply behaves like a cache miss.
    """
    p = Path(path)
    if not p.is_file():
        return None
    try:
        with p.open(encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (bytes that are not valid UTF-8).
        _logger.debug("Summary cache read failed for %s: %s", path, e)
        return None
    return data if isinstance(data, dict) else None


def _write_cache_file(path: Path | str, payload: dict[str, Any]) -> None:
    """Best-effort write of *payload* as JSON to the cache file at *path*.

    The data is written to a sibling ``<name>.tmp`` file which is then renamed
    over the target, so an existing cache file is only replaced by a fully
    written one. Parent directories are created as needed.

    Caching is an optimization, so failures never propagate: I/O errors,
    payloads that are not JSON-serializable and strings that cannot be encoded
    as UTF-8 (e.g. lone surrogates) are logged as a warning, the partial
    temporary file is removed, and any existing cache file is left as it was.

    Args:
        path: Destination cache file.
        payload: JSON-serializable dict to store.
    """
    p = Path(path)
    tmp = p.with_suffix(p.suffix + ".tmp")
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False)
        tmp.replace(p)
    except (OSError, TypeError, ValueError) as e:
        # TypeError: unserializable object; ValueError: circular reference or
        # UnicodeEncodeError raised while writing non-ASCII text.
        _logger.warning("Summary cache write failed for %s: %s", path, e)
        try:
            tmp.unlink()
        except OSError:
            # Nothing to clean up (the temp file was never created) or it
            # cannot be removed; either way there is nothing more to do.
            pass


def get_cached_projects(
    fingerprint: dict[str, Any],
) -> tuple[list[dict], list[dict]] | None:
    """Return the cached ``(projects, warnings)`` pair if it is still valid.

    ``None`` is returned when no usable cache file exists, when the stored
    fingerprint differs from *fingerprint*, or when the stored project list is
    not a list. A missing or malformed warnings value is replaced by an empty
    list.
    """
    cached = _read_cache_file(PROJECTS_CACHE_FILE)
    if not cached or not _fingerprint_equal(cached.get("fingerprint"), fingerprint):
        return None

    projects = cached.get("projects")
    if not isinstance(projects, list):
        return None

    warnings = cached.get("warnings")
    return projects, (warnings if isinstance(warnings, list) else [])


def set_cached_projects(
    fingerprint: dict[str, Any],
    projects: list[dict],
    warnings: list[dict],
) -> None:
    """Store the project list and its warnings together with *fingerprint*."""
    record = {
        "fingerprint": fingerprint,
        "projects": projects,
        "warnings": warnings,
    }
    _write_cache_file(PROJECTS_CACHE_FILE, record)


def get_cached_composer_id_to_ws(
    fingerprint: dict[str, Any],
) -> dict[str, str] | None:
    """Return the cached composer-id -> workspace mapping if it is still valid.

    ``None`` is returned when no usable cache file exists, when the stored
    fingerprint differs from *fingerprint*, or when the stored mapping is not
    a dict. Keys and values are coerced to ``str``.
    """
    cached = _read_cache_file(COMPOSER_MAP_CACHE_FILE)
    if not cached:
        return None

    fingerprint_matches = _fingerprint_equal(cached.get("fingerprint"), fingerprint)
    mapping = cached.get("composer_id_to_ws")
    if fingerprint_matches and isinstance(mapping, dict):
        return {str(composer_id): str(ws) for composer_id, ws in mapping.items()}
    return None


def set_cached_composer_id_to_ws(
    fingerprint: dict[str, Any],
    mapping: dict[str, str],
) -> None:
    """Store the composer-id -> workspace mapping together with *fingerprint*."""
    record = {"fingerprint": fingerprint, "composer_id_to_ws": mapping}
    _write_cache_file(COMPOSER_MAP_CACHE_FILE, record)


def _tab_summaries_path(workspace_id: str) -> Path:
    """Return the cache file path for the tab summaries of *workspace_id*.

    The id is hashed (first 16 hex characters of its SHA-256) so that the
    resulting filename contains only hexadecimal characters regardless of what
    the workspace id contains.
    """
    digest = hashlib.sha256(workspace_id.encode("utf-8")).hexdigest()
    filename = TAB_SUMMARIES_PREFIX + digest[:16] + ".json"
    return CACHE_DIR / filename


def get_cached_tab_summaries(
    fingerprint: dict[str, Any],
    workspace_id: str,
) -> tuple[dict, int] | None:
    """Return the cached ``(payload, status)`` for a workspace's tab summaries.

    ``None`` is returned when no usable cache file exists, when the file was
    written for a different workspace id, when the stored fingerprint differs
    from *fingerprint*, or when the stored payload/status have unexpected
    types. A missing status defaults to 200.
    """
    cached = _read_cache_file(_tab_summaries_path(workspace_id))
    if not cached:
        return None

    # The filename is only a truncated hash, so confirm the id stored inside.
    same_workspace = cached.get("workspace_id") == workspace_id
    if not (same_workspace and _fingerprint_equal(cached.get("fingerprint"), fingerprint)):
        return None

    payload = cached.get("payload")
    status = cached.get("status", 200)
    if isinstance(payload, dict) and isinstance(status, int):
        return payload, status
    return None


def set_cached_tab_summaries(
    fingerprint: dict[str, Any],
    workspace_id: str,
    payload: dict,
    status: int,
) -> None:
    """Store a workspace's tab-summary response together with *fingerprint*.

    The workspace id is saved inside the file as well, so a reader can verify
    that the file really belongs to the workspace it asked for.
    """
    record = {
        "workspace_id": workspace_id,
        "fingerprint": fingerprint,
        "payload": payload,
        "status": status,
    }
    _write_cache_file(_tab_summaries_path(workspace_id), record)
Loading
Loading