VectifyAI · KylinMountain · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/openkb/agent/query.py b/openkb/agent/query.py
@@ -8,6 +8,7 @@
 from agents import ToolOutputImage, ToolOutputText
 from openkb.agent.tools import (
     get_wiki_page_content,
+    grep_wiki_files,
     read_wiki_file,
     read_wiki_image,
     write_kb_file,
@@ -38,7 +39,21 @@
      ranges to help you target. Never fetch the whole document.
 6. Source content may reference images (e.g. ![image](sources/images/doc/file.png)).
    Use the get_image tool to view them when needed.
-7. Synthesize a clear, concise, well-cited answer grounded in wiki content.
+7. DRILL FOR DETAIL with grep_wiki (after reading the curated pages above):
+   summaries are lossy, so when the question needs specifics they do not
+   fully contain — numbers, names, exact claims, edge cases — use grep_wiki
+   to LOCATE which pages hold them. grep is lexical, so try a few term
+   variants: acronym and expansion, singular/plural, close synonyms. Treat
+   the results as a reading list: each line is `path:line:text` — for every
+   relevant page you have NOT already read in full, read_file that path
+   (everything before the first colon) and extract the detail. Do NOT answer
+   from the grep line alone; open the page. If a page contradicts what you
+   already have, note both claims with their citations rather than silently
+   choosing one. Repeat locate-then-read until the pages that actually
+   contain the needed detail have been read (at most 3 grep rounds; stop once
+   a round surfaces no new relevant page). grep_wiki complements index.md and
+   summaries (your starting point) — it does not replace them.
+8. Synthesize a clear, concise, well-cited answer grounded in wiki content.
 
 Answer based only on wiki content. Be concise.
 Before each tool call, output one short sentence explaining the reason.
@@ -87,12 +102,39 @@ def get_image(image_path: str) -> ToolOutputImage | ToolOutputText:
             return ToolOutputImage(image_url=result["image_url"])
         return ToolOutputText(text=result["text"])
 
+    @function_tool
+    def grep_wiki(pattern: str, ignore_case: bool = True, fixed_string: bool = False) -> str:
+        """Locate wiki pages that contain specific detail, by lexical grep.
+
+        Use this to FIND which pages hold specifics the summaries lack —
+        numbers, names, exact claims, edge cases — then read_file those pages
+        to extract the detail. It searches every wiki .md file (including
+        short-doc sources/); it does NOT search long-document page content
+        (use get_page_content for that).
+
+        Returns up to 50 matches, one per line as 'path.md:LINE:text'. Each
+        result is a page to OPEN, not an answer: take the path (everything
+        before the FIRST colon) and read_file it — do not answer from the grep
+        line alone. Pattern is an extended regex (ERE): alternation 'a|b', '?',
+        '+', '()' work; set fixed_string=True for a literal search. Try a few
+        term variants (acronym/expansion, singular/plural, synonyms) — this is
+        lexical, not semantic.
+
+        Args:
+            pattern: Search pattern (extended regex by default).
+            ignore_case: Case-insensitive (default True).
+            fixed_string: Treat pattern as a literal string, not a regex.
+        """
+        # wiki_root comes from the enclosing scope; the shared helper does the search.
+        flags = {"ignore_case": ignore_case, "fixed_string": fixed_string}
+        return grep_wiki_files(pattern, wiki_root, **flags)
+
     from agents.model_settings import ModelSettings
 
     return Agent(
         name="wiki-query",
         instructions=instructions,
-        tools=[read_file, get_page_content, get_image],
+        tools=[read_file, get_page_content, get_image, grep_wiki],
         model=f"litellm/{model}",
         model_settings=ModelSettings(parallel_tool_calls=False),
     )

diff --git a/openkb/agent/tools.py b/openkb/agent/tools.py
@@ -7,9 +7,19 @@
 from __future__ import annotations
 
 import contextlib
+import functools
 import json as _json
+import os
+import shutil
+import subprocess
 from pathlib import Path
 
+from openkb.schema import EXCLUDED_WIKI_FILES
+
+# grep_wiki_files tuning
+_GREP_MAX_LINES = 50  # cap on match lines returned by grep_wiki_files
+_GREP_TIMEOUT_S = 10  # timeout (seconds) passed to the grep subprocess
+
 
 def list_wiki_files(directory: str, wiki_root: str) -> str:
     """List all Markdown files in a wiki subdirectory.
@@ -54,6 +64,108 @@ def read_wiki_file(path: str, wiki_root: str) -> str:
     return full_path.read_text(encoding="utf-8")
 
 
+@functools.lru_cache(maxsize=None)
+def _grep_binary() -> str | None:
+    """Return the system grep's path (None if absent); looked up once, then memoized."""
+    return shutil.which("grep")
+
+
+def grep_wiki_files(
+    pattern: str,
+    wiki_root: str,
+    *,
+    ignore_case: bool = True,
+    fixed_string: bool = False,
+) -> str:
+    """Lexically search the wiki's markdown layer for ``pattern`` using grep.
+
+    A completeness sweep over every ``*.md`` file under *wiki_root* —
+    summaries, concepts, entities, explorations, ``index.md``, and short-doc
+    ``sources/*.md``. Long-doc per-page ``*.json`` (PageIndex's domain) is
+    excluded (only ``*.md`` is searched), as are the wiki's bookkeeping /
+    scaffolding files (``log.md``, ``AGENTS.md``, ``SCHEMA.md`` — see
+    :data:`openkb.schema.EXCLUDED_WIKI_FILES`).
+
+    Shells out to the system ``grep`` (POSIX, ubiquitous on macOS/Linux) with
+    ``shell=False``, so a hostile *pattern* cannot inject commands. ``pattern``
+    is an **extended** regular expression (ERE) by default — alternation
+    ``a|b``, ``?``, ``+``, ``()`` all work — or a literal string when
+    *fixed_string* is True.
+
+    Args:
+        pattern: Search pattern. ERE by default; literal when *fixed_string*.
+        wiki_root: Absolute path to the wiki root directory.
+        ignore_case: Case-insensitive match (default True).
+        fixed_string: Treat *pattern* as a literal string, not a regex.
+
+    Returns:
+        Up to :data:`_GREP_MAX_LINES` matches, each line ``relative/path.md:LINE:text``
+        (the path is everything before the first colon), plus a truncation
+        notice if capped. On empty pattern / no match / missing grep / timeout /
+        launch failure / error-with-no-results, returns a message instead of raising.
+    """
+    if not pattern or not pattern.strip():
+        return "Provide a non-empty search pattern."
+
+    root = Path(wiki_root).resolve()
+    if not root.exists():
+        return f"Wiki root not found: {wiki_root}"
+
+    grep = _grep_binary()
+    if not grep:
+        return "grep unavailable on this system."
+
+    cmd = [grep, "-rn", "--include=*.md", "--exclude-dir=images", "--exclude-dir=.git"]
+    for name in sorted(EXCLUDED_WIKI_FILES):
+        cmd.append(f"--exclude={name}")
+    if ignore_case:
+        cmd.append("-i")
+    cmd.append("-F" if fixed_string else "-E")
+    cmd += ["-e", pattern, str(root)]
+
+    try:
+        proc = subprocess.run(
+            cmd, capture_output=True, text=True, errors="replace",
+            timeout=_GREP_TIMEOUT_S, check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return "grep timed out; narrow the pattern."
+    except (OSError, ValueError) as exc:
+        # e.g. the cached grep path is no longer runnable, or a NUL byte in pattern
+        return f"grep error: {exc}."
+
+    prefix = str(root) + os.sep
+    results: list[str] = []
+    for line in proc.stdout.splitlines():
+        if not line.startswith(prefix):
+            continue  # defensive: skip blanks and anything not under wiki_root
+        rel = line[len(prefix):]
+        path_part, _, _ = rel.partition(":")
+        # Defense in depth: re-check the basename even though --exclude should drop it.
+        if Path(path_part).name in EXCLUDED_WIKI_FILES:
+            continue
+        results.append(rel)
+        if len(results) > _GREP_MAX_LINES:
+            break  # one surplus match is enough to detect truncation
+
+    if not results:
+        # grep exit codes: 0 = match, 1 = no match, >=2 = error. grep can exit
+        # >=2 (e.g. one unreadable file) while still printing valid matches —
+        # those were collected above. Only report an error when nothing usable
+        # came back.
+        if proc.returncode >= 2:
+            stderr_lines = (proc.stderr or "").strip().splitlines()
+            first = stderr_lines[0] if stderr_lines else "unknown error"
+            return f"grep error: {first}."
+        return f"No matches for {pattern}."
+
+    truncated = len(results) > _GREP_MAX_LINES
+    out = "\n".join(results[:_GREP_MAX_LINES])
+    if truncated:
+        out += "\n… more matches; narrow the pattern."
+    return out
+
+
 def parse_pages(pages: str) -> list[int]:
     """Parse a page specification string into a sorted, deduplicated list of page numbers.
 

diff --git a/openkb/lint.py b/openkb/lint.py
@@ -15,13 +15,13 @@
 
 import yaml
 
-from openkb.schema import PAGE_CONTENT_DIRS
+from openkb.schema import EXCLUDED_WIKI_FILES, PAGE_CONTENT_DIRS
 
 # Matches [[wikilink]] or [[subdir/link]]
 _WIKILINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
 
 # Files to exclude from lint scanning (schema, logs, etc.)
-_EXCLUDED_FILES = {"AGENTS.md", "SCHEMA.md", "log.md"}
+_EXCLUDED_FILES = EXCLUDED_WIKI_FILES
 
 
 def _normalize_target(target: str) -> str:

diff --git a/openkb/schema.py b/openkb/schema.py
@@ -6,6 +6,11 @@
 # for surfaces that enumerate page content (list, lint, status, skill gate).
 PAGE_CONTENT_DIRS = ("summaries", "concepts", "entities")
 
+# Bookkeeping / scaffolding files that live under wiki/ but are NOT content.
+# Basenames only. Single source of truth shared by the structural linter and
+# the grep search tool so their exclusion policy can never drift.
+EXCLUDED_WIKI_FILES: frozenset[str] = frozenset({"AGENTS.md", "SCHEMA.md", "log.md"})
+
 # Canonical empty index.md seed. Used by `openkb init` and the compiler's
 # lazy-create path so they never drift.
 INDEX_SEED = "# Knowledge Base Index\n\n## Documents\n\n## Concepts\n\n## Entities\n\n## Explorations\n"