Files
lore-engine-poc/tests/test_inventory_completeness.py
hermes-agent f62d6e8447 docs(merge): Phase 0 inventory — GraphMCP substrate catalog
Phase 0 of the lore-engine × GraphMCP merge (gate story S1).

- docs/merge/00-inventory.md: canonical catalog of every worker (10),
  MCP tool (11), and Redis stream (4) in the GraphMCP-Example substrate
  pinned at commit 064daa9. Each row includes env vars, streams read/
  written, Cypher queries emitted, LLM call sites, and source line refs
  in services/<worker>/main.go. Under the 500-line budget (450 lines).
- tests/test_inventory_completeness.py: TDD gate. 20 tests covering
  existence, line budget, name coverage, required attribute coverage,
  source path accuracy against the pinned checkout, and bidirectional
  cross-links. RED→GREEN: test_inventory_doc_exists failed with
  FileNotFoundError before the doc was written; all 20 pass now.
- meta/prd.md + planning-artifacts/architecture.md: mirrored from the
  lore-engine-merge-prds repo with a 'Phase 0' index link back to
  00-inventory.md appended, satisfying the cross-link acceptance
  criterion in the story.

Acceptance criteria from S1-phase-0-inventory.md: all 7 met.

Refs: lore-engine-merge-prds/_bmad-output/planning-artifacts/stories/S1-phase-0-inventory.md
2026-06-26 23:11:38 +00:00

351 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
test_inventory_completeness.py — Phase 0 inventory completeness gates.
The Phase 0 inventory (docs/merge/00-inventory.md) is the gate for the
lore-engine × GraphMCP substrate merge. Downstream phases (S2–S7) only
ship after this inventory is complete and accurate.
This test enforces two things:
1. Completeness — every worker/tool/stream named in the canonical matrix
from the BMAD story appears in the inventory doc.
2. Path accuracy — every `services/<worker>/main.go` path cited in the
inventory actually exists in the GraphMCP-Example checkout pinned by
this repo.
If you add a new GraphMCP worker or MCP tool: update the matrix below, then
update 00-inventory.md, then run this test.
"""
from __future__ import annotations
import os
import re
from pathlib import Path
import pytest
# Repository root: the parent of the directory that contains this test module.
REPO_ROOT = Path(__file__).resolve().parent.parent
INVENTORY_PATH = REPO_ROOT / "docs" / "merge" / "00-inventory.md"

# Where the GraphMCP-Example source tree lives for cross-referencing.
# The merge story pins commit 064daa9; this env var lets CI point at a
# different clone path if the workspace is laid out differently.
# `or` (instead of a .get() default) also covers GRAPHMCP_ROOT being set but
# empty: Path("").resolve() is the current working directory, which always
# exists and would silently defeat the "checkout not found" skip.
GRAPH_MCP_ROOT = Path(
    os.environ.get("GRAPHMCP_ROOT") or "/root/GraphMCP-Example"
).resolve()
# ── Canonical matrix from the BMAD Phase 0 story ────────────────────────────
# Source: lore-engine-merge-prds/_bmad-output/planning-artifacts/stories/
#         S1-phase-0-inventory.md
#
# The dual-LLM arbitration services (entity-extractor, lore-extractor,
# encounter-processor) are each documented as ONE logical pair, but the
# inventory must still name BOTH replicas (the "-2" suffix) because they
# exist as separate Go services.
#
# One plain worker name per entry.
WORKERS = [
    "discord-connector",
    "discord-filter",
    "lore-watcher",
    "ingestion-worker",
    # Arbitration pairs: primary, then its "-2" replica.
    "entity-extractor", "entity-extractor-2",
    "lore-extractor", "lore-extractor-2",
    "encounter-processor", "encounter-processor-2",
]
# The 11 MCP tool names the inventory doc must mention.
MCP_TOOLS = [
    "semantic_search", "graph_traverse", "get_context",
    "get_person_profile", "query_as_npc",
    "log_encounter", "get_unresolved", "get_contradictions",
    "list_encounters", "search_encounters", "get_encounter",
]
# The 4 Redis stream names the inventory doc must mention.
REDIS_STREAMS = ["raw.discord", "raw.messages", "raw.lore", "raw.encounters"]
# ── Helpers ─────────────────────────────────────────────────────────────────
def _load_inventory() -> str:
    """Return the full text of the inventory doc, decoded as UTF-8.

    Raises FileNotFoundError if the doc has not been written yet — that is
    the RED signal for callers that do not guard on its existence.
    """
    with INVENTORY_PATH.open(encoding="utf-8") as handle:
        return handle.read()
def _worker_source_path(worker: str) -> Path:
    """Resolve the canonical Go source location for a worker.

    Workers map to ``services/<worker>/main.go`` under GRAPH_MCP_ROOT. A
    "-2" replica resolves to the same path as its base worker because the
    replicas reuse the same Dockerfile + binary.

    Args:
        worker: Worker name, optionally carrying the "-2" replica suffix.

    Returns:
        Path to the worker's ``main.go`` inside the GraphMCP checkout.
    """
    # Strip the replica marker only when it is a true suffix. Splitting on
    # "-2" would cut at its FIRST occurrence anywhere in the name, so a name
    # such as "x-2go-sync" would wrongly collapse to "x".
    base = worker[:-2] if worker.endswith("-2") else worker
    return GRAPH_MCP_ROOT / "services" / base / "main.go"
def _extract_referenced_workers(doc: str) -> set[str]:
    r"""Return the subset of WORKERS that the inventory text mentions.

    A mention counts only when the name is not directly adjacent to another
    word character or a hyphen. A plain ``\b`` boundary is not sufficient
    for hyphenated names: "-" is a non-word character, so
    ``\bentity-extractor\b`` also matches inside "entity-extractor-2", and
    a doc that lists only the replica would wrongly satisfy the check for
    the base worker.

    Args:
        doc: Full text of the inventory document.

    Returns:
        Set of worker names from WORKERS found as standalone tokens.
    """
    return {
        name
        for name in WORKERS
        # Lookarounds treat "-" as part of the token, unlike \b.
        if re.search(rf"(?<![\w-]){re.escape(name)}(?![\w-])", doc)
    }
def _extract_referenced_tools(doc: str) -> set[str]:
    """Return the subset of MCP_TOOLS that occurs in ``doc`` as a whole word.

    Each tool name is regex-escaped and wrapped in word boundaries before
    searching.
    """
    return {
        tool
        for tool in MCP_TOOLS
        if re.search(rf"\b{re.escape(tool)}\b", doc) is not None
    }
def _extract_referenced_streams(doc: str) -> set[str]:
    """Return the subset of REDIS_STREAMS that occurs in ``doc``.

    Stream names are regex-escaped (so the "." is matched literally) and
    wrapped in word boundaries before searching.
    """
    def _mentioned(stream: str) -> bool:
        return bool(re.search(rf"\b{re.escape(stream)}\b", doc))

    return {stream for stream in REDIS_STREAMS if _mentioned(stream)}
# ── Existence + line-budget gate ────────────────────────────────────────────
def test_inventory_doc_exists():
    """Gate: the inventory doc must be present at INVENTORY_PATH."""
    failure_hint = (
        f"Inventory doc missing at {INVENTORY_PATH}. "
        "Phase 0 is the gate — write 00-inventory.md before any other phase."
    )
    assert INVENTORY_PATH.exists(), failure_hint
def test_inventory_under_500_lines():
    """Gate: the inventory doc must contain fewer than 500 lines.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    # Use a context manager so the handle is closed deterministically; an
    # anonymous open() inside the generator leaks it until garbage
    # collection and triggers ResourceWarning.
    with INVENTORY_PATH.open(encoding="utf-8") as handle:
        line_count = sum(1 for _ in handle)
    assert line_count < 500, (
        f"Inventory must stay under 500 lines (currently {line_count}). "
        "Move deep detail into per-worker sub-docs."
    )
# ── Completeness gates ──────────────────────────────────────────────────────
def test_inventory_covers_every_worker():
    """Gate: every name in WORKERS must be mentioned in the inventory doc.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once; calling the extractor inside the comprehension
    # would repeat the full regex pass for every worker.
    referenced = _extract_referenced_workers(doc)
    missing = [w for w in WORKERS if w not in referenced]
    assert not missing, (
        f"Inventory is missing these workers: {missing}. "
        "Per the story, every GraphMCP worker — including the -2 arbitration "
        "replicas — must be listed with env vars, streams, Cypher queries, "
        "LLM call sites, and container name."
    )
def test_inventory_covers_every_mcp_tool():
    """Gate: every name in MCP_TOOLS must be mentioned in the inventory doc.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once; calling the extractor inside the comprehension
    # would repeat the full regex pass for every tool.
    referenced = _extract_referenced_tools(doc)
    missing = [t for t in MCP_TOOLS if t not in referenced]
    assert not missing, (
        f"Inventory is missing these MCP tools: {missing}. "
        "Each must list input schema, output shape, and implementation line "
        "in services/mcp-server/main.go."
    )
def test_inventory_covers_every_redis_stream():
    """Gate: every name in REDIS_STREAMS must be mentioned in the inventory doc.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once; calling the extractor inside the comprehension
    # would repeat the full regex pass for every stream.
    referenced = _extract_referenced_streams(doc)
    missing = [s for s in REDIS_STREAMS if s not in referenced]
    assert not missing, (
        f"Inventory is missing these Redis streams: {missing}. "
        "Each must list producers, consumers, retention policy, expected throughput."
    )
# ── Required attribute coverage per worker ──────────────────────────────────
# Worker name → substrings the inventory doc must contain for that worker
# (env vars, stream names, Cypher keywords, LLM markers). The attribute test
# below checks each substring with a plain `in` against the doc text.
WORKER_REQUIRED_ATTRS = {
    "discord-connector": [
        "DISCORD_TOKEN", "DISCORD_GUILD_ID",
        "raw.discord", "raw.encounters",
    ],
    "discord-filter": [
        "SIMILARITY_THRESHOLD",
        "raw.discord", "raw.messages",
        "embed", "ANN",
    ],
    "lore-watcher": [
        "WATCH_DIR", "INGEST_URL", "DEBOUNCE_MS",
        "fsnotify", "sha256",
    ],
    "ingestion-worker": [
        "CHUNK_SIZE", "CHUNK_OVERLAP",
        "EMBED_URL", "EMBED_MODEL",
        "raw.messages", "raw.lore",
        "8080",
    ],
    "entity-extractor": ["LLM_URL", "LLM_MODEL", "raw.messages", "MERGE"],
    "entity-extractor-2": ["qwen3.5", "CONSUMER_NAME"],
    "lore-extractor": [
        "LLM_URL",
        "raw.lore",
        "LoreDocument", "FEATURES", "lore_verified",
    ],
    "lore-extractor-2": ["qwen3.5", "CONSUMER_NAME"],
    "encounter-processor": [
        "raw.encounters",
        "WITNESSED", "Encounter", "OCCURRED_AT",
    ],
    "encounter-processor-2": ["qwen3.5", "CONSUMER_NAME"],
}
def test_inventory_documents_required_worker_attributes():
    """Gate: each worker's required attribute substrings appear in the doc.

    For every entry in WORKER_REQUIRED_ATTRS the worker name itself must
    occur in the inventory text, and so must each of its attribute
    substrings (env vars, stream names, key Cypher keywords, LLM markers).
    This catches inventory rows that name a worker but skip the details the
    merge needs.

    The check is deliberately doc-wide rather than windowed: the story
    requires each worker to be documented with these columns, not that the
    columns sit next to the worker name. Skipped while the doc does not
    exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    inventory_text = _load_inventory()
    problems = []
    for name, required in WORKER_REQUIRED_ATTRS.items():
        if name in inventory_text:
            absent = [attr for attr in required if attr not in inventory_text]
            if absent:
                problems.append(f"{name}: missing attributes {absent}")
        else:
            # No point checking attributes for a worker that is not listed.
            problems.append(f"{name}: worker name not found in doc")
    assert not problems, (
        "Workers are listed but their detail rows are incomplete:\n - "
        + "\n - ".join(problems)
    )
# ── Path-accuracy gate (the second TDD step) ────────────────────────────────
# These need the GraphMCP-Example checkout available. Skip gracefully if not.
# Skip marker for tests that need the GraphMCP-Example checkout on disk.
requires_graphmcp = pytest.mark.skipif(
    not GRAPH_MCP_ROOT.exists(),
    # The two literals are concatenated, so the first must end with a
    # separator; without it the message ran the path straight into "set".
    reason=(
        f"GraphMCP-Example checkout not found at {GRAPH_MCP_ROOT}; "
        "set GRAPHMCP_ROOT to point at the pinned commit (064daa9)."
    ),
)
@requires_graphmcp
@pytest.mark.parametrize(
    "worker,expected_source",
    [
        ("discord-connector", "services/discord-connector/main.go"),
        ("discord-filter", "services/discord-filter/main.go"),
        ("lore-watcher", "services/lore-watcher/main.go"),
        ("ingestion-worker", "services/ingestion-worker/main.go"),
        # Each "-2" replica is paired with its base worker's source path.
        ("entity-extractor", "services/entity-extractor/main.go"),
        ("entity-extractor-2", "services/entity-extractor/main.go"),
        ("lore-extractor", "services/lore-extractor/main.go"),
        ("lore-extractor-2", "services/lore-extractor/main.go"),
        ("encounter-processor", "services/encounter-processor/main.go"),
        ("encounter-processor-2", "services/encounter-processor/main.go"),
        ("mcp-server", "services/mcp-server/main.go"),
    ],
)
def test_worker_source_path_exists(worker: str, expected_source: str):
    """Gate: each parametrized source path exists under GRAPH_MCP_ROOT.

    Catches stale paths after refactors of the GraphMCP-Example checkout.
    Skipped when the checkout is not available.
    """
    resolved = GRAPH_MCP_ROOT / expected_source
    assert resolved.exists(), (
        f"{worker}: cited source {expected_source} does not exist in "
        f"{GRAPH_MCP_ROOT}. Update the inventory after refactors."
    )
@requires_graphmcp
def test_inventory_cites_correct_source_paths():
    """Gate: every `services/<dir>/main.go` cited in the doc names a real dir.

    Collects each directory name cited in a ``services/<dir>/main.go``
    reference and asserts that ``services/<dir>`` is a directory under
    GRAPH_MCP_ROOT. Skipped when the checkout is unavailable or the doc does
    not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Accept digits, underscores and upper case in the directory segment.
    # With a lowercase-and-hyphen-only class, a wrong citation such as
    # services/entity-extractor-2/main.go never matched and therefore
    # escaped verification entirely. Placeholders like <worker> still do
    # not match because "<" and ">" are outside the class.
    cited = set(re.findall(r"services/([A-Za-z0-9_\-]+)/main\.go", doc))
    # Sorted so the failure message is stable across runs (set order is not).
    missing_dirs = sorted(
        c for c in cited if not (GRAPH_MCP_ROOT / "services" / c).is_dir()
    )
    assert not missing_dirs, (
        f"Inventory cites worker dirs that don't exist in {GRAPH_MCP_ROOT}: "
        f"{missing_dirs}"
    )
# ── Cross-link gates ────────────────────────────────────────────────────────
def test_inventory_links_back_to_prd():
    """Gate: the inventory doc must reference meta/prd.md.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # A single substring test suffices: any text containing the relative
    # form "../meta/prd.md" necessarily contains "meta/prd.md" as well, so
    # a separate check for the "../" form could never change the outcome.
    assert "meta/prd.md" in doc, (
        "Inventory must cross-link back to meta/prd.md (story acceptance criterion)."
    )
def test_inventory_links_back_to_architecture():
    """Gate: the inventory doc must reference planning-artifacts/architecture.md.

    Skipped while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # A single substring test suffices: the "../"-prefixed form always
    # contains the unprefixed path, so checking it separately could never
    # change the outcome.
    assert "planning-artifacts/architecture.md" in doc, (
        "Inventory must cross-link back to planning-artifacts/architecture.md "
        "(story acceptance criterion)."
    )