Phase 0 of the lore-engine × GraphMCP merge (gate story S1). - docs/merge/00-inventory.md: canonical catalog of every worker (10), MCP tool (11), and Redis stream (4) in the GraphMCP-Example substrate pinned at commit 064daa9. Each row includes env vars, streams read/written, Cypher queries emitted, LLM call sites, and source line refs in services/<worker>/main.go. Under the 500-line budget (450 lines). - tests/test_inventory_completeness.py: TDD gate. 20 tests covering existence, line budget, name coverage, required attribute coverage, source path accuracy against the pinned checkout, and bidirectional cross-links. RED→GREEN: test_inventory_doc_exists failed with FileNotFoundError before the doc was written; all 20 pass now. - meta/prd.md + planning-artifacts/architecture.md: mirrored from the lore-engine-merge-prds repo with a 'Phase 0' index link back to 00-inventory.md appended, satisfying the cross-link acceptance criterion in the story. Acceptance criteria from S1-phase-0-inventory.md: all 7 met. Refs: lore-engine-merge-prds/_bmad-output/planning-artifacts/stories/S1-phase-0-inventory.md
351 lines
12 KiB
Python
351 lines
12 KiB
Python
"""
|
||
test_inventory_completeness.py — Phase 0 inventory completeness gates.
|
||
|
||
The Phase 0 inventory (docs/merge/00-inventory.md) is the gate for the
|
||
lore-engine × GraphMCP substrate merge. Downstream phases (S2–S7) only
|
||
ship after this inventory is complete and accurate.
|
||
|
||
This test enforces two things:
|
||
1. Completeness — every worker/tool/stream named in the canonical matrix
|
||
from the BMAD story appears in the inventory doc.
|
||
2. Path accuracy — every `services/<worker>/main.go` path cited in the
|
||
inventory actually exists in the GraphMCP-Example checkout pinned by
|
||
this repo.
|
||
|
||
If you add a new GraphMCP worker or MCP tool: update the matrix below, then
|
||
update 00-inventory.md, then run this test.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import re
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
# Repository root: the parent of the directory that contains this module.
REPO_ROOT = Path(__file__).resolve().parents[1]
# The Phase 0 inventory document that the gates in this module inspect.
INVENTORY_PATH = REPO_ROOT.joinpath("docs", "merge", "00-inventory.md")

# Location of the GraphMCP-Example source tree used for cross-referencing.
# The merge story pins commit 064daa9; set the GRAPHMCP_ROOT environment
# variable to point CI at a clone that lives somewhere else.
GRAPH_MCP_ROOT = Path(os.getenv("GRAPHMCP_ROOT", "/root/GraphMCP-Example")).resolve()
|
||
|
||
# ── Canonical matrix from the BMAD Phase 0 story ────────────────────────────
|
||
# Source: lore-engine-merge-prds/_bmad-output/planning-artifacts/stories/
|
||
# S1-phase-0-inventory.md
|
||
#
|
||
# Dual-LLM arbitration pairs (entity-extractor / lore-extractor / encounter-processor)
|
||
# are documented as ONE logical pair each — but the inventory must name BOTH
|
||
# binary replicas ("-2" suffix) because they exist as separate Go services.
|
||
|
||
WORKERS = [
    # One plain string per service name the inventory must mention; the "-2"
    # entries are the arbitration replicas of the entry directly above them.
    "discord-connector",
    "discord-filter",
    "lore-watcher",
    "ingestion-worker",
    "entity-extractor",
    "entity-extractor-2",
    "lore-extractor",
    "lore-extractor-2",
    "encounter-processor",
    "encounter-processor-2",
]
|
||
|
||
# MCP tool names that must each appear in the inventory doc (11 entries).
MCP_TOOLS = [
    "semantic_search", "graph_traverse", "get_context",
    "get_person_profile", "query_as_npc",
    "log_encounter", "get_unresolved", "get_contradictions",
    "list_encounters", "search_encounters", "get_encounter",
]
|
||
|
||
# Redis stream names that must each appear in the inventory doc (4 entries).
REDIS_STREAMS = ["raw.discord", "raw.messages", "raw.lore", "raw.encounters"]
|
||
|
||
|
||
# ── Helpers ─────────────────────────────────────────────────────────────────
|
||
|
||
def _load_inventory() -> str:
    """Return the full text of the inventory doc, decoded as UTF-8.

    No existence check is made here, so a caller gets FileNotFoundError while
    the doc is still unwritten — that is the RED signal of the TDD cycle.
    """
    with INVENTORY_PATH.open(encoding="utf-8") as handle:
        return handle.read()
|
||
|
||
|
||
def _worker_source_path(worker: str) -> Path:
    """Resolve the canonical Go source location for a worker.

    Workers map to services/<worker>/main.go under GRAPH_MCP_ROOT. A "-2"
    arbitration replica maps to the source path of its base worker, because
    the replicas reuse the same Dockerfile + binary.

    Args:
        worker: Worker name, optionally ending in the "-2" replica suffix.

    Returns:
        The path to the worker's main.go inside the GraphMCP checkout. The
        path is only constructed here; its existence is not checked.
    """
    replica_suffix = "-2"
    # Strip the suffix only when it terminates the name. Splitting on the
    # first "-2" anywhere would truncate names such as "worker-20".
    if worker.endswith(replica_suffix):
        base = worker[: -len(replica_suffix)]  # entity-extractor-2 → entity-extractor
    else:
        base = worker
    return GRAPH_MCP_ROOT / "services" / base / "main.go"
|
||
|
||
|
||
def _extract_referenced_workers(doc: str) -> set[str]:
    """Return the WORKERS entries that the inventory doc mentions by name.

    A name counts only when it stands alone, i.e. it is neither preceded nor
    followed by a word character or a hyphen. Plain regex word boundaries are
    not enough for hyphenated names: a hyphen is itself a boundary, so
    "entity-extractor" would be found inside "entity-extractor-2" and a doc
    that lists only the replica would wrongly pass for the base worker.

    Args:
        doc: Full text of the inventory document.

    Returns:
        The subset of WORKERS that appears in ``doc`` as a standalone name.
    """
    return {
        name
        for name in WORKERS
        if re.search(rf"(?<![\w-]){re.escape(name)}(?![\w-])", doc)
    }
|
||
|
||
|
||
def _extract_referenced_tools(doc: str) -> set[str]:
    """Return the MCP_TOOLS entries that occur in ``doc``.

    Each tool name is matched literally with a regex word boundary on both
    sides, so a name embedded in a longer identifier does not count.
    """
    return {
        tool
        for tool in MCP_TOOLS
        if re.search(rf"\b{re.escape(tool)}\b", doc)
    }
|
||
|
||
|
||
def _extract_referenced_streams(doc: str) -> set[str]:
    """Return the REDIS_STREAMS entries that occur in ``doc``.

    Each stream name is escaped (so its dot is literal) and matched with a
    regex word boundary on both sides.
    """
    return {
        stream
        for stream in REDIS_STREAMS
        if re.search(rf"\b{re.escape(stream)}\b", doc)
    }
|
||
|
||
|
||
# ── Existence + line-budget gate ────────────────────────────────────────────
|
||
|
||
def test_inventory_doc_exists():
    """Fail with a pointed message while the inventory doc is still absent."""
    failure_hint = (
        f"Inventory doc missing at {INVENTORY_PATH}. "
        "Phase 0 is the gate — write 00-inventory.md before any other phase."
    )
    assert INVENTORY_PATH.exists(), failure_hint
|
||
|
||
|
||
def test_inventory_under_500_lines():
    """Keep the inventory doc under its 500-line budget.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    # Use a context manager so the handle is closed deterministically; a bare
    # open() leaves it to the garbage collector and can surface as a
    # ResourceWarning.
    with INVENTORY_PATH.open(encoding="utf-8") as handle:
        line_count = sum(1 for _ in handle)
    assert line_count < 500, (
        f"Inventory must stay under 500 lines (currently {line_count}). "
        "Move deep detail into per-worker sub-docs."
    )
|
||
|
||
|
||
# ── Completeness gates ──────────────────────────────────────────────────────
|
||
|
||
def test_inventory_covers_every_worker():
    """Every entry in WORKERS must be named in the inventory doc.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once. Calling the helper inside the comprehension would
    # re-run every regex search for each worker being checked.
    referenced = _extract_referenced_workers(doc)
    missing = [w for w in WORKERS if w not in referenced]
    assert not missing, (
        f"Inventory is missing these workers: {missing}. "
        "Per the story, every GraphMCP worker — including the -2 arbitration "
        "replicas — must be listed with env vars, streams, Cypher queries, "
        "LLM call sites, and container name."
    )
|
||
|
||
|
||
def test_inventory_covers_every_mcp_tool():
    """Every entry in MCP_TOOLS must be named in the inventory doc.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once. Calling the helper inside the comprehension would
    # re-run every regex search for each tool being checked.
    referenced = _extract_referenced_tools(doc)
    missing = [t for t in MCP_TOOLS if t not in referenced]
    assert not missing, (
        f"Inventory is missing these MCP tools: {missing}. "
        "Each must list input schema, output shape, and implementation line "
        "in services/mcp-server/main.go."
    )
|
||
|
||
|
||
def test_inventory_covers_every_redis_stream():
    """Every entry in REDIS_STREAMS must be named in the inventory doc.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Scan the doc once. Calling the helper inside the comprehension would
    # re-run every regex search for each stream being checked.
    referenced = _extract_referenced_streams(doc)
    missing = [s for s in REDIS_STREAMS if s not in referenced]
    assert not missing, (
        f"Inventory is missing these Redis streams: {missing}. "
        "Each must list producers, consumers, retention policy, expected throughput."
    )
|
||
|
||
|
||
# ── Required attribute coverage per worker ──────────────────────────────────
|
||
|
||
WORKER_REQUIRED_ATTRS = {
    # Maps each worker name to the substrings the inventory must contain for
    # it (env vars, stream names, Cypher keywords, LLM markers). The test that
    # consumes this mapping searches the whole doc for each substring; it
    # does not restrict the search to that worker's row.
    "discord-connector": [
        "DISCORD_TOKEN",
        "DISCORD_GUILD_ID",
        "raw.discord",
        "raw.encounters",
    ],
    "discord-filter": [
        "SIMILARITY_THRESHOLD",
        "raw.discord",
        "raw.messages",
        "embed",
        "ANN",
    ],
    "lore-watcher": [
        "WATCH_DIR",
        "INGEST_URL",
        "DEBOUNCE_MS",
        "fsnotify",
        "sha256",
    ],
    "ingestion-worker": [
        "CHUNK_SIZE",
        "CHUNK_OVERLAP",
        "EMBED_URL",
        "EMBED_MODEL",
        "raw.messages",
        "raw.lore",
        "8080",
    ],
    "entity-extractor": [
        "LLM_URL",
        "LLM_MODEL",
        "raw.messages",
        "MERGE",
    ],
    "entity-extractor-2": [
        "qwen3.5",
        "CONSUMER_NAME",
    ],
    "lore-extractor": [
        "LLM_URL",
        "raw.lore",
        "LoreDocument",
        "FEATURES",
        "lore_verified",
    ],
    "lore-extractor-2": [
        "qwen3.5",
        "CONSUMER_NAME",
    ],
    "encounter-processor": [
        "raw.encounters",
        "WITNESSED",
        "Encounter",
        "OCCURRED_AT",
    ],
    "encounter-processor-2": [
        "qwen3.5",
        "CONSUMER_NAME",
    ],
}
|
||
|
||
|
||
def test_inventory_documents_required_worker_attributes():
    """Check that the inventory carries the detail each worker needs.

    For every worker in WORKER_REQUIRED_ATTRS the doc must contain the worker
    name and each required substring (env vars, stream names, key Cypher
    keywords, LLM markers). This catches inventory rows that name a worker
    but skip the details the merge needs.

    The substring search covers the whole doc rather than a window around the
    worker name: the story requires that each worker be documented with these
    columns, not that they be co-located in a single sentence. All problems
    are collected and reported together in one assertion.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    inventory_text = _load_inventory()
    problems = []
    for name, required in WORKER_REQUIRED_ATTRS.items():
        if name in inventory_text:
            absent = [attr for attr in required if attr not in inventory_text]
            if absent:
                problems.append(f"{name}: missing attributes {absent}")
        else:
            problems.append(f"{name}: worker name not found in doc")
    assert not problems, (
        "Workers are listed but their detail rows are incomplete:\n - "
        + "\n - ".join(problems)
    )
|
||
|
||
|
||
# ── Path-accuracy gate (the second TDD step) ────────────────────────────────
|
||
|
||
# Marker for tests that need the GraphMCP-Example checkout on disk: they are
# skipped, not failed, when GRAPH_MCP_ROOT does not exist.
requires_graphmcp = pytest.mark.skipif(
    not GRAPH_MCP_ROOT.exists(),
    reason=(
        f"GraphMCP-Example checkout not found at {GRAPH_MCP_ROOT} — "
        "set GRAPHMCP_ROOT to point at the pinned commit (064daa9)."
    ),
)
|
||
|
||
|
||
@requires_graphmcp
@pytest.mark.parametrize(
    "worker,expected_source",
    [
        ("discord-connector", "services/discord-connector/main.go"),
        ("discord-filter", "services/discord-filter/main.go"),
        ("lore-watcher", "services/lore-watcher/main.go"),
        ("ingestion-worker", "services/ingestion-worker/main.go"),
        ("entity-extractor", "services/entity-extractor/main.go"),
        ("entity-extractor-2", "services/entity-extractor/main.go"),
        ("lore-extractor", "services/lore-extractor/main.go"),
        ("lore-extractor-2", "services/lore-extractor/main.go"),
        ("encounter-processor", "services/encounter-processor/main.go"),
        ("encounter-processor-2", "services/encounter-processor/main.go"),
        ("mcp-server", "services/mcp-server/main.go"),
    ],
)
def test_worker_source_path_exists(worker: str, expected_source: str):
    """Each listed source path must exist in the GraphMCP-Example checkout.

    The (worker, path) pairs are spelled out in the parametrize table above:
    every "-2" replica points at its base worker's main.go, and mcp-server is
    checked alongside the workers. Catches stale paths after refactors.
    """
    source_file = GRAPH_MCP_ROOT / expected_source
    assert source_file.exists(), (
        f"{worker}: cited source {expected_source} does not exist in "
        f"{GRAPH_MCP_ROOT}. Update the inventory after refactors."
    )
|
||
|
||
|
||
@requires_graphmcp
def test_inventory_cites_correct_source_paths():
    """Every `services/<name>/main.go` path cited in the inventory must exist
    as a file in the GraphMCP-Example checkout.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Accept digits and underscores in the directory name, not just lowercase
    # letters and hyphens. Otherwise a citation such as
    # services/entity-extractor-2/main.go falls outside the pattern and is
    # never checked.
    cited = set(re.findall(r"services/([\w-]+)/main\.go", doc))
    # Verify the cited file itself rather than only its directory, and sort
    # so the failure message does not depend on set iteration order.
    missing_paths = sorted(
        f"services/{name}/main.go"
        for name in cited
        if not (GRAPH_MCP_ROOT / "services" / name / "main.go").is_file()
    )
    assert not missing_paths, (
        f"Inventory cites source paths that don't exist in {GRAPH_MCP_ROOT}: "
        f"{missing_paths}"
    )
|
||
|
||
|
||
# ── Cross-link gates ────────────────────────────────────────────────────────
|
||
|
||
def test_inventory_links_back_to_prd():
    """The inventory doc must contain a link to meta/prd.md.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Any "../meta/prd.md" link also contains "meta/prd.md", so this single
    # substring test accepts both spellings.
    assert "meta/prd.md" in doc, (
        "Inventory must cross-link back to meta/prd.md (story acceptance criterion)."
    )
|
||
|
||
|
||
def test_inventory_links_back_to_architecture():
    """The inventory doc must contain a link to planning-artifacts/architecture.md.

    Skipped (not failed) while the doc does not exist yet.
    """
    if not INVENTORY_PATH.exists():
        pytest.skip("inventory doc not yet written")
    doc = _load_inventory()
    # Any "../planning-artifacts/architecture.md" link also contains the
    # shorter path, so this single substring test accepts both spellings.
    assert "planning-artifacts/architecture.md" in doc, (
        "Inventory must cross-link back to planning-artifacts/architecture.md "
        "(story acceptance criterion)."
    )