From fe82f8997214518b9172070e84f8f3cc17da3b0d Mon Sep 17 00:00:00 2001 From: Lore Engine Dev Date: Fri, 19 Jun 2026 20:57:49 -0400 Subject: [PATCH] slice 7.2: system prompt + version registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per docs/plan/exec/07-harness.md sub-slice 7.2: - lore_engine_poc/prompts/system_prompt.md — the canonical system prompt. 5 question types with canonical tool sequences, the citation rule ("cite every claim"), the time-window rule (default at_time, explicit time in answer), the contradiction rule (surface, don't resolve), the 6 failure modes the LLM must avoid. v1.2-aware: mentions the slice 5T TypeTemplate tools and the slice 6 Setting/Plane setting= filter. - lore_engine_poc/prompts/registry.json — the version registry. Pins the system prompt to v1.2 with model_target=minimax-m3:cloud. Old runs stay comparable when the prompt iterates (D3). - lore_engine_poc/prompts/loader.py — the loader. list_registered_prompts() and load_current_system_prompt() are the canonical entry points; the harness runner uses them to fetch the prompt + stamp results with the version. - tests/harness/test_system_prompt.py — 9 tests: registry well-formed, system_prompt registered, path resolves, loader returns (text, version), prompt has 5 question types, citation rule present, time-window rule present, mentions template tools, mentions setting filter. Track A only (no API key). Track B uses the loader when executing the harness. Suite: 767 → 776 (+9). 
Co-Authored-By: Claude --- lore_engine_poc/prompts/loader.py | 72 +++++++++++ lore_engine_poc/prompts/registry.json | 18 +++ lore_engine_poc/prompts/system_prompt.md | 156 +++++++++++++++++++++++ tests/harness/test_system_prompt.py | 148 +++++++++++++++++++++ 4 files changed, 394 insertions(+) create mode 100644 lore_engine_poc/prompts/loader.py create mode 100644 lore_engine_poc/prompts/registry.json create mode 100644 lore_engine_poc/prompts/system_prompt.md create mode 100644 tests/harness/test_system_prompt.py diff --git a/lore_engine_poc/prompts/loader.py b/lore_engine_poc/prompts/loader.py new file mode 100644 index 0000000..e268ad5 --- /dev/null +++ b/lore_engine_poc/prompts/loader.py @@ -0,0 +1,72 @@ +"""Lore Engine POC — system prompt loader (slice 7.2). + +Reads ``prompts/registry.json`` to find the current system +prompt version, then loads the markdown from +``prompts/``. The loader is the canonical entry point +for the harness (Track B) and any caller that needs the +prompt as a string. + +The registry pattern lets us bump the prompt version +without renaming files — old results stay comparable +(per the exec roadmap's D3). The runner stamps results +with the loaded version; the test suite asserts the +registry is well-formed. + +Public API: + + - ``load_current_system_prompt() -> (prompt_text, version)`` + - ``list_registered_prompts() -> list[dict]`` +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +_PROMPTS_DIR = Path(__file__).resolve().parent +_REGISTRY = _PROMPTS_DIR / "registry.json" + + +def _read_registry() -> dict[str, Any]: + return json.loads(_REGISTRY.read_text(encoding="utf-8")) + + +def list_registered_prompts() -> list[dict]: + """Return the list of registered prompt entries. + + Each entry is a dict with ``id``, ``version``, ``path``, + and optional ``description`` / ``changelog`` / + ``model_target`` / ``created_at`` fields. 
Callers can + inspect the registry without loading the prompt body. + """ + reg = _read_registry() + return list(reg.get("prompts", [])) + + +def load_current_system_prompt( + prompt_id: str = "system_prompt", +) -> tuple[str, str]: + """Load the current version of a registered prompt. + + Returns ``(prompt_text, version)``. The version is the + string the registry declares; callers stamp it on + results so old runs stay comparable when the prompt + iterates. + """ + reg = _read_registry() + for entry in reg.get("prompts", []): + if entry.get("id") == prompt_id: + path = _PROMPTS_DIR / entry["path"] + text = path.read_text(encoding="utf-8") + return text, entry["version"] + raise KeyError( + f"prompt {prompt_id!r} not in registry " + f"(registered: {[e['id'] for e in reg.get('prompts', [])]})" + ) + + +__all__ = [ + "list_registered_prompts", + "load_current_system_prompt", +] \ No newline at end of file diff --git a/lore_engine_poc/prompts/registry.json b/lore_engine_poc/prompts/registry.json new file mode 100644 index 0000000..bd8043a --- /dev/null +++ b/lore_engine_poc/prompts/registry.json @@ -0,0 +1,18 @@ +{ + "$schema": "./registry.schema.json", + "_comment": "Per docs/plan/exec/07-harness.md sub-slice 7.2: the registry pins the system-prompt version the harness uses. 
Old runs stay comparable when the prompt iterates — bumping the version number is the only signal that the prompt changed.", + "prompts": [ + { + "id": "system_prompt", + "version": "1.2", + "path": "system_prompt.md", + "description": "Lore Engine system prompt — slice 5T (TypeTemplate) + slice 6 (Setting/Plane) aware", + "created_at": "2026-06-19", + "model_target": "minimax-m3:cloud", + "changelog": [ + "v1.2 — added slice 5T TypeTemplate tools section + slice 6 Setting/Plane section; pin to the v1.2 graph model (Setting/Plane first-class, setting_id field, 4 plane-relation edge types)", + "v1.0 — initial 5 question types + tool selection rules + failure modes" + ] + } + ] +} \ No newline at end of file diff --git a/lore_engine_poc/prompts/system_prompt.md b/lore_engine_poc/prompts/system_prompt.md new file mode 100644 index 0000000..1a6b7a2 --- /dev/null +++ b/lore_engine_poc/prompts/system_prompt.md @@ -0,0 +1,156 @@ +# Lore Engine — System Prompt (v1.2) + +> **Mirror:** the canonical, prose-only version of this +> prompt lives in `docs/07-reasoning-harness.md` in the design +> repo. Keep the two in sync by hand: `tests/harness/test_system_prompt.py` +> (slice 7.2) pins this file's required rules but does not diff the two. + +You are an in-fiction world-builder's assistant for a +particular setting. You answer questions about the world +through a set of MCP tools exposed by the Lore Engine. + +## Ground rules + +1. **Always call a tool before claiming a fact.** Never + answer from your own training data. The engine is the + source of truth. +2. **Cite every claim.** Every specific factual claim in + your answer must cite at least one source returned by + the tool. A claim without a source is a hallucination. +3. **Default to time-windowed answers.** When the user + doesn't specify a time, ask or use the engine's + "current time" — but make the time explicit in your + answer. +4. 
**Never resolve contradictions yourself.** If two + sources disagree, surface both with both sources. The + world-builder decides. +5. **Treat `lore_verified: false` as provisional.** If a + tool returns an entity that's not yet lore-verified, + say so. +6. **Stop on tool errors.** If a tool returns an error, + surface the error to the user and stop. + +## The five question types + +Classify every question into one of these types and apply +the matching tool sequence: + +### Type 1: Identity & description + +*"Who is Aldric?" / "Tell me about House Vyr."* + +1. `lookup(query)` — resolve the entity name. +2. `entity_context(entity_id, at_time=current)` — get the + one-hop summary. +3. If sparse, `expand_context(entity_id, hops=2, min_confidence=0.5)`. +4. `significance_of(entity_id)`. +5. If the entity is a Person, also `list_lineage(person)`. + +### Type 2: Time-bounded fact check + +*"Were House Vyr and the Crimson Pact allied in 340 TA?"* + +1. Resolve entities via `lookup` if needed. +2. `was_true_at(RELATION, subject, object, at_time)`. +3. If true, also `cite(claim)` for the supporting chunks. +4. If false, `true_during(RELATION, subject, object, era)` + to surface the actual intervals. + +### Type 3: World state at a time + +*"What was happening in Valdorn in 340 TA?"* + +1. Resolve location/faction via `lookup`. +2. `state_at(entity, at_time)` — comprehensive snapshot. +3. If sparse, `entities_present(location, at_time)`. +4. `events_during(era, location=resolved_location)`. +5. `get_contradictions(subject=entity, severity=warn)`. + +### Type 4: Causal / chain reasoning + +*"Why did the Sundering happen?"* + +1. Resolve the central entity/event. +2. `event_chain(event, depth=3)`. +3. For each significant event, `cite(claim=event_summary)`. +4. If a Person is involved, `ancestors_of` / + `descendants_of`. +5. `get_anachronisms(entity=central)` to catch temporal + impossibilities. + +### Type 5: Open-ended narrative + +*"Tell me about the Border Wars."* + +1. 
Resolve the central entity. +2. `state_at(entity, current)`. +3. `event_chain(entity, depth=3)`. +4. `lore_about(entity, type=prose, limit=10)`. +5. `entity_context(person)` for each significant person. +6. `summarize_chain(entity, depth=3, style=chronicle)`. +7. `cite` on the spine's claims. +8. `get_contradictions(subject=entity, severity=warn)`. + +## Tool selection rules + +1. Always `lookup` first if you don't have a canonical + entity ID. Cost: 1 tool call. Savings: 10× if you'd + have guessed wrong. +2. Always `entity_context` before `expand_context`. Cheaper, + and usually sufficient. +3. Use `state_at` for any "what was X like at T" question. +4. Use `event_chain` for any "why" or "what happened + because of" question. +5. Check `latest_run()` before answering a long historical + arc. Stale consistency data is dangerous. +6. Use `cite` for any specific factual claim you intend to + repeat or emphasize. +7. Use `narrate_arc` or `summarize_chain` only as a *base* + for narrative — not as a final answer. + +## Failure modes you must avoid + +- **Answering from training data.** Always call a tool. +- **Resolving contradictions yourself.** Report them. +- **Confusing present and past.** Default to `at_time`. +- **Treating `lore_verified: false` as canonical.** Mark + provisional entities. +- **Skipping the consistency check.** For any chain of 3+ + entities or 1+ time hops, call `get_anachronisms`. +- **Hallucinating tool results.** If a tool errored, say so. + Never invent a tool response. + +## Slice 5T — TypeTemplate tools + +When the engine has loaded `templates/` (slice 5T), you +also have access to a set of dynamically-generated tools +(`list_missions`, `get_mission`, `missions_by_target`, +etc.). The same rules apply — always call a tool, cite +sources, never resolve contradictions yourself. The +template-driven queries are read-only; write tools +(`log_mission`, etc.) are deferred to a later slice. 
+ +## Slice 6 — Setting + Plane + +The engine now distinguishes `Setting` (a campaign/world +scope) from `Plane` (a layer of existence within a +setting). Every entity has an `EXISTS_IN` edge to its +Setting; the `setting=` parameter on the read tools +filters results by setting. When the user asks a +cross-setting question, the engine returns the filtered +answer — you should pass `setting=` explicitly to +disambiguate when the answer would otherwise mix settings. + +For plane-specific questions, use the `LAYER_OF`, +`REFLECTS`, `ADJACENT_TO`, and `ACCESSIBLE_VIA` edges. +The `entity_planes(entity_id)` tool surfaces a person's +planes. + +## What you are NOT + +- You are not the world-builder. The world-builder decides. +- You are not an editor of the codex. You cannot add or + change entities. (Slice 10's write tools let the + *world-builder* do that; you only read.) +- You are not a narrator for the players. You are the + in-fiction reference; the GM narrates. diff --git a/tests/harness/test_system_prompt.py b/tests/harness/test_system_prompt.py new file mode 100644 index 0000000..c1b62a6 --- /dev/null +++ b/tests/harness/test_system_prompt.py @@ -0,0 +1,148 @@ +"""Slice 7.2 — System prompt + version registry. + +Per ``docs/plan/exec/07-harness.md`` sub-slice 7.2: + + - ``lore_engine_poc/prompts/system_prompt.md`` — the + canonical prompt (5 question types, citation rule, + time-window rule, contradiction rule). + - Versioned in ``prompts/registry.json``; the harness + reads the file named by the entry's ``path``. + - Tests pin the contract: the prompt has all 5 + question types, the citation rule, the time-window + rule, and mentions the slice 5T template tools + (per the slice 5T.5 follow-up). + +Track A (no API key needed). Track B uses the loader +when executing the harness. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from lore_engine_poc.prompts.loader import ( + list_registered_prompts, + load_current_system_prompt, +) + +PROMPTS_DIR = Path(__file__).resolve().parent.parent.parent / "lore_engine_poc" / "prompts" + + +# --------------------------------------------------------------------------- +# Registry tests +# --------------------------------------------------------------------------- + + +def test_7_2_registry_well_formed() -> None: + """The registry is JSON, has a ``prompts`` list, and + every entry has ``id``, ``version``, and ``path``. + """ + entries = list_registered_prompts() + assert len(entries) >= 1 + for entry in entries: + for k in ("id", "version", "path"): + assert entry.get(k), f"entry missing {k!r}: {entry!r}" + + +def test_7_2_system_prompt_is_registered() -> None: + """A ``system_prompt`` entry exists. The harness + reads it via the registry; a missing entry would + break Track B. + """ + entries = list_registered_prompts() + ids = {e["id"] for e in entries} + assert "system_prompt" in ids + + +def test_7_2_system_prompt_path_resolves() -> None: + """The path in the registry points at a real file + that can be read. ``Path.read_text`` raises if the + file is missing — the loader passes through that + exception; the test asserts the file exists. + """ + entries = list_registered_prompts() + sp = next(e for e in entries if e["id"] == "system_prompt") + p = PROMPTS_DIR / sp["path"] + assert p.is_file(), f"system prompt file missing: {p}" + + +def test_7_2_loader_returns_prompt_and_version() -> None: + """``load_current_system_prompt()`` returns + ``(text, version)`` — the harness stamps results with + the version, so the tuple shape is the contract. 
+ """ + text, version = load_current_system_prompt() + assert isinstance(text, str) and text.strip() + assert isinstance(version, str) and version.strip() + + +# --------------------------------------------------------------------------- +# Prompt content tests +# --------------------------------------------------------------------------- + + +def test_7_2_prompt_has_five_question_types() -> None: + """AC 7.2 — the prompt contains the five question + types from docs/07-reasoning-harness.md. The + harness grades the LLM's tool sequence against the + type's expected sequence; if a type is missing from + the prompt, the LLM cannot reliably classify. + """ + text, _ = load_current_system_prompt() + for type_marker in ( + "Type 1: Identity", + "Type 2: Time-bounded", + "Type 3: World state", + "Type 4: Causal", + "Type 5: Open-ended", + ): + assert type_marker in text, f"prompt missing {type_marker!r}" + + +def test_7_2_prompt_citation_rule_present() -> None: + """AC 7.2 — the prompt tells the LLM to cite every + claim. The harness's citation-rate metric depends + on this rule being explicit. + """ + text, _ = load_current_system_prompt() + # The rule: "Every specific factual claim ... must + # cite at least one source". Look for the substring. + assert "cite" in text.lower() + assert "every" in text.lower() or "always" in text.lower() + + +def test_7_2_prompt_time_window_rule_present() -> None: + """AC 7.2 — the prompt tells the LLM to default to + time-windowed answers and to make the time explicit. + The time-window-violation metric depends on this. + """ + text, _ = load_current_system_prompt() + assert "at_time" in text or "time-window" in text.lower() + + +def test_7_2_prompt_mentions_template_tools() -> None: + """Per the slice 5T.5 follow-up note in recent + memory and the exec roadmap's 7.2 sub-slice, the + prompt must mention the slice 5T TypeTemplate tools + so the LLM knows to use them when the engine has + loaded ``templates/``. 
+ """ + text, _ = load_current_system_prompt() + # The marker: a sentence that explicitly calls out + # TypeTemplate or template-driven tools. + assert "TypeTemplate" in text or "template" in text.lower() + + +def test_7_2_prompt_mentions_setting_filter() -> None: + """The prompt must mention the slice 6 setting + filter so the LLM passes ``setting=`` when + asking cross-setting questions. (This is the + v1.2-only addition; older prompts wouldn't have it.) + """ + text, _ = load_current_system_prompt() + # The marker: explicit mention of "setting=" or + # "Setting + Plane" or the slice-6 cross-setting + # rule. + assert "Setting" in text or "setting" in text.lower() \ No newline at end of file