slice 1.7: seed/yaml + ingest walks structured path; demo exercises time filter; integration tests prove AC 1.11 (85/85 green)

This commit is contained in:
Lore Engine Dev
2026-06-18 01:03:12 -04:00
parent 59ef857366
commit b0cc840e3e
9 changed files with 327 additions and 2 deletions

Binary file not shown.

View File

@@ -0,0 +1,92 @@
"""Adapter that turns structured-YAML edge dicts into the legacy
``Triple`` shape the in-memory :class:`Graph` consumes.
The structured-YAML parsers (slice 1) emit edge dicts in a
deliberately verbose shape — extra keys like ``kind`` and
``reason`` are useful for downstream reasoning but aren't part
of the engine's canonical ``Triple``. This adapter narrows
those dicts down to the ``Triple`` fields the graph builder
expects, so the new YAML path lands in the same graph as the
markdown/codex path.
Slice 2 will replace this adapter when the consistency engine
re-receives the raw edge dicts directly.
"""
from __future__ import annotations

from collections.abc import Callable
from typing import Iterable

from ..parsers_legacy import Triple
from ._yaml import YamlSchemaError
from .family_tree import parse_family_tree_file
# Map of YAML kind -> parser function. A file's kind is the registered
# name that prefixes its basename (see ``_detect_kind``).
# Each parser returns ``(nodes, edges)`` where each edge is a dict
# in the parser-defined shape. Adding a new YAML format = add a
# line here and a parser module.
# Values are parser callables; the directory walker invokes them as
# ``parser(path)`` and unpacks the result as ``(nodes, edges)``.
PARSERS_BY_KIND: dict[str, Callable[[str], tuple]] = {
    "family_tree": parse_family_tree_file,
    # "factions": parse_factions_file,  # wired by sub-slice 1.2
    # "timeline": parse_timeline_file,  # wired by sub-slice 1.4
    # "gazetteer": parse_gazetteer_file,
    # "bestiary": parse_bestiary_file,
    # "magic_system": parse_magic_file,
    # "culture": parse_culture_file,
}
# File-naming convention: the YAML filename is the kind name.
# ``family_tree.yaml`` → kind = ``family_tree``. We try the longest
# matching prefix; ``family_tree_raventhorne.yaml`` would still
# match ``family_tree``. Files that don't match a known kind are
# silently skipped (the markdown parser is the fallback for unknown
# shapes).
def _detect_kind(filename: str) -> str | None:
    """Return the registered kind that prefixes *filename*, or ``None``.

    The final extension is dropped first. The remaining stem matches a
    kind when it equals the kind outright or continues past it with a
    ``_`` or ``.`` separator.
    """
    head, dot, _ext = filename.rpartition(".")
    # No dot at all: the whole filename is the stem.
    stem = head if dot else filename
    # Longest kinds are checked first so a longer kind is preferred over
    # any shorter kind that is also a prefix of the stem.
    by_length = sorted(PARSERS_BY_KIND, key=len, reverse=True)
    return next(
        (
            kind
            for kind in by_length
            if stem == kind or stem.startswith((kind + "_", kind + "."))
        ),
        None,
    )
def iter_structured_yaml(root: str) -> Iterable[Triple]:
    """Walk *root* and yield a :class:`Triple` for every structured-YAML edge.

    Only files ending in ``.yaml`` or ``.yml`` (matched case-insensitively)
    whose name maps to a registered kind via :func:`_detect_kind` are
    parsed; every other file is skipped without comment.

    Directories and files are both visited in sorted order, so the
    sequence of yielded triples does not depend on the order in which the
    filesystem happens to list entries.

    Raises:
        YamlSchemaError: re-raised as the same exception type the parser
            raised, with the offending file path prefixed to the message
            and the original ``line`` / ``field`` carried over.
    """
    import os

    for dirpath, dirnames, filenames in os.walk(root):
        # In-place sort steers the (top-down) walk itself; sorting only the
        # files would still leave the directory order filesystem-dependent.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.lower().endswith((".yaml", ".yml")):
                continue
            kind = _detect_kind(filename)
            if kind is None:
                continue
            parser = PARSERS_BY_KIND[kind]
            full = os.path.join(dirpath, filename)
            try:
                _nodes, edges = parser(full)
            except YamlSchemaError as exc:
                # Re-raise with the file path inline so the caller can
                # see which YAML file failed without re-walking.
                raise type(exc)(
                    f"{full}: {exc}", line=exc.line, field=exc.field
                ) from exc
            for edge in edges:
                yield structured_edge_to_triple(edge)
def structured_edge_to_triple(e: dict) -> Triple:
    """Narrow a structured-YAML edge dict to the legacy :class:`Triple`.

    ``subject``, ``relation``, ``object``, ``source_path`` and
    ``source_slug`` are required (a missing one raises ``KeyError``).
    The time bounds default to ``None``, both confidences to ``1.0`` and
    ``reliability`` to ``"canonical"``. Any other keys in *e* are ignored.
    """
    required = {
        key: e[key]
        for key in ("subject", "relation", "object", "source_path", "source_slug")
    }
    optional = {
        "valid_from": e.get("valid_from"),
        "valid_until": e.get("valid_until"),
        "extraction_confidence": e.get("extraction_confidence", 1.0),
        "source_confidence": e.get("source_confidence", 1.0),
        "reliability": e.get("reliability", "canonical"),
    }
    return Triple(**required, **optional)

View File

@@ -89,6 +89,11 @@ class Triple:
extraction_confidence: float = 1.0
source_confidence: float = 1.0
reliability: str = "canonical"
# Time bounds on the edge. None for unbounded (the markdown path
# always emits None; the structured-YAML path, slice 1, sets
# these from YAML).
valid_from: str | None = None
valid_until: str | None = None
def _read_frontmatter(text: str) -> tuple[dict, str]:

View File

@@ -0,0 +1,29 @@
lineage: "ashveil_bloodline"
founding_ancestor: "theron_ashveil"
description: "The bloodline of Theron Ashveil's descendants."
members:
- id: "theron_ashveil"
name: "Theron Ashveil"
born: "3rd_age.year_200"
died: "3rd_age.year_350"
- id: "maric_vyr"
name: "Maric Vyr"
born: "3rd_age.year_220"
died: "3rd_age.year_330"
- id: "yssa_raventhorne"
name: "Yssa Raventhorne"
born: "3rd_age.year_260"
died: "3rd_age.year_320"
- id: "aldric_raventhorne"
name: "Aldric Raventhorne"
born: "3rd_age.year_300"
parents: ["theron_ashveil", "yssa_raventhorne"]
- id: "elara_raventhorne"
name: "Elara Raventhorne"
born: "3rd_age.year_305"
parents: ["maric_vyr"]

View File

@@ -0,0 +1,15 @@
# A timeline event used by the demo's time-bounded queries (AC 1.11).
# The Battle of Black Spire takes place during the named sub-era;
# the structured parser turns it into an Event node plus an Era node
# with OCCURRED_DURING edges. Slice 1 ships the format; the timeline
# parser implementation lands in sub-slice 1.4.
era: "3rd_age"
sub_era: "age_of_iron"
events:
- id: "battle_of_black_spire"
name: "Battle of Black Spire"
during: "3rd_age.age_of_iron"
participants:
- "aldric_raventhorne"

View File

@@ -149,6 +149,8 @@ def build_graph(entities: Iterable[Entity], triples: Iterable[Triple]) -> Graph:
subject=t.subject,
relation=t.relation,
object=t.object,
valid_from=t.valid_from,
valid_until=t.valid_until,
sources=[t.source_path],
extraction_confidences=[t.extraction_confidence],
source_confidences=[t.source_confidence],
@@ -171,6 +173,31 @@ def build_graph(entities: Iterable[Entity], triples: Iterable[Triple]) -> Graph:
# ``is_disputed``, and link them via ``disputed_with``.
# Slice 2's consistency engine turns this into a
# ``Contradiction`` node.
#
# 0. Existing is unbounded, new is bounded (slice 1,
# structured-YAML override): the YAML is the
# authoritative source for time bounds. Adopt the
# YAML bounds on the existing edge and append the
# YAML as a contributing source. The unbounded
# markdown-style triple still gets cited for
# audit (it's where the relationship was first
# noticed), but the bounds come from the structured
# path. This is the inverse of Case 3 — instead of
# spawning a second disputed Edge, we promote the
# structured source.
if (
existing.valid_from is None
and existing.valid_until is None
and (t.valid_from is not None or t.valid_until is not None)
):
existing.valid_from = t.valid_from
existing.valid_until = t.valid_until
if t.source_path not in existing.sources:
existing.sources.append(t.source_path)
existing.extraction_confidences.append(t.extraction_confidence)
existing.source_confidences.append(t.source_confidence)
existing.reliabilities.append(t.reliability)
continue
if t.source_path in existing.sources and _windows_consistent(
existing.valid_from, existing.valid_until, None, None
):

View File

@@ -16,6 +16,7 @@ ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from lore_engine_poc.parsers import iter_codex, extract_triples
from lore_engine_poc.parsers import iter_structured_yaml
from lore_engine_poc.tools import build_graph
@@ -39,8 +40,23 @@ def main() -> int:
return 1
entities = list(iter_codex(args.codex))
triples = extract_triples(entities)
graph = build_graph(entities, triples)
md_triples = extract_triples(entities)
# Slice 1: also walk the structured-YAML files alongside the
# markdown codex. Markdown triples are the prose path (no
# bounds); YAML triples are the structured path (with bounds).
# ``build_graph`` merges them, and the structured bounds win
# when both paths produce the same fact.
yaml_root = os.path.join(args.codex, "yaml")
yaml_triples = []
if os.path.isdir(yaml_root):
yaml_triples = list(iter_structured_yaml(yaml_root))
print(f"[01_ingest] structured YAML triples: {len(yaml_triples)}")
else:
print(f"[01_ingest] no yaml/ subdir; structured path skipped")
graph = build_graph(entities, md_triples + yaml_triples)
triples = md_triples + yaml_triples
# Dedupe triples for reporting.
seen = set()

View File

@@ -64,6 +64,15 @@ def main() -> int:
"MEMBER_OF,Roland Raventhorne,Iron Mountain Trading Company,3rd_age.year_345",
# Negative case: a relationship that doesn't exist in the codex.
"ALLIED_WITH,House Raventhorne,House Quche,3rd_age.year_345",
# Slice 1: time-bounded edges from the structured YAML path.
# Inside Theron's life → true.
"PARENT_OF,Theron Ashveil,Aldric Raventhorne,3rd_age.year_325",
# After Theron's death at year 350 → false (proves the
# time filter actually fires on the YAML-derived bounds).
"PARENT_OF,Theron Ashveil,Aldric Raventhorne,3rd_age.year_400",
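# Maric's PARENT_OF window runs from Elara's birth (year 305) to his
# death (year 330): year 250 falls before it and year 340 after it,
# so both queries are expected to come back false.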
"PARENT_OF,Maric Vyr,Elara Raventhorne,3rd_age.year_250",
"PARENT_OF,Maric Vyr,Elara Raventhorne,3rd_age.year_340",
]
for q in queries:

View File

@@ -0,0 +1,132 @@
"""Tests for the structured-YAML → Graph integration (slice 1, sub-slice 1.7).
These exercise the seam between the new YAML parsers and the legacy
in-memory graph. The contract: a YAML file's time-bounded edges
must reach the graph with their ``valid_from``/``valid_until``
populated, and the markdown/codex path's unbounded edges must not
be allowed to clobber them on merge.
"""
from __future__ import annotations
import textwrap
import pytest
def _yaml(tmp_path, name, body):
p = tmp_path / name
p.write_text(textwrap.dedent(body).lstrip())
return p
def test_yaml_edge_reaches_graph_with_bounds(tmp_path):
    """AC 1.2 + 1.11: a YAML PARENT_OF edge arrives in the graph with bounds set."""
    from lore_engine_poc.parsers import iter_structured_yaml
    from lore_engine_poc.tools import build_graph

    # The suffixed filename still has to resolve to the ``family_tree`` kind.
    _yaml(tmp_path, "family_tree_minimal.yaml", """
        lineage: "test_lineage"
        members:
          - id: "parent"
            name: "Parent"
            born: "3rd_age.year_200"
            died: "3rd_age.year_350"
          - id: "child"
            name: "Child"
            born: "3rd_age.year_300"
            parents: ["parent"]
    """)

    graph = build_graph([], list(iter_structured_yaml(str(tmp_path))))
    parent_of = graph.edges_by_subject.get("Parent", {}).get("PARENT_OF", [])
    edge = next(filter(lambda candidate: candidate.object == "Child", parent_of))

    # Lower bound is the child's birth, upper bound the parent's death.
    assert edge.valid_from == "3rd_age.year_300"
    assert edge.valid_until == "3rd_age.year_350"
def test_was_true_at_uses_yaml_bounds(tmp_path):
    """AC 1.11: was_true_at honours the YAML window on both sides."""
    from lore_engine_poc.parsers import iter_structured_yaml
    from lore_engine_poc.tools import build_graph, was_true_at

    _yaml(tmp_path, "family_tree.yaml", """
        lineage: "test_lineage"
        members:
          - id: "parent"
            name: "Parent"
            born: "3rd_age.year_200"
            died: "3rd_age.year_350"
          - id: "child"
            name: "Child"
            born: "3rd_age.year_300"
            parents: ["parent"]
    """)
    graph = build_graph([], list(iter_structured_yaml(str(tmp_path))))

    def ask(at_time):
        # Same fact every time; only the query instant varies.
        return was_true_at(graph, "PARENT_OF", "Parent", "Child", at_time)

    # Inside the window [300, 350): true, and the bounds are echoed back.
    inside = ask("3rd_age.year_325")
    assert inside["was_true"] is True
    assert inside["valid_from"] == "3rd_age.year_300"
    assert inside["valid_until"] == "3rd_age.year_350"

    # Past the upper bound (the parent died at year 350): false.
    after = ask("3rd_age.year_400")
    assert after["was_true"] is False
    assert after["confidence"] == 0.0

    # Ahead of the lower bound (the child is born at year 300): false.
    before = ask("3rd_age.year_250")
    assert before["was_true"] is False
def test_yaml_edge_wins_when_markdown_already_has_unbounded_edge(tmp_path):
    """Slice 1.7 integration: YAML bounds land on a pre-existing unbounded edge.

    The markdown-style triple is handed to ``build_graph`` first, so an
    unbounded edge for the same (subject, relation, object) tuple is
    already in place when the structured-YAML triple arrives.
    """
    from lore_engine_poc.parsers import Triple, iter_structured_yaml
    from lore_engine_poc.tools import build_graph

    _yaml(tmp_path, "family_tree.yaml", """
        lineage: "test_lineage"
        members:
          - id: "parent"
            name: "Parent"
            born: "3rd_age.year_200"
            died: "3rd_age.year_350"
          - id: "child"
            name: "Child"
            born: "3rd_age.year_300"
            parents: ["parent"]
    """)

    # Same fact as the YAML edge, but carrying no time bounds.
    unbounded = Triple(
        subject="Parent",
        relation="PARENT_OF",
        object="Child",
        source_path="/tmp/markdown.md",
        source_slug="markdown",
        extraction_confidence=0.6,
        source_confidence=1.0,
        reliability="canonical",
    )
    triples = [unbounded, *iter_structured_yaml(str(tmp_path))]
    graph = build_graph([], triples)

    parent_of = graph.edges_by_subject.get("Parent", {}).get("PARENT_OF", [])
    edge = next(filter(lambda candidate: candidate.object == "Child", parent_of))

    # The merged edge carries the YAML bounds, and the markdown file is
    # still cited as one of its sources.
    assert edge.valid_from == "3rd_age.year_300"
    assert edge.valid_until == "3rd_age.year_350"
    assert "/tmp/markdown.md" in edge.sources