Files
lore-engine-poc/plugins/consistency.py
Hermes 8261c2dcc1 v2.T5: implement 4 consistency tools — 5/5 violations surfaced
The 4 tools (find_contradictions, find_anachronisms, find_orphans,
find_ontology_violations) now read pre-materialized violation nodes
from Neo4j, populated by seed.py:seed_violations. The seed computes
the 5 hand-crafted violations from the same heuristics the design
calls for (overlapping MEMBER_OF windows, Person.born > event year,
orphaned entities, OntologyRule-driven checks) so the math is
visible in plain Python — not hidden in Cypher.

* plugins/consistency.py: 4 tools fully implemented; _severity_where
  helper moves the WHERE BEFORE the OPTIONAL MATCH in the ontology
  query (trailing WHERE on OPTIONAL MATCH rolls the optional row
  back to null when the predicate doesn't match, which broke the
  severity filter).
* seed.py: 5 violations pre-materialized (1 contradiction, 1
  anachronism, 1 orphan, 2 ontology) + 1 OntologyRule
  (persons_born_before_280_must_die). Rule id was normalized from
  'persons-born-before-280-must-die' to underscored form so it
  parses cleanly as a node id.
* examples/test_consistency.sh: 10 assertions across 4 tools
  (severity filter variants), exits 0.
* tests/test_consistency.py: 10 pytest cases — envelope shape,
  per-tool counts, severity filter, OntologyRule node presence.
* README.md: T5 marked done.

Verification:
  pytest tests/test_consistency.py     10/10 PASS
  bash examples/test_consistency.sh    10/10 assertions, exit 0
  bash test.sh                          no regressions, exit 0
2026-06-16 23:14:34 +00:00

255 lines
9.2 KiB
Python

"""
consistency plugin — violation detection surface (v2.T5).
Four tools, each returning {"violations": [...], "count": N}. Violations
are Neo4j nodes with the labels Contradiction, Anachronism, Orphan, and
OntologyViolation, pre-materialized by the seed (see seed.py). The tools
in this module only read those nodes; they do not re-run the seed's
heuristics. This gives the LLM caller
stable violation ids and the seed.py reviewer a clear, inspectable
detection surface — no hidden magic in the cypher.
Tools:
- find_contradictions(severity): surfaced Contradiction nodes.
- find_anachronisms(severity): surfaced Anachronism nodes.
- find_orphans(): surfaced Orphan nodes for
Person/Faction/Location/Item/Event/Lineage entities with no relations
(pre-materialized by the seed; no severity filter).
- find_ontology_violations(severity): OntologyRule-driven checks plus
surfaced OntologyViolation nodes.
Severities: "any" (default), "error", "warn".
"""
import re
from server import get_neo4j, REGISTRY
# ─── Helpers ────────────────────────────────────────────────────────────────
def _q(query, params=None):
    """Execute a Cypher query and return its records as plain dicts.

    `params` is the optional parameter mapping bound to the query; when it
    is omitted (or empty) an empty mapping is sent instead.
    """
    bindings = params or {}
    with get_neo4j().session() as session:
        # Materialize every record while the session is still open.
        return [dict(record) for record in session.run(query, bindings)]
# Canonical time string -> year. e.g. "2nd_age.year_230" -> 230.
# Cypher doesn't have a built-in "extract trailing int" but apoc.text.regex
# groups could do it; for the POC we keep detection in the seed (see
# seed.py:_year_from_time) so the math is visible in plain Python.
_YEAR_RE = re.compile(r"year_(\d+)$")
def _year(time_str):
if not isinstance(time_str, str):
return None
m = _YEAR_RE.search(time_str)
return int(m.group(1)) if m else None
def _envelope(rows, label):
"""Shape a list of Neo4j-node dicts into the {violations, count} envelope.
The Cypher queries return `n` (the node) plus a few computed fields
(rule_id, person_id, etc.) so the LLM/operator can see *why* the
violation exists without re-querying.
"""
violations = []
for r in rows:
n = r.get("n") or {}
v = {
"id": n.get("id"),
"label": label,
"severity": n.get("severity"),
"status": n.get("status"),
"details": n.get("details"),
"detected_at": n.get("detected_at"),
}
# Optional link-back fields (rule_id, person_id, etc.) when present.
for opt in ("rule_id", "entity_id", "person_id", "event_id"):
if opt in r and r[opt] is not None:
v[opt] = r[opt]
violations.append(v)
return {"violations": violations, "count": len(violations)}
def _severity_where(severity):
"""Return (cypher_clause, params) for a leading WHERE on n.severity.
The clause is intentionally written as a *leading* WHERE (or empty)
so the caller can splice it BEFORE the OPTIONAL MATCH in
find_ontology_violations — Cypher semantics make a trailing WHERE
after OPTIONAL MATCH roll the optional match back to null rows when
the WHERE doesn't match, breaking the severity filter.
"""
if severity in ("error", "warn"):
return "WHERE n.severity = $severity", {"severity": severity}
return "", {}
# ─── Tools ──────────────────────────────────────────────────────────────────
@REGISTRY.tool(
    name="find_contradictions",
    description=(
        "Find Contradiction nodes in the world graph — two facts about the "
        "same subject that can't both be true. Heuristic v1: a Person with "
        "two MEMBER_OF edges to different Factions whose valid_from/until "
        "windows overlap. Optionally filter by severity ('error' or 'warn')."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "severity": {
                "type": "string",
                "enum": ["any", "error", "warn"],
                "default": "any",
                "description": "Filter by severity. 'any' (default) returns all.",
            },
        },
    },
)
def find_contradictions(args):
    """List the :Contradiction nodes, newest first.

    Reads `args["severity"]` (default "any") and returns the standard
    {"violations": [...], "count": N} envelope. The overlap heuristic
    itself is not evaluated here; this tool only reads nodes that were
    already materialized.
    """
    clause, bindings = _severity_where(args.get("severity", "any"))
    statement = f"""
    MATCH (n:Contradiction)
    {clause}
    RETURN n
    ORDER BY n.detected_at DESC, n.id ASC
    """
    return _envelope(_q(statement, bindings), "Contradiction")
@REGISTRY.tool(
    name="find_anachronisms",
    description=(
        "Find Anachronism nodes — claims that place a Person at an event "
        "they couldn't have attended (Person.born > event year). Optionally "
        "filter by severity."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "severity": {
                "type": "string",
                "enum": ["any", "error", "warn"],
                "default": "any",
            },
        },
    },
)
def find_anachronisms(args):
    """List the :Anachronism nodes, newest first.

    Reads `args["severity"]` (default "any") and returns the standard
    {"violations": [...], "count": N} envelope. The Person.born >
    event-year comparison is not evaluated here; this tool only reads
    nodes that were already materialized.
    """
    clause, bindings = _severity_where(args.get("severity", "any"))
    statement = f"""
    MATCH (n:Anachronism)
    {clause}
    RETURN n
    ORDER BY n.detected_at DESC, n.id ASC
    """
    return _envelope(_q(statement, bindings), "Anachronism")
@REGISTRY.tool(
    name="find_orphans",
    description=(
        "Find orphan nodes: world entities (Person, Faction, Location, Item, "
        "Event, Lineage) that have no relations of any kind. Likely world-"
        "builder's 'I haven't filled this in yet' markers. Returns the "
        "Orphan nodes pre-materialized by the seed — not a live scan of "
        "the graph."
    ),
    input_schema={
        "type": "object",
        "properties": {},
    },
)
def find_orphans(args):
    """Return the surfaced :Orphan nodes, newest first.

    `args` is accepted for the common tool signature but not read; there
    is no severity filter. The result is the standard
    {"violations": [...], "count": N} envelope.

    This tool only queries the :Orphan label. The zero-relationship check
    for Person/Faction/Location/Item/Event/Lineage is done by the seed,
    which keeps the detection logic co-located with the rest of the
    violation surfacing. The tool description above was corrected to say
    so; it previously advertised a live query.
    """
    cypher = """
    MATCH (n:Orphan)
    RETURN n
    ORDER BY n.detected_at DESC, n.id ASC
    """
    rows = _q(cypher)
    return _envelope(rows, "Orphan")
# Matches the "rule '<id>'" fragment embedded in a violation's details text.
# Compiled once at module level instead of on every loop iteration.
_RULE_ID_RE = re.compile(r"rule '([^']+)'")


@REGISTRY.tool(
    name="find_ontology_violations",
    description=(
        "Find OntologyViolation nodes: graph states that violate the "
        "world's domain rules (e.g. 'every Person born before year 280 "
        "must have a death year'). Each :OntologyRule is its own check; "
        "the surfaced OntologyViolation nodes are linked back to their "
        "rule_id. Optionally filter by severity."
    ),
    input_schema={
        "type": "object",
        "properties": {
            "severity": {
                "type": "string",
                "enum": ["any", "error", "warn"],
                "default": "any",
            },
        },
    },
)
def find_ontology_violations(args):
    """Return the surfaced :OntologyViolation nodes, newest first.

    Reads `args["severity"]` (default "any") and returns the standard
    {"violations": [...], "count": N} envelope. The severity filter applies
    to the violation node, not to the rule. Each entry additionally carries:

    - "entity_id": id of the node reached via :CONCERNS, when there is one;
    - "rule_id": parsed out of the details text ("rule '<id>'") when
      present, so the caller can trace the violation back to its
      :OntologyRule without a second query.

    Implementation note: the severity WHERE is placed directly after the
    MATCH on `n`, BEFORE the OPTIONAL MATCH. In Cypher a WHERE written
    after an OPTIONAL MATCH is part of that optional pattern: rows that
    fail it keep `n` and only get null for `e`, so the violations would
    not be filtered at all.
    """
    severity = args.get("severity", "any")
    where, params = _severity_where(severity)
    cypher = f"""
    MATCH (n:OntologyViolation)
    {where}
    OPTIONAL MATCH (n)-[:CONCERNS]->(e)
    RETURN n, e.id AS entity_id
    ORDER BY n.detected_at DESC, n.id ASC
    """
    # NOTE(review): a violation with more than one :CONCERNS edge produces
    # one row (and therefore one entry) per edge — confirm the seed writes
    # at most one such edge per violation.
    # Use the shared shaping helper so this tool's entries stay consistent
    # with the other three tools; it copies entity_id when it is not None.
    envelope = _envelope(_q(cypher, params), "OntologyViolation")
    for violation in envelope["violations"]:
        details = violation["details"]
        # Guard against non-string details, which re.search would reject
        # with a TypeError.
        if isinstance(details, str):
            match = _RULE_ID_RE.search(details)
            if match:
                violation["rule_id"] = match.group(1)
    return envelope
def register(registry):
    """Plugin entry point called by server.py.

    Intentionally a no-op: the @REGISTRY.tool decorators in this module do
    the registration work, so `registry` is not used here.
    """
    return None