# Source file: lore-engine-poc-v3/tests/test_confidence.py
# (Python, 234 lines, 10 KiB)

"""Unit tests for the dual-confidence model in tools.py.
These run as a script: ``python3 tests/test_confidence.py``.
Pass criterion: prints ``N/N passed`` with no FAIL lines.
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from lore_engine_poc.parsers import Entity, LoreSource, Triple
from lore_engine_poc.tools import Edge, build_graph, was_true_at, Graph
def _src(name="doc", reliability="canonical"):
    """Build a fake prose ``LoreSource`` for use in these tests.

    The source lives at ``/fake/<name>.md`` and its ``source_confidence`` is
    looked up from a table local to this helper, keyed by ``reliability``.
    A reliability outside that table raises ``KeyError``.
    """
    confidence_by_reliability = {
        "canonical": 1.0,
        "factional": 0.75,
        "rumor": 0.5,
        "dialogue": 0.4,
        "fanon": 0.3,
    }
    fake_path = f"/fake/{name}.md"
    confidence = confidence_by_reliability[reliability]
    return LoreSource(
        path=fake_path,
        name=name,
        source_type="prose",
        reliability=reliability,
        source_confidence=confidence,
    )
def _ent(name, src):
    """Build a fake ``npc`` ``Entity`` whose slug and name are both ``name``.

    The entity's path is ``/fake/<name>.md`` and ``src`` is its only source.
    """
    fake_path = f"/fake/{name}.md"
    return Entity(
        slug=name,
        name=name,
        type="npc",
        path=fake_path,
        sources=[src],
    )
def test_frontmatter_edge_full_confidence():
    """Frontmatter-style edge: extraction=1.0 and source=1.0 give aggregate 1.0."""
    canon = _src("canon")
    aldric = _ent("Aldric", canon)
    membership = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=canon.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([aldric], [membership])

    answer = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )

    assert answer["was_true"] is True
    # Both factors are 1.0, so exact float equality is safe here.
    assert answer["confidence"] == 1.0
    assert answer["extraction_confidences"] == [1.0]
    assert answer["source_confidences"] == [1.0]
    assert answer["reliabilities"] == ["canonical"]
    print(" OK frontmatter edge: confidence=1.0")
def test_body_text_edge_extraction_lower():
    """Body-text-style edge: extraction=0.6 and source=1.0 give aggregate 0.6."""
    body = _src("body")
    roland = _ent("Roland", body)
    sibling = Triple(
        subject="Roland",
        relation="SIBLING_OF",
        object="Aldric",
        source_path=body.path,
        source_slug="Roland",
        extraction_confidence=0.6,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([roland], [sibling])

    answer = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")

    assert answer["was_true"] is True
    # Tolerance compare: the aggregate is a float product.
    deviation = abs(answer["confidence"] - 0.6)
    assert deviation < 1e-9
    assert answer["extraction_confidences"] == [0.6]
    print(" OK body-text edge: confidence=0.6 (extraction factor)")
def test_rumor_source_lower():
    """Rumor source: extraction=1.0 and source=0.5 give aggregate 0.5."""
    tavern = _src("tavern", reliability="rumor")
    drunk = _ent("Drunk", tavern)
    alliance = Triple(
        subject="Drunk",
        relation="ALLIED_WITH",
        object="House Vyr",
        source_path=tavern.path,
        source_slug="Drunk",
        extraction_confidence=1.0,
        source_confidence=0.5,
        reliability="rumor",
    )
    graph = build_graph([drunk], [alliance])

    answer = was_true_at(graph, "ALLIED_WITH", "Drunk", "House Vyr", "3rd_age.year_345")

    assert answer["was_true"] is True
    # Tolerance compare: the aggregate is a float product.
    deviation = abs(answer["confidence"] - 0.5)
    assert deviation < 1e-9
    assert answer["reliabilities"] == ["rumor"]
    print(" OK rumor source: confidence=0.5 (source factor)")
def test_two_sources_aggregate_is_min():
    """Two agreeing sources: aggregate is the min of extraction*source over both."""
    chronicle = _src("chronicle", reliability="canonical")
    letter = _src("letter", reliability="factional")
    theron = _ent("Theron", chronicle)
    maric = _ent("Maric", letter)

    # Both triples state the same (subject, relation, object); the assertions
    # below expect them to be reported together under a single result.
    shared_fact = {
        "subject": "Theron",
        "relation": "RULED",
        "object": "Valdorn",
        "source_slug": "Theron",
    }
    from_chronicle = Triple(
        source_path=chronicle.path,
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
        **shared_fact,
    )
    from_letter = Triple(
        source_path=letter.path,
        extraction_confidence=0.9,
        source_confidence=0.75,
        reliability="factional",
        **shared_fact,
    )
    graph = build_graph([theron, maric], [from_chronicle, from_letter])

    answer = was_true_at(graph, "RULED", "Theron", "Valdorn", "3rd_age.year_345")

    assert answer["was_true"] is True
    # min(1.0*1.0, 0.9*0.75) = min(1.0, 0.675) = 0.675
    deviation = abs(answer["confidence"] - 0.675)
    assert deviation < 1e-9
    assert len(answer["sources"]) == 2
    assert answer["extraction_confidences"] == [1.0, 0.9]
    assert answer["source_confidences"] == [1.0, 0.75]
    assert answer["reliabilities"] == ["canonical", "factional"]
    print(f" OK two agreeing sources: aggregate=min(1.0*1.0, 0.9*0.75)={answer['confidence']}")
def test_duplicate_source_path_dedupes():
    """Two mentions in the same document merge; the path appears once in sources[]."""
    body = _src("body")
    roland = _ent("Roland", body)

    # Two distinct but field-for-field identical triples from the same path.
    mention_fields = {
        "subject": "Roland",
        "relation": "SIBLING_OF",
        "object": "Aldric",
        "source_path": body.path,
        "source_slug": "Roland",
        "extraction_confidence": 0.6,
        "source_confidence": 1.0,
        "reliability": "canonical",
    }
    first_mention = Triple(**mention_fields)
    second_mention = Triple(**mention_fields)
    graph = build_graph([roland], [first_mention, second_mention])

    answer = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")

    assert len(answer["sources"]) == 1
    assert answer["extraction_confidences"] == [0.6]
    print(" OK duplicate source paths dedupe in sources[]")
def test_reliability_to_source_confidence_table():
    """Each of the 5 reliability levels maps to its documented source_confidence."""
    from lore_engine_poc.parsers import RELIABILITY_TO_SOURCE_CONFIDENCE

    documented = (
        ("canonical", 1.0),
        ("factional", 0.75),
        ("rumor", 0.5),
        ("dialogue", 0.4),
        ("fanon", 0.3),
    )
    for level, confidence in documented:
        actual = RELIABILITY_TO_SOURCE_CONFIDENCE[level]
        assert actual == confidence, f"{level} should be {confidence}"
    print(" OK reliability → source_confidence table is canonical=1.0, factional=0.75, rumor=0.5, dialogue=0.4, fanon=0.3")
def test_windows_consistent_open():
    """Two fully open windows (all four bounds ``None``) are consistent."""
    from lore_engine_poc.tools import _windows_consistent

    unbounded = None
    assert _windows_consistent(unbounded, unbounded, unbounded, unbounded)
    print(" OK windows: two null windows are consistent")
def test_windows_consistent_same_bounds():
    """Two windows with identical lower and upper bounds are consistent."""
    from lore_engine_poc.tools import _windows_consistent

    lower = "3rd_age.year_300"
    upper = "3rd_age.year_360"
    # Same (lower, upper) pair passed for both windows.
    assert _windows_consistent(lower, upper, lower, upper)
    print(" OK windows: identical bounds are consistent")
def test_windows_inconsistent_different_bounds():
    """Two windows whose lower bounds differ are inconsistent."""
    from lore_engine_poc.tools import _windows_consistent

    first_lower = "3rd_age.year_300"
    second_lower = "3rd_age.year_310"
    # Upper bounds are left open (None) for both windows.
    consistent = _windows_consistent(first_lower, None, second_lower, None)
    assert not consistent
    print(" OK windows: different lower bounds are inconsistent")
def test_disputed_edge_creation():
    """Same subject and relation with different objects gives two non-disputed edges.

    Despite the test's name, this case does not exercise conflicting time
    bounds. The two triples share subject ``Aldric`` and relation
    ``PARENT_OF`` but name different objects (``Maric`` vs ``Theron``), so
    they are different facts. The test asserts each is reported as true and
    that neither is flagged ``is_disputed``.
    """
    src_a = _src("chronicle", reliability="canonical")
    src_b = _src("letter", reliability="factional")
    e_a = _ent("Chronicle", src_a)
    e_b = _ent("Letter", src_b)
    t1 = Triple(
        subject="Aldric",
        relation="PARENT_OF",
        object="Maric",
        source_path=src_a.path,
        source_slug="Chronicle",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    t2 = Triple(
        subject="Aldric",
        relation="PARENT_OF",
        object="Theron",
        source_path=src_b.path,
        source_slug="Letter",
        extraction_confidence=0.6,
        source_confidence=0.75,
        reliability="factional",
    )
    g = build_graph([e_a, e_b], [t1, t2])

    # Two distinct (subject, relation, object) tuples -> two edges, neither disputed.
    result_maric = was_true_at(g, "PARENT_OF", "Aldric", "Maric", "3rd_age.year_345")
    result_theron = was_true_at(g, "PARENT_OF", "Aldric", "Theron", "3rd_age.year_345")
    assert result_maric["was_true"] is True
    assert result_theron["was_true"] is True
    assert result_maric["is_disputed"] is False
    assert result_theron["is_disputed"] is False
    print(" OK different objects (Maric vs Theron) produce two non-disputed edges")
def test_disputed_response_field_present():
    """``was_true_at`` response includes ``is_disputed`` and ``disputed_with_sources``."""
    canon = _src("canon")
    aldric = _ent("Aldric", canon)
    membership = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=canon.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([aldric], [membership])

    answer = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )

    # Presence first, then the values expected for a single uncontested triple.
    assert "is_disputed" in answer
    assert "disputed_with_sources" in answer
    assert answer["is_disputed"] is False
    assert answer["disputed_with_sources"] == []
    print(" OK was_true_at response surfaces is_disputed + disputed_with_sources fields")
# Registry of test callables executed by the ``__main__`` runner, in this order.
CASES = [
    # Aggregate confidence from extraction and source factors
    test_frontmatter_edge_full_confidence,
    test_body_text_edge_extraction_lower,
    test_rumor_source_lower,
    test_two_sources_aggregate_is_min,
    test_duplicate_source_path_dedupes,
    test_reliability_to_source_confidence_table,
    # Time-window consistency helper
    test_windows_consistent_open,
    test_windows_consistent_same_bounds,
    test_windows_inconsistent_different_bounds,
    # Dispute reporting in the was_true_at response
    test_disputed_edge_creation,
    test_disputed_response_field_present,
]
if __name__ == "__main__":
    # Script entry point: run every registered case, print one FAIL/ERROR line
    # per failing case, then a ``passed/total`` summary. Exit status is 1 if
    # any case failed and 0 otherwise.
    import traceback  # local import: only the script runner needs it

    print("Running confidence-model tests:")
    fail = 0
    for c in CASES:
        try:
            c()
        except AssertionError as e:
            fail += 1
            detail = str(e)
            if not detail:
                # Bare ``assert`` statements carry no message, which would
                # leave the FAIL line empty. Fall back to the location and
                # source text of the innermost failing frame.
                frames = traceback.extract_tb(e.__traceback__)
                if frames:
                    innermost = frames[-1]
                    source_line = innermost.line or "<source unavailable>"
                    detail = f"line {innermost.lineno}: {source_line}"
            print(f" FAIL {c.__name__}: {detail}")
        except Exception as e:
            # Anything other than an assertion failure (e.g. KeyError,
            # ImportError) is reported separately but still counts as failed.
            fail += 1
            print(f" ERROR {c.__name__}: {type(e).__name__}: {e}")
    print(f"\n{len(CASES) - fail}/{len(CASES)} passed")
    sys.exit(1 if fail else 0)