234 lines
10 KiB
Python
234 lines
10 KiB
Python
"""Unit tests for the dual-confidence model in tools.py.

These run as a script: ``python3 tests/test_confidence.py``.
Pass criterion: prints ``N/N passed`` with no FAIL or ERROR lines.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
ROOT = Path(__file__).resolve().parent.parent
|
|
sys.path.insert(0, str(ROOT))
|
|
|
|
from lore_engine_poc.parsers import Entity, LoreSource, Triple
|
|
from lore_engine_poc.tools import Edge, build_graph, was_true_at, Graph
|
|
|
|
|
|
def _src(name="doc", reliability="canonical"):
    """Build a prose ``LoreSource`` fixture located at ``/fake/<name>.md``.

    ``source_confidence`` is looked up from ``reliability`` in the table
    below; a reliability label missing from the table raises ``KeyError``.
    """
    fake_path = f"/fake/{name}.md"
    confidence_by_reliability = {
        "canonical": 1.0,
        "factional": 0.75,
        "rumor": 0.5,
        "dialogue": 0.4,
        "fanon": 0.3,
    }
    confidence = confidence_by_reliability[reliability]
    return LoreSource(
        path=fake_path,
        name=name,
        source_type="prose",
        reliability=reliability,
        source_confidence=confidence,
    )
|
|
|
|
|
|
def _ent(name, src):
    """Build an ``npc`` ``Entity`` fixture that uses ``name`` as slug and name.

    The entity path is ``/fake/<name>.md`` and ``src`` is its only source.
    """
    fake_path = f"/fake/{name}.md"
    return Entity(
        slug=name,
        name=name,
        type="npc",
        path=fake_path,
        sources=[src],
    )
|
|
|
|
|
|
def test_frontmatter_edge_full_confidence():
    """A single canonical triple with extraction=1.0 and source=1.0 yields confidence 1.0."""
    source = _src("canon")
    entity = _ent("Aldric", source)
    triple = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=source.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])

    result = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )

    assert result["was_true"] is True
    assert result["confidence"] == 1.0
    # Per-source breakdowns carry exactly one entry for the single triple.
    assert result["extraction_confidences"] == [1.0]
    assert result["source_confidences"] == [1.0]
    assert result["reliabilities"] == ["canonical"]
    print(" OK frontmatter edge: confidence=1.0")
|
|
|
|
|
|
def test_body_text_edge_extraction_lower():
    """A canonical triple with extraction=0.6 and source=1.0 yields confidence 0.6."""
    source = _src("body")
    entity = _ent("Roland", source)
    triple = Triple(
        subject="Roland",
        relation="SIBLING_OF",
        object="Aldric",
        source_path=source.path,
        source_slug="Roland",
        extraction_confidence=0.6,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])

    result = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")

    assert result["was_true"] is True
    # Compare with a tolerance: the aggregate is a float.
    delta = abs(result["confidence"] - 0.6)
    assert delta < 1e-9
    assert result["extraction_confidences"] == [0.6]
    print(" OK body-text edge: confidence=0.6 (extraction factor)")
|
|
|
|
|
|
def test_rumor_source_lower():
    """A rumor triple with extraction=1.0 and source=0.5 yields confidence 0.5."""
    source = _src("tavern", reliability="rumor")
    entity = _ent("Drunk", source)
    triple = Triple(
        subject="Drunk",
        relation="ALLIED_WITH",
        object="House Vyr",
        source_path=source.path,
        source_slug="Drunk",
        extraction_confidence=1.0,
        source_confidence=0.5,
        reliability="rumor",
    )
    graph = build_graph([entity], [triple])

    result = was_true_at(graph, "ALLIED_WITH", "Drunk", "House Vyr", "3rd_age.year_345")

    assert result["was_true"] is True
    # Compare with a tolerance: the aggregate is a float.
    delta = abs(result["confidence"] - 0.5)
    assert delta < 1e-9
    assert result["reliabilities"] == ["rumor"]
    print(" OK rumor source: confidence=0.5 (source factor)")
|
|
|
|
|
|
def test_two_sources_aggregate_is_min():
    """Two sources asserting the same fact: aggregate is the min of extraction*source."""
    chronicle = _src("chronicle", reliability="canonical")
    letter = _src("letter", reliability="factional")
    entities = [_ent("Theron", chronicle), _ent("Maric", letter)]

    # Same (subject, relation, object) from two different source paths.
    canonical_claim = Triple(
        subject="Theron",
        relation="RULED",
        object="Valdorn",
        source_path=chronicle.path,
        source_slug="Theron",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    factional_claim = Triple(
        subject="Theron",
        relation="RULED",
        object="Valdorn",
        source_path=letter.path,
        source_slug="Theron",
        extraction_confidence=0.9,
        source_confidence=0.75,
        reliability="factional",
    )
    graph = build_graph(entities, [canonical_claim, factional_claim])

    result = was_true_at(graph, "RULED", "Theron", "Valdorn", "3rd_age.year_345")

    assert result["was_true"] is True
    # Expected aggregate: min(1.0*1.0, 0.9*0.75) = min(1.0, 0.675) = 0.675
    delta = abs(result["confidence"] - 0.675)
    assert delta < 1e-9
    assert len(result["sources"]) == 2
    # Per-source breakdowns are expected in triple order.
    assert result["extraction_confidences"] == [1.0, 0.9]
    assert result["source_confidences"] == [1.0, 0.75]
    assert result["reliabilities"] == ["canonical", "factional"]
    print(f" OK two agreeing sources: aggregate=min(1.0*1.0, 0.9*0.75)={result['confidence']}")
|
|
|
|
|
|
def test_duplicate_source_path_dedupes():
    """Two identical triples from one document report a single source entry."""
    source = _src("body")
    entity = _ent("Roland", source)
    # Two separate but field-for-field identical Triple objects.
    mentions = [
        Triple(
            subject="Roland",
            relation="SIBLING_OF",
            object="Aldric",
            source_path=source.path,
            source_slug="Roland",
            extraction_confidence=0.6,
            source_confidence=1.0,
            reliability="canonical",
        )
        for _ in range(2)
    ]
    graph = build_graph([entity], mentions)

    result = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")

    assert len(result["sources"]) == 1
    assert result["extraction_confidences"] == [0.6]
    print(" OK duplicate source paths dedupe in sources[]")
|
|
|
|
|
|
def test_reliability_to_source_confidence_table():
    """Each of the five reliability labels maps to its expected source_confidence."""
    from lore_engine_poc.parsers import RELIABILITY_TO_SOURCE_CONFIDENCE

    expected = {
        "canonical": 1.0,
        "factional": 0.75,
        "rumor": 0.5,
        "dialogue": 0.4,
        "fanon": 0.3,
    }
    for r in expected:
        c = expected[r]
        actual = RELIABILITY_TO_SOURCE_CONFIDENCE[r]
        assert actual == c, f"{r} should be {c}"
    print(" OK reliability → source_confidence table is canonical=1.0, factional=0.75, rumor=0.5, dialogue=0.4, fanon=0.3")
|
|
|
|
|
|
def test_windows_consistent_open():
    """``_windows_consistent`` accepts two windows whose bounds are all ``None``."""
    from lore_engine_poc.tools import _windows_consistent

    consistent = _windows_consistent(None, None, None, None)
    assert consistent
    print(" OK windows: two null windows are consistent")
|
|
|
|
|
|
def test_windows_consistent_same_bounds():
    """``_windows_consistent`` accepts two windows with identical bounds."""
    from lore_engine_poc.tools import _windows_consistent

    lower, upper = "3rd_age.year_300", "3rd_age.year_360"
    consistent = _windows_consistent(lower, upper, lower, upper)
    assert consistent
    print(" OK windows: identical bounds are consistent")
|
|
|
|
|
|
def test_windows_inconsistent_different_bounds():
    """``_windows_consistent`` rejects two windows whose lower bounds differ."""
    from lore_engine_poc.tools import _windows_consistent

    consistent = _windows_consistent(
        "3rd_age.year_300", None,
        "3rd_age.year_310", None,
    )
    assert not consistent
    print(" OK windows: different lower bounds are inconsistent")
|
|
|
|
|
|
def test_disputed_edge_creation():
    """Same subject and relation with different objects yield two undisputed edges.

    Despite the test's name, no dispute is expected here: the two triples
    are ``(Aldric, PARENT_OF, Maric)`` and ``(Aldric, PARENT_OF, Theron)``.
    Because the objects differ they are separate facts rather than
    competing claims about one fact, so each query must come back with
    ``was_true`` set and ``is_disputed`` ``False``.
    """
    src_a = _src("chronicle", reliability="canonical")
    src_b = _src("letter", reliability="factional")
    e_a = _ent("Chronicle", src_a)
    e_b = _ent("Letter", src_b)

    # Canonical chronicle: Aldric PARENT_OF Maric.
    t1 = Triple(
        subject="Aldric",
        relation="PARENT_OF",
        object="Maric",
        source_path=src_a.path,
        source_slug="Chronicle",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    # Factional letter: Aldric PARENT_OF Theron (a different object).
    t2 = Triple(
        subject="Aldric",
        relation="PARENT_OF",
        object="Theron",
        source_path=src_b.path,
        source_slug="Letter",
        extraction_confidence=0.6,
        source_confidence=0.75,
        reliability="factional",
    )
    g = build_graph([e_a, e_b], [t1, t2])

    # Two distinct (subject, relation, object) tuples -> two edges, neither disputed.
    result_maric = was_true_at(g, "PARENT_OF", "Aldric", "Maric", "3rd_age.year_345")
    result_theron = was_true_at(g, "PARENT_OF", "Aldric", "Theron", "3rd_age.year_345")
    assert result_maric["was_true"] is True
    assert result_theron["was_true"] is True
    assert result_maric["is_disputed"] is False
    assert result_theron["is_disputed"] is False
    print(" OK different objects (Maric vs Theron) produce two non-disputed edges")
|
|
|
|
|
|
def test_disputed_response_field_present():
    """``was_true_at`` reports ``is_disputed`` and ``disputed_with_sources`` for a lone edge."""
    source = _src("canon")
    entity = _ent("Aldric", source)
    triple = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=source.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])

    result = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )

    # Check that both keys exist before checking their values.
    assert "is_disputed" in result
    assert "disputed_with_sources" in result
    assert result["is_disputed"] is False
    assert result["disputed_with_sources"] == []
    print(" OK was_true_at response surfaces is_disputed + disputed_with_sources fields")
|
|
|
|
|
|
# Test callables in the order the script runner executes them.
CASES = [
    # Confidence aggregation
    test_frontmatter_edge_full_confidence,
    test_body_text_edge_extraction_lower,
    test_rumor_source_lower,
    test_two_sources_aggregate_is_min,
    test_duplicate_source_path_dedupes,
    test_reliability_to_source_confidence_table,
    # Time-window consistency
    test_windows_consistent_open,
    test_windows_consistent_same_bounds,
    test_windows_inconsistent_different_bounds,
    # Dispute reporting
    test_disputed_edge_creation,
    test_disputed_response_field_present,
]
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run every case, report each failure on its own
    # line, print the pass tally, and exit non-zero if anything failed.
    print("Running confidence-model tests:")
    failures = 0
    for case in CASES:
        try:
            case()
        except AssertionError as exc:
            # A failed expectation inside the test.
            failures += 1
            print(f" FAIL {case.__name__}: {exc}")
        except Exception as exc:
            # Any other exception is reported with its type and still
            # counts against the tally, so one broken case cannot abort the run.
            failures += 1
            print(f" ERROR {case.__name__}: {type(exc).__name__}: {exc}")
    total = len(CASES)
    print(f"\n{total - failures}/{total} passed")
    sys.exit(int(failures > 0))
|