# lore-engine-poc-v3/tests/test_confidence.py

"""Unit tests for the dual-confidence model in tools.py.

These run as a script: ``python3 tests/test_confidence.py``.
Pass criterion: prints ``N/N passed`` with no FAIL or ERROR lines (exit status 0).
"""

from __future__ import annotations

import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from lore_engine_poc.parsers import Entity, LoreSource, Triple
from lore_engine_poc.tools import Edge, build_graph, was_true_at, Graph


def _src(name="doc", reliability="canonical"):
    """Build a fake prose ``LoreSource`` named ``name`` for use in tests.

    The ``source_confidence`` is looked up from ``reliability``; a label that
    is not in the table below raises ``KeyError``.
    """
    confidence_by_reliability = {
        "canonical": 1.0,
        "factional": 0.75,
        "rumor": 0.5,
        "dialogue": 0.4,
        "fanon": 0.3,
    }
    fake_path = f"/fake/{name}.md"
    return LoreSource(
        path=fake_path,
        name=name,
        source_type="prose",
        reliability=reliability,
        source_confidence=confidence_by_reliability[reliability],
    )


def _ent(name, src):
    """Build a fake ``npc`` Entity whose slug and name are both ``name``, backed by ``src``."""
    fake_path = f"/fake/{name}.md"
    return Entity(
        slug=name,
        name=name,
        type="npc",
        path=fake_path,
        sources=[src],
    )


def test_frontmatter_edge_full_confidence():
    """Frontmatter-driven edge: extraction 1.0 and source 1.0 give aggregate 1.0."""
    source = _src("canon")
    entity = _ent("Aldric", source)
    triple = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=source.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])
    answer = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )
    assert answer["was_true"] is True
    # Remaining fields are compared by equality, in the same order as before.
    expected_fields = (
        ("confidence", 1.0),
        ("extraction_confidences", [1.0]),
        ("source_confidences", [1.0]),
        ("reliabilities", ["canonical"]),
    )
    for field, expected in expected_fields:
        assert answer[field] == expected
    print("  OK  frontmatter edge: confidence=1.0")


def test_body_text_edge_extraction_lower():
    """Body-text-inferred edge: extraction 0.6 and source 1.0 give aggregate 0.6."""
    source = _src("body")
    entity = _ent("Roland", source)
    triple = Triple(
        subject="Roland",
        relation="SIBLING_OF",
        object="Aldric",
        source_path=source.path,
        source_slug="Roland",
        extraction_confidence=0.6,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])
    answer = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")
    assert answer["was_true"] is True
    # The aggregate is a float product, so compare with a tolerance.
    deviation = abs(answer["confidence"] - 0.6)
    assert deviation < 1e-9
    assert answer["extraction_confidences"] == [0.6]
    print("  OK  body-text edge: confidence=0.6 (extraction factor)")


def test_rumor_source_lower():
    """Rumor source: extraction 1.0 and source 0.5 give aggregate 0.5."""
    rumor = _src("tavern", reliability="rumor")
    teller = _ent("Drunk", rumor)
    claim = Triple(
        subject="Drunk",
        relation="ALLIED_WITH",
        object="House Vyr",
        source_path=rumor.path,
        source_slug="Drunk",
        extraction_confidence=1.0,
        source_confidence=0.5,
        reliability="rumor",
    )
    graph = build_graph([teller], [claim])
    answer = was_true_at(graph, "ALLIED_WITH", "Drunk", "House Vyr", "3rd_age.year_345")
    assert answer["was_true"] is True
    deviation = abs(answer["confidence"] - 0.5)
    assert deviation < 1e-9
    assert answer["reliabilities"] == ["rumor"]
    print("  OK  rumor source: confidence=0.5 (source factor)")


def test_two_sources_aggregate_is_min():
    """Two agreeing sources: the aggregate is the min of extraction*source over both."""
    chronicle = _src("chronicle", reliability="canonical")
    letter = _src("letter", reliability="factional")
    entities = [_ent("Theron", chronicle), _ent("Maric", letter)]

    # Both triples state the same (subject, relation, object); only the
    # provenance and confidence figures differ, so they merge into one Edge.
    shared_claim = {
        "subject": "Theron",
        "relation": "RULED",
        "object": "Valdorn",
        "source_slug": "Theron",
    }
    from_chronicle = Triple(
        source_path=chronicle.path,
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
        **shared_claim,
    )
    from_letter = Triple(
        source_path=letter.path,
        extraction_confidence=0.9,
        source_confidence=0.75,
        reliability="factional",
        **shared_claim,
    )

    graph = build_graph(entities, [from_chronicle, from_letter])
    answer = was_true_at(graph, "RULED", "Theron", "Valdorn", "3rd_age.year_345")
    assert answer["was_true"] is True

    # Expected aggregate: min(1.0*1.0, 0.9*0.75) = min(1.0, 0.675) = 0.675
    aggregate = answer["confidence"]
    assert abs(aggregate - 0.675) < 1e-9
    assert len(answer["sources"]) == 2
    assert answer["extraction_confidences"] == [1.0, 0.9]
    assert answer["source_confidences"] == [1.0, 0.75]
    assert answer["reliabilities"] == ["canonical", "factional"]
    print(f"  OK  two agreeing sources: aggregate=min(1.0*1.0, 0.9*0.75)={aggregate}")


def test_duplicate_source_path_dedupes():
    """Two mentions in one document merge; the path is listed once in sources[]."""
    source = _src("body")
    entity = _ent("Roland", source)
    # Two separate but field-for-field identical triples from the same path.
    mentions = [
        Triple(
            subject="Roland",
            relation="SIBLING_OF",
            object="Aldric",
            source_path=source.path,
            source_slug="Roland",
            extraction_confidence=0.6,
            source_confidence=1.0,
            reliability="canonical",
        )
        for _ in range(2)
    ]
    graph = build_graph([entity], mentions)
    answer = was_true_at(graph, "SIBLING_OF", "Roland", "Aldric", "3rd_age.year_345")
    source_count = len(answer["sources"])
    assert source_count == 1
    assert answer["extraction_confidences"] == [0.6]
    print("  OK  duplicate source paths dedupe in sources[]")


def test_reliability_to_source_confidence_table():
    """Each of the 5 reliability levels maps to its documented source_confidence."""
    from lore_engine_poc.parsers import RELIABILITY_TO_SOURCE_CONFIDENCE as table

    documented = (
        ("canonical", 1.0),
        ("factional", 0.75),
        ("rumor", 0.5),
        ("dialogue", 0.4),
        ("fanon", 0.3),
    )
    for level, confidence in documented:
        assert table[level] == confidence, f"{level} should be {confidence}"
    print("  OK  reliability → source_confidence table is canonical=1.0, factional=0.75, rumor=0.5, dialogue=0.4, fanon=0.3")


def test_windows_consistent_open():
    """Two fully open windows (every bound ``None``) count as consistent."""
    from lore_engine_poc.tools import _windows_consistent

    open_window = (None, None)
    verdict = _windows_consistent(*open_window, *open_window)
    assert verdict
    print("  OK  windows: two null windows are consistent")


def test_windows_consistent_same_bounds():
    """A window compared against an identical window counts as consistent."""
    from lore_engine_poc.tools import _windows_consistent

    window = ("3rd_age.year_300", "3rd_age.year_360")
    verdict = _windows_consistent(*window, *window)
    assert verdict
    print("  OK  windows: identical bounds are consistent")


def test_windows_inconsistent_different_bounds():
    """Windows whose lower bounds differ (year_300 vs year_310) are inconsistent."""
    from lore_engine_poc.tools import _windows_consistent

    window_a = ("3rd_age.year_300", None)
    window_b = ("3rd_age.year_310", None)
    verdict = _windows_consistent(*window_a, *window_b)
    assert not verdict
    print("  OK  windows: different lower bounds are inconsistent")


def test_disputed_edge_creation():
    """Triples that differ only in object yield two separate, undisputed edges.

    Despite the test's name, no dispute is constructed here. The two triples
    share a subject (Aldric) and relation (PARENT_OF) but name different
    objects (Maric vs Theron), so they describe different facts rather than
    conflicting accounts of one fact. The test asserts that each is reported
    true with ``is_disputed`` False. Conflicting time windows on the *same*
    (subject, relation, object) are not exercised by this test.
    """
    src_a = _src("chronicle", reliability="canonical")
    src_b = _src("letter", reliability="factional")
    e_a = _ent("Chronicle", src_a)
    e_b = _ent("Letter", src_b)
    # Same subject and relation, different objects, from two different sources.
    t1 = Triple(subject="Aldric", relation="PARENT_OF", object="Maric",
                source_path=src_a.path, source_slug="Chronicle",
                extraction_confidence=1.0, source_confidence=1.0, reliability="canonical")
    t2 = Triple(subject="Aldric", relation="PARENT_OF", object="Theron",
                source_path=src_b.path, source_slug="Letter",
                extraction_confidence=0.6, source_confidence=0.75, reliability="factional")
    g = build_graph([e_a, e_b], [t1, t2])
    # Two distinct (subject, relation, object) tuples -> two edges, neither disputed.
    result_maric = was_true_at(g, "PARENT_OF", "Aldric", "Maric", "3rd_age.year_345")
    result_theron = was_true_at(g, "PARENT_OF", "Aldric", "Theron", "3rd_age.year_345")
    assert result_maric["was_true"] is True
    assert result_theron["was_true"] is True
    assert result_maric["is_disputed"] is False
    assert result_theron["is_disputed"] is False
    print("  OK  different objects (Maric vs Theron) produce two non-disputed edges")


def test_disputed_response_field_present():
    """``was_true_at`` reports ``is_disputed`` and ``disputed_with_sources`` for a plain edge."""
    source = _src("canon")
    entity = _ent("Aldric", source)
    triple = Triple(
        subject="Aldric",
        relation="MEMBER_OF",
        object="House Raventhorne",
        source_path=source.path,
        source_slug="Aldric",
        extraction_confidence=1.0,
        source_confidence=1.0,
        reliability="canonical",
    )
    graph = build_graph([entity], [triple])
    response = was_true_at(
        graph, "MEMBER_OF", "Aldric", "House Raventhorne", "3rd_age.year_345"
    )
    # Both keys must exist before their values are inspected.
    for field in ("is_disputed", "disputed_with_sources"):
        assert field in response
    assert response["is_disputed"] is False
    assert response["disputed_with_sources"] == []
    print("  OK  was_true_at response surfaces is_disputed + disputed_with_sources fields")


# Registry of test callables, executed in this order by the script runner below.
CASES = [
    test_frontmatter_edge_full_confidence,
    test_body_text_edge_extraction_lower,
    test_rumor_source_lower,
    test_two_sources_aggregate_is_min,
    test_duplicate_source_path_dedupes,
    test_reliability_to_source_confidence_table,
    test_windows_consistent_open,
    test_windows_consistent_same_bounds,
    test_windows_inconsistent_different_bounds,
    test_disputed_edge_creation,
    test_disputed_response_field_present,
]

if __name__ == "__main__":
    # Run every case, keep going after a failure, and report a summary.
    print("Running confidence-model tests:")
    total = len(CASES)
    failures = 0
    for case in CASES:
        try:
            case()
        except AssertionError as exc:
            # A failed expectation inside the test.
            failures += 1
            print(f"  FAIL  {case.__name__}: {exc}")
        except Exception as exc:
            # Anything else (import problems, missing keys, ...) is reported as an error.
            failures += 1
            print(f"  ERROR {case.__name__}: {type(exc).__name__}: {exc}")
    print(f"\n{total - failures}/{total} passed")
    # Non-zero exit status when at least one case did not pass.
    sys.exit(1 if failures else 0)