lore-engine-poc-v3/lore_engine_poc/tools.py

"""Lore Engine POC — read-side tools.

This module implements the single tool promised by the slice:

    was_true_at(graph, relation, subject, object, at_time, ...) -> dict

The tool answers the question "did the ``relation`` between ``subject``
and ``object`` hold true at ``at_time``?" by:

  1. Looking up the subject and object in the in-memory graph built by
     :mod:`lore_engine_poc.parsers`.
  2. Finding edges of the requested type between them.
  3. Evaluating each edge's ``valid_from`` / ``valid_until`` window
     against ``at_time`` using
     :func:`lore_engine_poc.time_model.time_in_window`.

For the slice the graph is an in-memory dict keyed by entity name. The
Cognee integration in :mod:`lore_engine_poc.cognee_store` is a parallel
code path that materialises the same triples into Cognee's graph DB.
The two paths can be cross-checked in tests.
"""

from __future__ import annotations

import os
import uuid
from dataclasses import dataclass, field, replace
from typing import Iterable, Optional


def _new_edge_id() -> str:
    """Return a fresh edge id: ``e-`` followed by 8 hex characters.

    The hex part is the leading slice of a random UUID4. The id is
    what retcon / mark_verified use to point at one specific edge
    in the graph.
    """
    short_hex = uuid.uuid4().hex[:8]
    return "e-" + short_hex

from .parsers import Entity, LoreSource, Triple, iter_codex, extract_triples
from .time_model import time_in_window


@dataclass
class Edge:
    """One ``subject --relation--> object`` fact plus its provenance."""

    # The fact itself.
    subject: str
    relation: str
    object: str
    # Identity of this particular edge. Supplied by the default
    # factory, so call sites that never pass ``edge_id`` still get
    # one. Slice 10.2 (retcon / mark_verified) addresses edges by it.
    edge_id: str = field(default_factory=_new_edge_id)
    # Validity window. ``None`` on a side means no bound is recorded
    # for that side; both ``None`` is an unbounded edge.
    valid_from: Optional[str] = None
    valid_until: Optional[str] = None
    # Provenance, kept as parallel lists: position ``i`` of
    # ``extraction_confidences`` / ``source_confidences`` /
    # ``reliabilities`` describes ``sources[i]``. The per-source
    # values stay available for inspection; the single number
    # reported to callers is ``aggregate_confidence`` below.
    sources: list[str] = field(default_factory=list)
    extraction_confidences: list[float] = field(default_factory=list)
    source_confidences: list[float] = field(default_factory=list)
    reliabilities: list[str] = field(default_factory=list)
    # Plain stored field defaulting to 1.0. Nothing in this class
    # updates it; the computed figure is ``aggregate_confidence``.
    confidence: float = 1.0
    # Dispute bookkeeping. Meant for edges whose time bounds conflict
    # with another edge for the same ``(subject, relation, object)``:
    # each side is flagged and lists the other in ``disputed_with``.
    # Slice 2's consistency engine is expected to turn such pairs
    # into a ``Contradiction`` node.
    is_disputed: bool = False
    disputed_with: list["Edge"] = field(default_factory=list)
    # Slice 10.2 retcon audit trail: ``retcon_at`` is an ISO
    # timestamp, ``retcon_note`` the free-text reason. A retcon
    # mutates the edge in place; the append-only history lives in
    # the audit log, not on the edge.
    retcon_at: Optional[str] = None
    retcon_note: Optional[str] = None
    # Slice 10.2 mark-verified audit trail: who verified (e.g. an
    # email or handle), when (ISO timestamp), and why (free text).
    # The mark-verified tool is described as also appending a
    # ``(1.0, 1.0, "human_verified")`` source entry.
    # NOTE(review): ``aggregate_confidence`` takes a minimum, so an
    # extra 1.0 entry cannot raise it — confirm the intended effect.
    verified_by: Optional[str] = None
    verified_at: Optional[str] = None
    verified_note: Optional[str] = None

    @property
    def aggregate_confidence(self) -> float:
        """Lowest ``extraction * source`` product over all sources.

        This worst-case figure is what answers report as their
        confidence. Sources are paired positionally; if either
        confidence list is empty there is nothing to pair and the
        result is ``0.0``.
        """
        products = [
            extraction * source
            for extraction, source in zip(
                self.extraction_confidences, self.source_confidences
            )
        ]
        # An empty pairing (either list empty) falls back to 0.0.
        return min(products, default=0.0)


def _windows_consistent(a_from, a_until, b_from, b_until) -> bool:
    """Decide whether two validity windows can describe the same fact.

    Used by the edge merger to tell "same fact" from "temporal
    dispute" for triples sharing ``(subject, relation, object)``.

    Rules, in order:

    * If either window has *both* bounds ``None`` the windows are
      consistent (this includes two completely empty windows).
    * Otherwise the two lower bounds are compared, then the two
      upper bounds. A pair is only compared when both sides are
      present; a pair that is not ``==`` must still compare equal
      under ``time_model._cmp_atoms`` or the windows are
      inconsistent.

    Returns ``True`` when no compared pair disagrees.
    """
    a_is_open = a_from is None and a_until is None
    b_is_open = b_from is None and b_until is None
    if a_is_open or b_is_open:
        return True

    for left, right in ((a_from, b_from), (a_until, b_until)):
        if left is not None and right is not None and left != right:
            # Textually different bounds may still name the same
            # point in the era tree; the helper is imported lazily,
            # only when such a comparison is actually needed.
            from .time_model import _cmp_atoms
            if _cmp_atoms(left, right) != 0:
                return False
    return True


# Slice 5.1 — the in-memory graph has moved to
# :mod:`lore_engine_poc.graph_backend` as :class:`InMemoryGraph`
# with a :class:`GraphBackend` Protocol. ``Graph`` is a
# back-compat alias so the 559 existing tests (and any external
# code that imports ``Graph``) keep working unchanged.
from .graph_backend import (  # noqa: F401, E402  -- re-export for back-compat
    GraphBackend,
    InMemoryGraph as Graph,
)


def build_graph(entities: Iterable[Entity], triples: Iterable[Triple]) -> Graph:
    """Convert parser output into an in-memory :class:`Graph`.

    Entities:
        Every entity name is added to ``g.names`` even when the entity
        has no edges, so ``by_name()`` can return the canonical form
        for an isolated entity. Each entity with a truthy ``type`` is
        indexed under that type and, for the lowercase markdown types
        listed below, also under the PascalCase label used by the
        structured-YAML path (e.g. ``"npc"`` is also ``"Person"``), so
        callers can filter on either style. Each entry of
        ``entity.sources`` is recorded in ``g.lore_sources`` keyed by
        its path (first one wins).

    Triples:
        ``triples`` is consumed in a **single pass**, so a one-shot
        iterator / generator is safe to pass in.

        * ``_LORESOURCE_NODE`` marker triples only contribute a
          :class:`LoreSource` to ``g.lore_sources`` (unless that path
          is already known). They never become edges.
        * Every other triple — ``SOURCED_FROM`` included, it gets no
          special handling here — is merged into the graph by
          :func:`_merge_triple`.

    Args:
        entities: Parsed entities.
        triples: Parsed triples; iterated exactly once.

    Returns:
        The populated graph.
    """
    # Markdown (lowercase) type -> PascalCase label. Built once rather
    # than once per entity.
    pascal_aliases = {
        "npc": "Person",
        "faction": "Faction",
        "location": "Location",
        "region": "Region",
        "entry": "Entry",
        "lineage": "Lineage",
        "bestiary": "Creature",
        "magic_system": "MagicSystem",
        "culture": "Culture",
        "timeline": "Event",
    }

    g = Graph()
    for e in entities:
        g.names.add(e.name)
        if e.type:
            g.add_entity_of_type(e.name, e.type)
        alias = pascal_aliases.get(e.type)
        if alias:
            g.add_entity_of_type(e.name, alias)
        for src in e.sources:
            g.lore_sources.setdefault(src.path, src)

    for t in triples:
        if t.relation == "_LORESOURCE_NODE":
            # Marker emitted by the structured-YAML adapter: its
            # confidence / reliability fields carry the *source's*
            # metadata. Synthesise a LoreSource and do not treat the
            # marker as a fact.
            if t.source_path not in g.lore_sources:
                g.lore_sources[t.source_path] = LoreSource(
                    path=t.source_path,
                    name=t.source_slug,
                    source_type="",  # filled by the adapter into the path
                    reliability=t.reliability,
                    source_confidence=t.source_confidence,
                )
            continue
        _merge_triple(g, t)
    return g


def _merge_triple(g: Graph, t: Triple) -> None:
    """Fold one fact triple into ``g``, merging or disputing as needed.

    Let *same-fact edges* be the edges already in ``g`` with the
    triple's ``(subject, relation, object)``.

    * No same-fact edge: add a new, undisputed edge.
    * Some same-fact edge has a window consistent with the triple's
      (per :func:`_windows_consistent`); the first such edge is used:

      - if that edge is fully unbounded it adopts the triple's bounds
        (the structured source is authoritative for time bounds; this
        is a no-op when the triple is unbounded too);
      - the triple's source is appended unless that document is
        already cited (a duplicate mention in one document adds
        nothing).

    * Every same-fact edge conflicts with the triple's window: add a
      second edge carrying the triple's own bounds, flag it and every
      conflicting edge as ``is_disputed`` and cross-link them through
      ``disputed_with``.
    """
    same_fact = [
        edge
        for edge in g.edges_by_subject.get(t.subject, {}).get(t.relation, [])
        if edge.object == t.object
    ]

    # NOTE(review): real bounds are now handed to _windows_consistent,
    # so time_model._cmp_atoms can run during graph construction —
    # confirm it accepts every bound format the parsers emit.
    compatible = next(
        (
            edge
            for edge in same_fact
            if _windows_consistent(
                edge.valid_from, edge.valid_until, t.valid_from, t.valid_until
            )
        ),
        None,
    )

    if compatible is not None:
        if compatible.valid_from is None and compatible.valid_until is None:
            compatible.valid_from = t.valid_from
            compatible.valid_until = t.valid_until
        if t.source_path not in compatible.sources:
            # Keep the four provenance lists in lockstep.
            compatible.sources.append(t.source_path)
            compatible.extraction_confidences.append(t.extraction_confidence)
            compatible.source_confidences.append(t.source_confidence)
            compatible.reliabilities.append(t.reliability)
        return

    new_edge = Edge(
        subject=t.subject,
        relation=t.relation,
        object=t.object,
        valid_from=t.valid_from,
        valid_until=t.valid_until,
        sources=[t.source_path],
        extraction_confidences=[t.extraction_confidence],
        source_confidences=[t.source_confidence],
        reliabilities=[t.reliability],
        # Disputed only if there is something to disagree with.
        is_disputed=bool(same_fact),
    )
    for rival in same_fact:
        rival.is_disputed = True
        rival.disputed_with.append(new_edge)
        new_edge.disputed_with.append(rival)
    g.add(new_edge)


def load_graph_from_codex(root: str) -> Graph:
    """Parse the codex directory at ``root`` and return its graph.

    ``iter_codex`` is invoked twice on purpose-separate streams: one
    feeds the entity side of :func:`build_graph`, the other feeds
    :func:`extract_triples`.
    """
    parsed_entities = iter_codex(root)
    parsed_triples = extract_triples(iter_codex(root))
    return build_graph(parsed_entities, parsed_triples)


def was_true_at(
    graph: Graph,
    relation: str,
    subject: str,
    object: str,
    at_time: str,
    current_time: Optional[str] = None,
    *,
    setting: Optional[str] = None,
) -> dict:
    """The Lore Engine ``was_true_at`` tool.

    Answers "did ``relation`` hold between ``subject`` and ``object``
    at ``at_time``?".

    Args:
        graph: Backend exposing ``by_name``, ``edges_for_subject`` and,
            when ``setting`` is used, ``find_setting`` /
            ``setting_entities``.
        relation: Edge type to look for.
        subject: Name of the first endpoint, as given by the caller.
        object: Name of the second endpoint, as given by the caller.
        at_time: Point in time to test each edge's window against.
        current_time: Forwarded unchanged to ``time_in_window``.
        setting: Keyword-only. ``None`` (default) applies no filter.
            Otherwise both endpoints must be members of the named
            setting.

    Returns:
        Always a dict with these keys::

            {
              "was_true": bool,
              "relation": str,
              "subject": str,
              "object": str,
              "at_time": str,
              "valid_from": str | None,
              "valid_until": str | None,
              "sources": list[str],
              "confidence": float,
              "edges_examined": int,
            }

        * Unknown setting: ``was_true`` is False and a ``"note"`` of
          ``"unknown setting: <setting>"`` is added.
        * An endpoint that ``by_name`` cannot resolve, or (with
          ``setting``) that is not in the setting's members:
          ``was_true`` is False with ``"note": "unknown entity:
          <name as given>"``. The subject is checked before the
          object. In these early answers ``subject`` / ``object``
          echo the caller's spelling.
        * Otherwise ``subject`` / ``object`` are the canonical names.
          If no candidate edge's window contains ``at_time`` the
          answer is negative with ``edges_examined`` set to the
          number of candidates and no note.
        * On a match the answer additionally carries
          ``extraction_confidences``, ``source_confidences``,
          ``reliabilities``, ``is_disputed`` and
          ``disputed_with_sources``; ``confidence`` is the edge's
          ``aggregate_confidence``. All lists are copies, so callers
          may mutate the answer without touching the graph.

    NOTE(review): a known entity that is merely outside ``setting``
    gets the same "unknown entity" note as a missing one — confirm
    this is the intended contract.
    """
    if setting is not None:
        # Check the setting itself before resolving any names.
        if graph.find_setting(setting) is None:
            return _negative_answer(
                relation, subject, object, at_time,
                note=f"unknown setting: {setting}",
            )
        setting_members = graph.setting_entities(setting)

    # Resolve each endpoint once; the result serves both the
    # setting filter and the edge lookup.
    s = graph.by_name(subject)
    o = graph.by_name(object)

    if setting is not None:
        for asked, canonical in ((subject, s), (object, o)):
            if canonical is None or canonical not in setting_members:
                return _negative_answer(
                    relation, subject, object, at_time,
                    note=f"unknown entity: {asked}",
                )

    if s is None or o is None:
        return _negative_answer(
            relation, subject, object, at_time,
            note=f"unknown entity: {subject if s is None else object}",
        )

    # Slice 5.4: route through the GraphBackend Protocol method
    # so the in-memory and Neo4j backends answer the same way.
    candidates = [e for e in graph.edges_for_subject(s, relation) if e.object == o]
    if not candidates:
        # A fact may have been recorded from either endpoint, so fall
        # back to the reverse direction when the forward one is empty.
        candidates = [e for e in graph.edges_for_subject(o, relation) if e.object == s]

    in_window = [
        e
        for e in candidates
        if time_in_window(at_time, e.valid_from, e.valid_until, current_time=current_time)
    ]
    if not in_window:
        return _negative_answer(
            relation, s, o, at_time, edges_examined=len(candidates)
        )

    # Highest aggregate confidence wins; ``max`` keeps the first edge
    # on ties, matching candidate order.
    best = max(in_window, key=lambda e: e.aggregate_confidence)

    return {
        "was_true": True,
        "relation": relation,
        "subject": s,
        "object": o,
        "at_time": at_time,
        "valid_from": best.valid_from,
        "valid_until": best.valid_until,
        "sources": list(best.sources),
        "extraction_confidences": list(best.extraction_confidences),
        "source_confidences": list(best.source_confidences),
        "reliabilities": list(best.reliabilities),
        "confidence": best.aggregate_confidence,
        "is_disputed": best.is_disputed,
        "disputed_with_sources": [
            list(rival.sources) for rival in best.disputed_with
        ],
        "edges_examined": len(candidates),
    }


def _negative_answer(relation, subject, obj, at_time, *, edges_examined=0, note=None) -> dict:
    """Build the standard ``was_true: False`` answer for ``was_true_at``.

    ``note`` is appended as the last key only when provided.
    """
    answer = {
        "was_true": False,
        "relation": relation,
        "subject": subject,
        "object": obj,
        "at_time": at_time,
        "valid_from": None,
        "valid_until": None,
        "sources": [],
        "confidence": 0.0,
        "edges_examined": edges_examined,
    }
    if note is not None:
        answer["note"] = note
    return answer


# Names exported by ``from <this module> import *``. ``Graph`` is the
# back-compat alias for ``InMemoryGraph`` imported above;
# ``GraphBackend`` is imported alongside it but is not listed here.
__all__ = [
    "Edge",
    "Graph",
    "build_graph",
    "load_graph_from_codex",
    "was_true_at",
]