# lore-engine-poc-v3/scripts/06_llm_smoke.py

"""06_llm_smoke — manual smoke test for the LLM extractor.

Picks one entity from the codex by slug (by default an NPC from the
seed codex; override with ``--codex`` / ``--slug``) and runs the
extractor over its body. Prints the resulting triples.

Run:
    # Default-off behaviour without an API key: prints a
    # message explaining what's missing and exits 0.
    python3 scripts/06_llm_smoke.py

    # With the real provider (requires OLLAMA_API_KEY in env):
    OLLAMA_API_KEY=... python3 scripts/06_llm_smoke.py

    # Override the model slug (default: minimax-m3:cloud):
    OLLAMA_API_KEY=... LORE_LLM_MODEL=minimax-m3:cloud \
        python3 scripts/06_llm_smoke.py

Why a script and not a test:

    Tests use ``FakeProvider`` for fast, deterministic
    coverage. This script is the **manual** smoke that
    proves the real Ollama Cloud call returns parseable
    triples on real prose. It's the slice 3.5 deliverable.
"""

from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

# Put the project root (the parent of this ``scripts/`` directory) at the
# front of ``sys.path`` so ``python3 scripts/06_llm_smoke.py`` can import
# ``lore_engine_poc`` when launched from the project root.
_THIS_FILE = Path(__file__).resolve()
ROOT = _THIS_FILE.parent.parent
sys.path.insert(0, str(ROOT))

from lore_engine_poc.extraction import extract_from_chunk
from lore_engine_poc.llm import OllamaCloudProvider
from lore_engine_poc.parsers import iter_codex


def parse_args() -> argparse.Namespace:
    """Parse the command-line flags of the smoke script.

    Flags:
        --codex: codex root; defaults to ``lore_engine_poc/seed`` under
            the project root.
        --slug: slug of the entity to smoke-test.

    Returns:
        The parsed ``argparse.Namespace`` exposing ``codex`` and ``slug``.
    """
    seed_codex = str(ROOT / "lore_engine_poc" / "seed")
    seed_slug = "Aldric Raventhorne"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--codex",
        default=seed_codex,
        help="Codex root (defaults to lore_engine_poc/seed).",
    )
    parser.add_argument(
        "--slug",
        default=seed_slug,
        help="Slug of the entity to smoke-test (default: Aldric Raventhorne).",
    )
    return parser.parse_args()


def main() -> int:
    """Run the live LLM extractor on one codex entity and print its triples.

    Returns:
        Process exit code: ``0`` when ``OLLAMA_API_KEY`` is unset (the
        script explains what is missing and skips the live call) or when
        the extraction ran; ``1`` when no entity matches ``--slug``.
    """
    args = parse_args()

    # Default-off: a missing key is deliberately not a failure. Explain
    # what is needed on stderr and exit 0 without touching the network.
    if not os.environ.get("OLLAMA_API_KEY"):
        print(
            "[06_llm_smoke] OLLAMA_API_KEY is not set.\n"
            "  This smoke script exercises the real Ollama Cloud "
            "provider; it needs an API key.\n"
            "  Set OLLAMA_API_KEY (and optionally LORE_LLM_MODEL) "
            "and re-run to exercise the live path.\n"
            "  For offline testing, run the test suite "
            "(`pytest tests/test_extraction/`) which uses "
            "FakeProvider.",
            file=sys.stderr,
        )
        return 0

    print(f"[06_llm_smoke] loading codex from {args.codex}")
    # Only one entity is needed, so stream the codex and stop at the first
    # slug match instead of materializing every entity into a list first.
    matches = (e for e in iter_codex(args.codex) if e.slug == args.slug)
    target = next(matches, None)
    if target is None:
        print(
            f"[06_llm_smoke] no entity with slug={args.slug!r} in codex",
            file=sys.stderr,
        )
        return 1

    print(f"[06_llm_smoke] entity: {target.slug} (body={len(target.body)} chars)")

    # NOTE(review): the provider is built with no arguments; presumably it
    # reads OLLAMA_API_KEY / LORE_LLM_MODEL from the environment — confirm.
    provider = OllamaCloudProvider()
    print(f"[06_llm_smoke] provider: OllamaCloudProvider model={provider.model!r}")

    triples = extract_from_chunk(target, provider)
    print(f"[06_llm_smoke] LLM triples extracted: {len(triples)}")
    for t in triples:
        # One line for the triple itself, one for its provenance/confidence.
        print(f"  [{t.subject}, {t.relation}, {t.object}]")
        print(
            f"    source={t.source_path} "
            f"extraction_conf={t.extraction_confidence} "
            f"source_conf={t.source_confidence} "
            f"reliability={t.reliability}"
        )
    return 0


# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())