- 01_ingest.py: LORE_INGEST_LLM=1 enables LLM extraction after the
deterministic path; build_graph is now called AFTER LLM triples
merge in (the 3.4 ordering fix).
- LORE_INGEST_FAKE_LLM=1 + LORE_INGEST_FAKE_LLM_SCRIPT=path selects
FakeProvider for offline/CI runs.
- Missing OLLAMA_API_KEY degrades gracefully: stderr warning, rc=0,
deterministic graph still built (no crash, no LLM triples).
- scripts/06_llm_smoke.py: one-shot manual smoke for the real
Ollama Cloud provider; loads one NPC, runs extractor, prints
triples. Skips (rc=0, helpful message) when OLLAMA_API_KEY unset.
- FakeProvider gains dict-style {match_any, response} / {match_any,
raise} entries so tests can skip exact-prompt matching when the
body is large.
- tests/test_extraction/test_ingest_wiring.py: 8 subprocess tests
covering default-off, enabled, idempotency (x2), adds-fact,
provider-failure tolerance, bad-JSON tolerance, and missing-key
fallback.
- tests/fixtures/llm_empty_script.json: [] (used by the enabled-
path test where no triples are expected).
435/435 tests pass (was 382 pre-slice; +53). End-to-end ingest with
--skip-cognee runs cleanly on the default-off path.
106 lines
3.3 KiB
Python
106 lines
3.3 KiB
Python
"""06_llm_smoke — manual smoke test for the LLM extractor.

Loads one NPC file from the seed codex and runs the extractor
over its body. Prints the resulting triples.

Run:
    # Default-off behaviour without an API key: prints a
    # message explaining what's missing and exits 0.
    python3 scripts/06_llm_smoke.py

    # With the real provider (requires OLLAMA_API_KEY in env):
    OLLAMA_API_KEY=... python3 scripts/06_llm_smoke.py

    # Override the model slug (default: minimax-m3:cloud):
    OLLAMA_API_KEY=... LORE_LLM_MODEL=minimax-m3:cloud \
        python3 scripts/06_llm_smoke.py

Why a script and not a test:

    Tests use ``FakeProvider`` for fast, deterministic
    coverage. This script is the **manual** smoke that
    proves the real Ollama Cloud call returns parseable
    triples on real prose. It's the slice 3.5 deliverable.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Allow ``python3 scripts/06_llm_smoke.py`` from the project root.
|
|
ROOT = Path(__file__).resolve().parent.parent
|
|
sys.path.insert(0, str(ROOT))
|
|
|
|
from lore_engine_poc.extraction import extract_from_chunk
|
|
from lore_engine_poc.llm import OllamaCloudProvider
|
|
from lore_engine_poc.parsers import iter_codex
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the smoke script.

    Two optional flags are recognised:

    * ``--codex`` — codex root directory; defaults to the
      ``lore_engine_poc/seed`` directory under ``ROOT``.
    * ``--slug`` — slug of the entity to run the extractor on;
      defaults to ``Aldric Raventhorne``.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    seed_dir = ROOT / "lore_engine_poc" / "seed"

    # Flag name -> keyword arguments for ``add_argument``; registered in order.
    option_table = (
        (
            "--codex",
            {
                "default": str(seed_dir),
                "help": "Codex root (defaults to lore_engine_poc/seed).",
            },
        ),
        (
            "--slug",
            {
                "default": "Aldric Raventhorne",
                "help": "Slug of the entity to smoke-test (default: Aldric Raventhorne).",
            },
        ),
    )

    parser = argparse.ArgumentParser()
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
|
|
|
|
|
|
def main() -> int:
    """Run the extractor on one codex entity using the real provider.

    Steps, in order:

    1. Parse the CLI options.
    2. If ``OLLAMA_API_KEY`` is unset or empty, print an explanatory
       notice to stderr and stop successfully.
    3. Load every entity from the codex and pick the first one whose
       ``slug`` equals ``--slug``.
    4. Build an ``OllamaCloudProvider``, call ``extract_from_chunk`` on
       the chosen entity, and print each resulting triple to stdout.

    Returns:
        ``0`` when the key is missing (skip) or extraction finished;
        ``1`` when no entity in the codex has the requested slug.
    """
    cli = parse_args()

    # No key means there is nothing to exercise: explain and exit cleanly
    # so the script is safe to run in environments without credentials.
    if not os.environ.get("OLLAMA_API_KEY"):
        notice_lines = (
            "[06_llm_smoke] OLLAMA_API_KEY is not set.",
            " This smoke script exercises the real Ollama Cloud "
            "provider; it needs an API key.",
            " Set OLLAMA_API_KEY (and optionally LORE_LLM_MODEL) "
            "and re-run to exercise the live path.",
            " For offline testing, run the test suite "
            "(`pytest tests/test_extraction/`) which uses "
            "FakeProvider.",
        )
        print("\n".join(notice_lines), file=sys.stderr)
        return 0

    print(f"[06_llm_smoke] loading codex from {cli.codex}")
    catalogue = list(iter_codex(cli.codex))

    # First entity with a matching slug wins; stop scanning once found.
    chosen = None
    for entry in catalogue:
        if entry.slug == cli.slug:
            chosen = entry
            break

    if chosen is None:
        print(
            f"[06_llm_smoke] no entity with slug={cli.slug!r} in codex",
            file=sys.stderr,
        )
        return 1

    print(f"[06_llm_smoke] entity: {chosen.slug} (body={len(chosen.body)} chars)")

    llm = OllamaCloudProvider()
    print(f"[06_llm_smoke] provider: OllamaCloudProvider model={llm.model!r}")

    extracted = extract_from_chunk(chosen, llm)
    print(f"[06_llm_smoke] LLM triples extracted: {len(extracted)}")

    # Two output lines per triple: the triple itself, then its provenance
    # and confidence fields.
    for triple in extracted:
        print(f" [{triple.subject}, {triple.relation}, {triple.object}]")
        provenance = (
            f" source={triple.source_path} "
            f"extraction_conf={triple.extraction_confidence} "
            f"source_conf={triple.source_confidence} "
            f"reliability={triple.reliability}"
        )
        print(provenance)

    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process
# exit status when the file is executed directly.
if __name__ == "__main__":
    sys.exit(main())