Files
Lore Engine Dev 29f7b89d53 slice 3.4+3.5: 01_ingest.py LORE_INGEST_LLM wiring + smoke script (8/8 + manual)
- 01_ingest.py: LORE_INGEST_LLM=1 enables LLM extraction after the
  deterministic path; build_graph is now called AFTER LLM triples
  merge in (the 3.4 ordering fix).
- LORE_INGEST_FAKE_LLM=1 + LORE_INGEST_FAKE_LLM_SCRIPT=path selects
  FakeProvider for offline/CI runs.
- Missing OLLAMA_API_KEY degrades gracefully: stderr warning, rc=0,
  deterministic graph still built (no crash, no LLM triples).
- scripts/06_llm_smoke.py: one-shot manual smoke for the real
  Ollama Cloud provider; loads one NPC, runs extractor, prints
  triples. Skips (rc=0, helpful message) when OLLAMA_API_KEY unset.
- FakeProvider gains dict-style {match_any, response} / {match_any,
  raise} entries so tests can skip exact-prompt matching when the
  body is large.
- tests/test_extraction/test_ingest_wiring.py: 8 subprocess tests
  covering default-off, enabled, idempotency (x2), adds-fact,
  provider-failure tolerance, bad-JSON tolerance, and missing-key
  fallback.
- tests/fixtures/llm_empty_script.json: [] (used by the enabled-
  path test where no triples are expected).

435/435 tests pass (was 382 pre-slice; +53). End-to-end ingest with
--skip-cognee runs cleanly on default-off path.
2026-06-18 11:27:52 -04:00

213 lines
7.1 KiB
Python

"""Lore Engine POC — LLM provider abstraction (slice 3).
A thin wrapper around the LLM call surface we need for
extraction. We define one ``LLMProvider`` Protocol with a
single method (``chat``) and ship two implementations:
* :class:`FakeProvider` — canned responses for tests. The
test code scripts ``(messages, response)`` pairs (or
``match_any`` dict entries that accept any prompt); the
provider matches the incoming messages against the script
and returns the canned response.
* :class:`OllamaCloudProvider` — the real provider. Talks
to ``https://ollama.com/api/chat`` over bearer-token auth
using the ``urllib.request`` stdlib module (no new pip
dependencies).
Why stdlib only and not LiteLLM:
* LiteLLM is great when you have many providers. We have
one — Ollama Cloud — and one method to call.
* The auto-classifier blocked earlier pip installs of
agent-chosen packages (see slice 2.6+). ``urllib`` is
already in the standard library.
* The protocol stays uniform if we ever add a second
provider (LiteLLM, Anthropic, local vLLM): implement
``chat(messages) -> str`` and slot it in.
The provider is intentionally **stateless**: one call →
one response. Stateful concerns (sessions, conversation
history, retries) live in the caller — the extractor
passes a single-message prompt and parses the single
string response.
"""
from __future__ import annotations
import json
import os
import urllib.request
from typing import Any, Callable, Optional, Protocol, runtime_checkable
# ---------------------------------------------------------------------------
# Protocol — the duck-typed contract
# ---------------------------------------------------------------------------
@runtime_checkable
class LLMProvider(Protocol):
    """Structural contract for a one-shot LLM backend.

    Any object exposing a compatible ``chat`` method satisfies this
    protocol. ``chat`` receives an OpenAI-style ``messages`` list and
    hands back the assistant message content as a plain string.
    Interpreting that string is the caller's job; a provider returns
    it untouched.
    """

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Send ``messages`` and return the assistant reply text."""
        ...
# ---------------------------------------------------------------------------
# FakeProvider — for tests
# ---------------------------------------------------------------------------
class FakeProvider:
    """Scripted stand-in for a real LLM provider, meant for tests.

    ``script`` is a list whose entries take one of three shapes:

    * ``(messages_match, response)`` tuple — when the incoming
      messages compare equal to ``messages_match``, ``response`` is
      returned.
    * ``{"match_any": True, "response": "..."}`` dict — accepts any
      messages and returns ``response`` (handy when the prompt body
      is large or variable).
    * ``{"match_any": True, "raise": "<message>"}`` dict — accepts
      any messages and raises ``OSError(<message>)`` to simulate a
      provider failure such as a timeout or network error.

    Entries are tried in order and the first hit wins. Each call's
    messages are appended to ``self.calls`` so tests can assert on
    them. When nothing in the script matches, ``chat`` raises
    ``AssertionError`` so that test drift is loud.
    """

    def __init__(
        self,
        script: Optional[list[Any]] = None,
    ):
        # Copy the caller's list so later edits to it do not change
        # this provider's script.
        self.script: list[Any] = list(script) if script else []
        self.calls: list[list[dict]] = []

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Return the first scripted reply that fits ``messages``.

        ``opts`` is accepted for protocol compatibility and ignored.
        """
        # Record the call first so it is visible even when we raise.
        self.calls.append(messages)

        for step in self.script:
            if isinstance(step, tuple):
                expected, reply = step
                if expected == messages:
                    return reply
                continue

            # Anything that is not a wildcard dict is skipped.
            if not isinstance(step, dict) or not step.get("match_any"):
                continue

            if "raise" in step:
                raise OSError(step["raise"])
            return step.get("response", "")

        raise AssertionError(
            f"FakeProvider: no scripted response for messages={messages!r}"
        )
def fake_provider_from_script_file(path: str) -> FakeProvider:
    """Build a :class:`FakeProvider` from a JSON script file.

    The file must contain a JSON list of script entries (see
    :class:`FakeProvider`). Because JSON has no tuple type, exact-match
    entries are written as 2-element ``[messages, response]`` lists and
    converted to tuples here; dict entries are passed through unchanged.

    Args:
        path: Filesystem path of the UTF-8 encoded JSON script.

    Returns:
        A ``FakeProvider`` primed with the decoded script.

    Raises:
        ValueError: if the top-level JSON value is not a list, or an
            entry is neither a 2-element list nor a dict. Malformed
            JSON also surfaces as ``json.JSONDecodeError``, which is a
            ``ValueError`` subclass.
        OSError: if the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    # Reject a non-list document up front. Without this check an object
    # such as ``{}`` would silently become an empty script, and a single
    # dict entry missing its list wrapper would be reported key by key.
    if not isinstance(raw, list):
        raise ValueError(
            "fake_provider_from_script_file: top-level JSON value must "
            f"be a list of script entries, got {type(raw).__name__}"
        )

    script: list[Any] = []
    for entry in raw:
        if isinstance(entry, list) and len(entry) == 2:
            # FakeProvider only treats real tuples as exact-match pairs.
            script.append((entry[0], entry[1]))
        elif isinstance(entry, dict):
            script.append(entry)
        else:
            raise ValueError(
                f"fake_provider_from_script_file: bad entry {entry!r}"
            )
    return FakeProvider(script=script)
# ---------------------------------------------------------------------------
# OllamaCloudProvider — real
# ---------------------------------------------------------------------------
class OllamaCloudProvider:
    """Provider for Ollama Cloud (``https://ollama.com``).

    Auth is a bearer token sent in the ``Authorization`` header, taken
    from the ``api_key`` kwarg or the ``$OLLAMA_API_KEY`` env var. The
    model defaults to ``minimax-m3:cloud`` (the user's chosen slug) and
    can be overridden via ``$LORE_LLM_MODEL`` or the ``model`` kwarg.
    The request timeout, in seconds, defaults to 60 and can be
    overridden via ``$LORE_LLM_TIMEOUT`` or the ``timeout`` kwarg.

    The provider is *fail-loud*: any HTTP error, timeout, or
    non-JSON response bubbles up. The extractor (the only
    caller) catches and degrades to an empty result so the
    graph still loads. See :mod:`lore_engine_poc.extraction`.
    """

    ENDPOINT = "https://ollama.com/api/chat"
    DEFAULT_MODEL = "minimax-m3:cloud"
    DEFAULT_TIMEOUT = 60.0

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
    ):
        """Resolve credentials, model and timeout.

        Explicit arguments win over environment variables, which win
        over the class defaults.

        Raises:
            RuntimeError: if no non-empty API key is available.
        """
        self.api_key = (
            api_key
            if api_key is not None
            else os.environ.get("OLLAMA_API_KEY")
        )
        if not self.api_key:
            raise RuntimeError(
                "OllamaCloudProvider: $OLLAMA_API_KEY is not set. "
                "Either export the env var or pass api_key= explicitly."
            )

        if model is not None:
            self.model = model
        else:
            # An exported-but-empty variable is treated as unset, the
            # same way an empty API key is; an empty slug is never a
            # usable model name.
            self.model = (
                os.environ.get("LORE_LLM_MODEL") or self.DEFAULT_MODEL
            )

        if timeout is not None:
            self.timeout = float(timeout)
        else:
            self.timeout = self._timeout_from_env()

    @classmethod
    def _timeout_from_env(cls) -> float:
        """Read ``$LORE_LLM_TIMEOUT``, falling back to the default.

        The default is used when the variable is unset, is not a
        number, or is not a positive finite number of seconds. Zero
        would put the socket in non-blocking mode, and negative, NaN
        or infinite values are rejected by the socket layer only once
        a request is made.
        """
        default = float(cls.DEFAULT_TIMEOUT)
        raw = os.environ.get("LORE_LLM_TIMEOUT")
        if raw is None:
            return default
        try:
            value = float(raw)
        except ValueError:
            return default
        # NaN fails both comparisons; +inf fails the upper bound.
        if 0 < value < float("inf"):
            return value
        return default

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """POST ``messages`` to the chat endpoint and return the reply.

        ``opts`` is accepted for protocol compatibility and is not
        forwarded to the API. Nothing is caught here: ``urllib`` errors,
        JSON decode errors and a ``KeyError`` for a payload without
        ``message.content`` all propagate to the caller.
        """
        body = json.dumps({
            "model": self.model,
            "messages": messages,
            # Ask for one complete JSON document rather than a stream
            # of partial chunks.
            "stream": False,
        }).encode()
        # Supplying ``data`` makes urllib issue a POST.
        req = urllib.request.Request(
            self.ENDPOINT,
            data=body,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            data = json.loads(resp.read())
        return data["message"]["content"]
# Names exported by ``from <module> import *``: the provider protocol,
# the two concrete providers, and the JSON script loader for
# FakeProvider.
__all__ = [
"LLMProvider",
"FakeProvider",
"OllamaCloudProvider",
"fake_provider_from_script_file",
]