- 01_ingest.py: LORE_INGEST_LLM=1 enables LLM extraction after the
deterministic path; build_graph is now called AFTER LLM triples
merge in (the 3.4 ordering fix).
- LORE_INGEST_FAKE_LLM=1 + LORE_INGEST_FAKE_LLM_SCRIPT=path selects
FakeProvider for offline/CI runs.
- Missing OLLAMA_API_KEY degrades gracefully: stderr warning, rc=0,
deterministic graph still built (no crash, no LLM triples).
- scripts/06_llm_smoke.py: one-shot manual smoke for the real
Ollama Cloud provider; loads one NPC, runs extractor, prints
triples. Skips (rc=0, helpful message) when OLLAMA_API_KEY unset.
- FakeProvider gains dict-style {match_any, response} / {match_any,
raise} entries so tests can skip exact-prompt matching when the
body is large.
- tests/test_extraction/test_ingest_wiring.py: 8 subprocess tests
covering default-off, enabled, idempotency (x2), adds-fact,
provider-failure tolerance, bad-JSON tolerance, and missing-key
fallback.
- tests/fixtures/llm_empty_script.json: [] (used by the enabled-
path test where no triples are expected).
435/435 tests pass (was 382 pre-slice; +53). End-to-end ingest with
--skip-cognee runs cleanly on default-off path.
213 lines
7.1 KiB
Python
213 lines
7.1 KiB
Python
"""Lore Engine POC — LLM provider abstraction (slice 3).
|
|
|
|
A thin wrapper around the LLM call surface we need for
|
|
extraction. We define one ``LLMProvider`` Protocol with a
|
|
single method (``chat``) and ship two implementations:
|
|
|
|
* :class:`FakeProvider` — canned responses for tests. The
|
|
test code scripts ``(messages, response)`` pairs; the
|
|
provider matches the incoming messages to the script and
|
|
returns the canned response.
|
|
* :class:`OllamaCloudProvider` — the real provider. Talks
|
|
to ``https://ollama.com/api/chat`` over bearer-token auth
|
|
using the ``urllib.request`` stdlib module (no new pip
|
|
dependencies).
|
|
|
|
Why stdlib only and not LiteLLM:
|
|
|
|
* LiteLLM is great when you have many providers. We have
|
|
one — Ollama Cloud — and one method to call.
|
|
* The auto-classifier blocked earlier pip installs of
|
|
agent-chosen packages (see slice 2.6+). ``urllib`` is
|
|
already in the standard library.
|
|
* The protocol stays uniform if we ever add a second
|
|
provider (LiteLLM, anthropic, local vLLM): implement
|
|
``chat(messages) -> str`` and slot in.
|
|
|
|
The provider is intentionally **stateless**: one call →
|
|
one response. Stateful concerns (sessions, conversation
|
|
history, retries) live in the caller — the extractor
|
|
passes a single-message prompt and parses the single
|
|
string response.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import urllib.request
|
|
from typing import Any, Callable, Optional, Protocol, runtime_checkable
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Protocol — the duck-typed contract
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@runtime_checkable
class LLMProvider(Protocol):
    """Structural contract for a one-shot LLM backend.

    Any object that exposes a ``chat`` method satisfies this
    protocol; no inheritance is required. ``chat`` receives an
    OpenAI-style ``messages`` list and hands back the assistant
    message content as a plain string. Interpreting that string
    is the caller's job — the provider never parses it.
    """

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Send ``messages`` and return the assistant's reply text."""
        ...
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# FakeProvider — for tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class FakeProvider:
    """Scripted stand-in for a real provider, intended for tests.

    ``script`` is an ordered list whose entries take one of these forms:

    * ``(messages_match, response)`` tuple — ``response`` is returned
      when the incoming messages compare equal to ``messages_match``.
    * ``{"match_any": True, "response": "..."}`` dict — ``response`` is
      returned for any messages (a missing ``response`` key yields
      ``""``). Handy when the prompt body is large or variable.
    * ``{"match_any": True, "raise": "<message>"}`` dict — raises
      ``OSError(<message>)`` for any messages, to simulate provider
      failures such as timeouts or network errors.

    Entries are tried in order and the first one that matches wins.
    Entries are never consumed, so the same script answers repeated
    calls. Dicts without a truthy ``match_any`` are skipped.

    The ``messages`` argument of every call is appended to
    ``self.calls`` before matching, so failed calls are recorded too.

    When nothing in the script matches, ``chat`` raises
    ``AssertionError`` so that test drift is loud.
    """

    def __init__(
        self,
        script: Optional[list[Any]] = None,
    ):
        # Copy the caller's list so later edits to it do not alter the script.
        self.script: list[Any] = list(script) if script else []
        self.calls: list[list[dict]] = []

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Return the first scripted reply that matches ``messages``."""
        self.calls.append(messages)
        for item in self.script:
            if isinstance(item, dict):
                if not item.get("match_any"):
                    continue
                if "raise" in item:
                    raise OSError(item["raise"])
                return item.get("response", "")
            if isinstance(item, tuple):
                expected, reply = item
                if expected == messages:
                    return reply
        raise AssertionError(
            f"FakeProvider: no scripted response for messages={messages!r}"
        )
|
|
|
|
|
|
def fake_provider_from_script_file(path: str) -> FakeProvider:
    """Build a :class:`FakeProvider` from a JSON script file.

    The file must contain a JSON list of script entries (see
    :class:`FakeProvider`). Because JSON has no tuple type, tuple
    entries are serialised as 2-element ``[messages, response]``
    lists and converted back to tuples here; dict entries are passed
    through unchanged.

    Args:
        path: Location of the UTF-8 encoded JSON script file.

    Returns:
        A ``FakeProvider`` whose script holds the decoded entries in
        file order.

    Raises:
        ValueError: If the top-level JSON value is not a list, or if
            an entry is neither a 2-element list nor a dict.

    Errors from ``open`` and ``json.load`` (missing file, malformed
    JSON) are not caught and propagate to the caller.
    """
    with open(path, "r", encoding="utf-8") as f:
        raw = json.load(f)
    # Reject non-list documents up front. Without this check a top-level
    # object would be iterated by key (giving a misleading "bad entry"
    # error, or an empty script for ``{}``), and ``null`` or a number
    # would fail with a bare TypeError.
    if not isinstance(raw, list):
        raise ValueError(
            "fake_provider_from_script_file: expected a JSON list of "
            f"script entries, got {type(raw).__name__}"
        )
    script: list[Any] = []
    for entry in raw:
        if isinstance(entry, list) and len(entry) == 2:
            script.append((entry[0], entry[1]))
        elif isinstance(entry, dict):
            script.append(entry)
        else:
            raise ValueError(
                f"fake_provider_from_script_file: bad entry {entry!r}"
            )
    return FakeProvider(script=script)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# OllamaCloudProvider — real
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class OllamaCloudProvider:
    """Provider for Ollama Cloud (``https://ollama.com``).

    Auth is a bearer token in the ``Authorization`` header
    (the ``$OLLAMA_API_KEY`` env var, or the ``api_key`` kwarg).
    The model defaults to ``minimax-m3:cloud`` but can be
    overridden via the ``$LORE_LLM_MODEL`` env var or the
    constructor's ``model`` kwarg. The request timeout (seconds)
    defaults to ``DEFAULT_TIMEOUT`` and can be overridden via
    ``$LORE_LLM_TIMEOUT`` or the ``timeout`` kwarg. Explicit
    constructor arguments always win over environment variables.

    The provider is *fail-loud*: ``chat`` does no error handling,
    so any HTTP error, timeout, or non-JSON response bubbles up.
    Callers are expected to catch and degrade; see
    :mod:`lore_engine_poc.extraction`.
    """

    ENDPOINT = "https://ollama.com/api/chat"
    DEFAULT_MODEL = "minimax-m3:cloud"
    DEFAULT_TIMEOUT = 60.0

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
    ):
        """Resolve credentials, model and timeout.

        Raises:
            RuntimeError: If no API key is given and
                ``$OLLAMA_API_KEY`` is unset or empty.
        """
        self.api_key = (
            api_key
            if api_key is not None
            else os.environ.get("OLLAMA_API_KEY")
        )
        if not self.api_key:
            raise RuntimeError(
                "OllamaCloudProvider: $OLLAMA_API_KEY is not set. "
                "Either export the env var or pass api_key= explicitly."
            )
        if model is not None:
            self.model = model
        else:
            # An exported-but-empty variable counts as unset; an empty
            # model slug could never be a valid request.
            self.model = (
                os.environ.get("LORE_LLM_MODEL") or self.DEFAULT_MODEL
            )
        if timeout is not None:
            self.timeout = float(timeout)
        else:
            self.timeout = self._timeout_from_env()

    def _timeout_from_env(self) -> float:
        """Read ``$LORE_LLM_TIMEOUT``, falling back to the default.

        The default is used when the variable is unset, is not a
        number, or is not a positive finite number of seconds.
        """
        default = float(self.DEFAULT_TIMEOUT)
        raw = os.environ.get("LORE_LLM_TIMEOUT")
        if raw is None:
            return default
        try:
            value = float(raw)
        except ValueError:
            return default
        # The chained comparison is False for zero, negatives, NaN and
        # infinity; none of those is a usable socket timeout.
        if not (0.0 < value < float("inf")):
            return default
        return value

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """POST ``messages`` to the chat endpoint and return the reply.

        ``opts`` is accepted for compatibility with the provider
        protocol; it is not included in the request body.

        Returns:
            The ``message.content`` string from the JSON response.

        Exceptions from ``urllib``, JSON decoding, or a response
        missing ``message``/``content`` are not caught here.
        """
        body = json.dumps({
            "model": self.model,
            "messages": messages,
            "stream": False,  # ask for one JSON document, not a stream
        }).encode("utf-8")
        # Supplying ``data`` makes urllib issue a POST.
        req = urllib.request.Request(
            self.ENDPOINT,
            data=body,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            data = json.loads(resp.read())
        return data["message"]["content"]
|
|
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = [
    "LLMProvider",
    "FakeProvider",
    "OllamaCloudProvider",
    "fake_provider_from_script_file",
]