# lore-engine-poc-v3/lore_engine_poc/llm.py

"""Lore Engine POC — LLM provider abstraction (slice 3).

A thin wrapper around the LLM call surface we need for
extraction. We define one ``LLMProvider`` Protocol with a
single method (``chat``) and ship two implementations:

* :class:`FakeProvider` — canned responses for tests. The
  test code scripts ``(messages, response)`` pairs; the
  provider matches the incoming messages to the script and
  returns the canned response.
* :class:`OllamaCloudProvider` — the real provider. Talks
  to ``https://ollama.com/api/chat`` over bearer-token auth
  using the ``urllib.request`` stdlib module (no new pip
  dependencies).

Why stdlib only and not LiteLLM:

* LiteLLM is great when you have many providers. We have
  one — Ollama Cloud — and one method to call.
* The auto-classifier blocked earlier pip installs of
  agent-chosen packages (see slice 2.6+). ``urllib`` is
  already in the standard library.
* The protocol stays uniform if we ever add a second
  provider (LiteLLM, anthropic, local vLLM): implement
  ``chat(messages) -> str`` and slot in.

The provider is intentionally **stateless**: one call →
one response. Stateful concerns (sessions, conversation
history, retries) live in the caller — the extractor
passes a single-message prompt and parses the single
string response.
"""

from __future__ import annotations

import json
import os
import urllib.request
from typing import Any, Callable, Optional, Protocol, runtime_checkable


# ---------------------------------------------------------------------------
# Protocol — the duck-typed contract
# ---------------------------------------------------------------------------


@runtime_checkable
class LLMProvider(Protocol):
    """Structural interface for a one-shot LLM call.

    Any object exposing a compatible ``chat`` method satisfies
    this protocol; no inheritance is required. Because the
    class is ``runtime_checkable``, ``isinstance(obj, LLMProvider)``
    checks for the presence of ``chat`` (not its signature).
    """

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Send ``messages`` and return the assistant's reply text.

        ``messages`` is an OpenAI-style list of message dicts.
        The returned string is handed back as-is; interpreting
        or parsing it is the caller's job.
        """
        ...


# ---------------------------------------------------------------------------
# FakeProvider — for tests
# ---------------------------------------------------------------------------


class FakeProvider:
    """Scripted stand-in for a real provider, intended for tests.

    ``script`` is a list whose entries take one of these forms:

    * ``(messages_match, response)`` tuple — ``response`` is
      returned when the incoming messages compare equal to
      ``messages_match``.
    * ``{"match_any": True, "response": "..."}`` dict — returns
      ``response`` for any incoming messages (an absent
      ``"response"`` key yields ``""``).
    * ``{"match_any": True, "raise": "<message>"}`` dict — raises
      ``OSError(<message>)`` for any incoming messages, to
      simulate a provider failure.

    Entries are tried in order and the first one that applies
    wins; entries are never consumed. Dicts without a truthy
    ``"match_any"`` and entries of any other type are skipped.

    Each call's ``messages`` argument is appended to
    ``self.calls`` (before matching) so tests can assert on it.

    When nothing in the script applies, ``chat`` raises
    ``AssertionError`` so that test drift is loud.
    """

    def __init__(
        self,
        script: Optional[list[Any]] = None,
    ):
        # Recorded ``messages`` arguments, one per ``chat`` call.
        self.calls: list[list[dict]] = []
        # Private copy so later changes to the caller's list do
        # not alter the script.
        self.script: list[Any] = [] if not script else list(script)

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """Return the first applicable scripted response.

        Raises ``OSError`` for a matching ``"raise"`` entry and
        ``AssertionError`` when no entry applies. ``opts`` is
        accepted for protocol compatibility and not used.
        """
        self.calls.append(messages)

        for item in self.script:
            if isinstance(item, tuple):
                match, canned = item
                if match == messages:
                    return canned
                continue

            # Only wildcard dicts are meaningful beyond this point.
            if not isinstance(item, dict) or not item.get("match_any"):
                continue

            if "raise" in item:
                raise OSError(item["raise"])
            return item.get("response", "")

        raise AssertionError(
            f"FakeProvider: no scripted response for messages={messages!r}"
        )


def fake_provider_from_script_file(path: str) -> FakeProvider:
    """Load a JSON script from ``path`` and wrap it in a :class:`FakeProvider`.

    The file holds a JSON list of script entries. JSON has no
    tuple type, so a ``(messages, response)`` pair is written as
    a 2-element list and converted back to a tuple here; JSON
    objects are forwarded untouched. Any other entry raises
    ``ValueError``.
    """
    with open(path, "r", encoding="utf-8") as handle:
        entries = json.load(handle)

    def _to_script_entry(item: Any) -> Any:
        # Objects are already in the shape FakeProvider expects.
        if isinstance(item, dict):
            return item
        # A 2-element array is the serialised form of a tuple pair.
        if isinstance(item, list) and len(item) == 2:
            return (item[0], item[1])
        raise ValueError(
            f"fake_provider_from_script_file: bad entry {item!r}"
        )

    return FakeProvider(script=[_to_script_entry(item) for item in entries])


# ---------------------------------------------------------------------------
# OllamaCloudProvider — real
# ---------------------------------------------------------------------------


class OllamaCloudProvider:
    """Provider for Ollama Cloud (``https://ollama.com``).

    Auth is a bearer token in the ``Authorization`` header,
    taken from the ``api_key`` kwarg or the ``$OLLAMA_API_KEY``
    env var. The model defaults to ``minimax-m3:cloud`` and can
    be overridden via the ``model`` kwarg or the
    ``$LORE_LLM_MODEL`` env var. The request timeout (seconds)
    defaults to 60 and can be overridden via the ``timeout``
    kwarg or the ``$LORE_LLM_TIMEOUT`` env var. In every case
    an explicit kwarg wins over the environment.

    Environment handling is lenient: an empty
    ``$LORE_LLM_MODEL`` and an unusable ``$LORE_LLM_TIMEOUT``
    (non-numeric, non-positive, NaN or infinite) fall back to
    the defaults. An unusable explicit ``timeout=`` raises
    ``ValueError`` instead, since that is a programming error.

    The provider is *fail-loud*: ``chat`` does not catch
    anything, so HTTP errors, timeouts, non-JSON bodies and
    unexpected response shapes all propagate to the caller.
    """

    ENDPOINT = "https://ollama.com/api/chat"
    DEFAULT_MODEL = "minimax-m3:cloud"
    DEFAULT_TIMEOUT = 60.0

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
    ):
        """Resolve credentials, model slug and timeout.

        Raises:
            RuntimeError: no API key was passed and
                ``$OLLAMA_API_KEY`` is unset or empty.
            ValueError: an explicit ``timeout`` is not a
                positive, finite number.
        """
        self.api_key = (
            api_key
            if api_key is not None
            else os.environ.get("OLLAMA_API_KEY")
        )
        if not self.api_key:
            raise RuntimeError(
                "OllamaCloudProvider: $OLLAMA_API_KEY is not set. "
                "Either export the env var or pass api_key= explicitly."
            )

        if model is not None:
            self.model = model
        else:
            # ``or`` rather than a ``.get`` default: an exported
            # but empty variable must not produce an empty slug.
            self.model = (
                os.environ.get("LORE_LLM_MODEL") or self.DEFAULT_MODEL
            )

        if timeout is not None:
            self.timeout = self._checked_timeout(timeout)
        else:
            try:
                self.timeout = self._checked_timeout(
                    os.environ.get("LORE_LLM_TIMEOUT", self.DEFAULT_TIMEOUT)
                )
            except ValueError:
                # A bad env value should not prevent start-up.
                self.timeout = self.DEFAULT_TIMEOUT

    @staticmethod
    def _checked_timeout(value: Any) -> float:
        """Convert ``value`` to a usable timeout in seconds, or raise ``ValueError``."""
        seconds = float(value)
        # The chained comparison is False for NaN as well as for
        # zero, negatives and infinity — none of which work as a
        # blocking socket timeout.
        if not (0.0 < seconds < float("inf")):
            raise ValueError(
                "OllamaCloudProvider: timeout must be a positive, finite "
                f"number of seconds, got {value!r}"
            )
        return seconds

    def chat(self, messages: list[dict], **opts: Any) -> str:
        """POST ``messages`` to the chat endpoint and return the reply text.

        ``opts`` is accepted so the signature matches other
        providers; it is currently not forwarded to the API.

        Returns the ``message.content`` field of the JSON
        response. Errors raised by ``urllib`` or ``json``, and
        ``KeyError`` for a response lacking that field, propagate.
        """
        body = json.dumps({
            "model": self.model,
            "messages": messages,
            # Ask for one complete JSON document, not a stream.
            "stream": False,
        }).encode()
        # Supplying ``data`` makes urllib issue a POST.
        req = urllib.request.Request(
            self.ENDPOINT,
            data=body,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            data = json.loads(resp.read())
        return data["message"]["content"]


# Public API of this module: the names bound by
# ``from <this module> import *``.
__all__ = [
    "LLMProvider",
    "FakeProvider",
    "OllamaCloudProvider",
    "fake_provider_from_script_file",
]