# warhammer-vision/server.py

#!/usr/bin/env python3
"""Warhammer Vision — Identify Warhammer 40k models using vision LLMs through LiteLLM."""

import base64
import io
import json
import logging
import os
import time
from pathlib import Path

import httpx
from fastapi import FastAPI, File, Form, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Configure the root logger once at import time: INFO and above, with a
# timestamp and level tag in front of every message.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the handlers below.
log = logging.getLogger(__name__)

# The ASGI application object; routes and the static mount attach to it.
app = FastAPI(title="Warhammer Vision")

# CORS is left fully open: any origin, any method, any header.
_cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# --- Runtime configuration -------------------------------------------------
# The first three values can be overridden through environment variables of
# the same name; the literals are the fallbacks used when they are unset.

# Full URL of the LiteLLM chat-completions endpoint requests are POSTed to.
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000/v1/chat/completions")
# Bearer token sent in the Authorization header.
LITELLM_API_KEY = os.getenv("LITELLM_API_KEY", "anything")
# Model name used when the caller does not pick one.
VISION_MODEL = os.getenv("VISION_MODEL", "gemma4-it:e4b")

# Largest accepted upload, in bytes (10 MB).
MAX_IMAGE_SIZE = 10 * 2**20
# Timeout handed to the httpx client for the model request.
REQUEST_TIMEOUT = 120

# Instruction text for the vision model: what to identify in a miniature photo
# (faction, sub-faction, unit, role, loadout, paint scheme) and how to format
# the answer (markdown with headings).
SYSTEM_PROMPT = """You are an expert Warhammer 40,000 miniature identification system.
Analyze the provided image and identify the miniature with as much detail as possible.

Identify the following when visible:
1. **Faction** — e.g., Space Marines, Orks, Necrons, Aeldari, Tyranids, Chaos, Astra Militarum, etc.
2. **Sub-faction / Chapter / Dynasty** — e.g., Ultramarines (blue + gold), Blood Angels (red), Death Guard (green + rot), Szarekhan Dynasty (bronze + green)
3. **Unit/Model Name** — e.g., Intercessor, Skitarii Ranger, Necron Warrior, Boyz, Termagant, Plague Marine
4. **Role** — Troops, Elite, Heavy Support, HQ, Fast Attack, etc.
5. **Loadout** — visible weapons and equipment
6. **Paint scheme notes** — colors used, quality, any distinctive markings

Be specific but honest. If you're uncertain, say so and explain what you can identify vs what's unclear.

Format your response as clean markdown with clear headings."""

# In-memory counter of successful identifications; incremented by the
# /api/identify handler and reported by /api/health as "models_tested".
identify_count: int = 0


class IdentifyResponse(BaseModel):
    """Response schema declared for ``POST /api/identify``.

    The success path returns an instance of this model; the error paths build
    JSON bodies by hand using the same ``success`` / ``error`` / ``time_ms`` keys.
    """

    # True when the model produced an identification, False on any failure.
    success: bool
    # Text returned by the model; populated on success.
    result: str | None = None
    # Human-readable failure description; populated on error responses.
    error: str | None = None
    # Name of the model that handled the request; populated on success.
    model_used: str | None = None
    # Time spent handling the request, in milliseconds.
    time_ms: float = 0


@app.get("/api/health")
async def health():
    """Return a small status document: liveness flag, default model, request counter."""
    status_report = {
        "status": "ok",
        "model": VISION_MODEL,
        # Number of successful /api/identify calls so far.
        "models_tested": identify_count,
    }
    return status_report


def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds elapsed since *started* (a ``time.perf_counter()`` reading)."""
    return round((time.perf_counter() - started) * 1000)


def _identify_failure(status_code: int, message: str, started: float | None = None) -> JSONResponse:
    """Build the JSON error body for ``/api/identify``; adds ``time_ms`` when *started* is given."""
    body = {"success": False, "error": message}
    if started is not None:
        body["time_ms"] = _elapsed_ms(started)
    return JSONResponse(status_code=status_code, content=body)


@app.post("/api/identify", response_model=IdentifyResponse)
async def identify(image: UploadFile = File(...), model: str | None = Form(None)):
    """Identify the miniature in an uploaded image by asking a vision model via LiteLLM.

    Args:
        image: Uploaded file; its content type must start with ``image/`` and
            its size must be between 1 byte and ``MAX_IMAGE_SIZE``.
        model: Optional model name; falls back to ``VISION_MODEL`` when omitted or empty.

    Returns:
        ``IdentifyResponse`` on success. On failure a ``JSONResponse`` with
        ``success: false`` and an ``error`` message:

        * 400 - not an image, or an empty upload
        * 413 - image larger than ``MAX_IMAGE_SIZE``
        * 502 - the backend returned an HTTP error, was unreachable, or sent
          a response that could not be interpreted
        * 504 - the backend did not answer within ``REQUEST_TIMEOUT``
        * 500 - any other unexpected error
    """
    global identify_count
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    if not image.content_type or not image.content_type.startswith("image/"):
        return _identify_failure(400, "File must be an image (PNG, JPEG, WebP, etc.)")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large file into memory.
    raw = await image.read(MAX_IMAGE_SIZE + 1)
    if not raw:
        return _identify_failure(400, "Uploaded image is empty")
    if len(raw) > MAX_IMAGE_SIZE:
        return _identify_failure(413, f"Image too large. Max {MAX_IMAGE_SIZE} bytes.")

    data_url = f"data:{image.content_type};base64,{base64.b64encode(raw).decode()}"

    chosen_model = model or VISION_MODEL
    payload = {
        "model": chosen_model,
        "messages": [
            # The identification instructions go first as the system message.
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Identify this Warhammer 40k miniature in detail:"},
                    {"type": "image_url", "image_url": {"url": data_url, "detail": "high"}},
                ],
            },
        ],
        "max_tokens": 1000,
    }

    try:
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
            resp = await client.post(
                LITELLM_URL,
                json=payload,
                headers={"Authorization": f"Bearer {LITELLM_API_KEY}", "Content-Type": "application/json"},
            )
            resp.raise_for_status()
            data_out = resp.json()
    except httpx.HTTPStatusError as e:
        body = e.response.text[:500]
        log.error("LiteLLM error %d: %s", e.response.status_code, body)
        return _identify_failure(502, f"Model error: {body}", started)
    except httpx.TimeoutException:
        log.warning("LiteLLM request timed out after %ss", REQUEST_TIMEOUT)
        return _identify_failure(504, "Model request timed out", started)
    except httpx.RequestError as e:
        # Connection refused, DNS failure, etc.: the backend is at fault, not this server.
        log.error("Could not reach LiteLLM at %s: %s", LITELLM_URL, e)
        return _identify_failure(502, "Could not reach the model backend", started)
    except ValueError:
        # resp.json() failed: the backend answered 2xx with a non-JSON body.
        log.error("LiteLLM returned a non-JSON response")
        return _identify_failure(502, "Model returned an unexpected response", started)
    except Exception as e:
        log.exception("Unexpected error")
        return _identify_failure(500, str(e), started)

    try:
        content = data_out["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        log.error("Malformed completion response: %.500s", data_out)
        return _identify_failure(502, "Model returned an unexpected response", started)
    if not isinstance(content, str):
        # e.g. "content": null when the model produced nothing.
        log.error("Completion response has no text content: %.500s", data_out)
        return _identify_failure(502, "Model returned no content", started)

    identify_count += 1
    elapsed = _elapsed_ms(started)

    # "usage" may be absent or null depending on the backend.
    usage = data_out.get("usage") or {}
    log.info(
        "Identified model %d: %s in %.0fms (%d input, %d output tokens)",
        identify_count,
        content[:80].replace("\n", " "),
        elapsed,
        usage.get("prompt_tokens") or 0,
        usage.get("completion_tokens") or 0,
    )

    return IdentifyResponse(success=True, result=content, model_used=chosen_model, time_ms=elapsed)


# Frontend assets are served from the "static" directory that sits next to
# this file. The directory is created if missing, and the mount is registered
# after the /api routes above.
static_dir = Path(__file__).with_name("static")
static_dir.mkdir(exist_ok=True, parents=True)
app.mount(
    "/",
    StaticFiles(directory=os.fspath(static_dir), html=True),
    name="static",
)


if __name__ == "__main__":
    # uvicorn is imported here because it is only used when this file is run directly.
    import uvicorn

    # Listening port comes from $PORT, defaulting to 5173.
    listen_port = int(os.getenv("PORT", "5173"))
    log.info("Starting Warhammer Vision on http://0.0.0.0:%d — using model: %s", listen_port, VISION_MODEL)
    uvicorn.run(app, host="0.0.0.0", port=listen_port)