Files
warhammer-vision/server.py
2026-05-21 01:41:47 +00:00

162 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""Warhammer Vision — Identify Warhammer 40k models using vision LLMs through LiteLLM."""
import base64
import io
import json
import logging
import os
import time
from pathlib import Path
import httpx
from fastapi import FastAPI, File, Form, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
# Process-wide logging: timestamp, level tag, then the message.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

app = FastAPI(title="Warhammer Vision")

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# LiteLLM connection settings; each falls back to a default when the
# corresponding environment variable is not set.
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000/v1/chat/completions")
LITELLM_API_KEY = os.getenv("LITELLM_API_KEY", "anything")
VISION_MODEL = os.getenv("VISION_MODEL", "gemma4-it:e4b")

# Largest accepted upload, in bytes (10 MB).
MAX_IMAGE_SIZE = 10 * 1024**2
# Timeout passed to the httpx client used for the model request.
REQUEST_TIMEOUT = 120
# Instruction text for the vision model: lists the attributes to identify
# (faction, sub-faction, unit, role, loadout, paint scheme) and asks for a
# markdown-formatted answer.
SYSTEM_PROMPT = """You are an expert Warhammer 40,000 miniature identification system.
Analyze the provided image and identify the miniature with as much detail as possible.
Identify the following when visible:
1. **Faction** — e.g., Space Marines, Orks, Necrons, Aeldari, Tyranids, Chaos, Astra Militarum, etc.
2. **Sub-faction / Chapter / Dynasty** — e.g., Ultramarines (blue + gold), Blood Angels (red), Death Guard (green + rot), Szarekhan Dynasty (bronze + green)
3. **Unit/Model Name** — e.g., Intercessor, Skitarii Ranger, Necron Warrior, Boyz, Termagant, Plague Marine
4. **Role** — Troops, Elite, Heavy Support, HQ, Fast Attack, etc.
5. **Loadout** — visible weapons and equipment
6. **Paint scheme notes** — colors used, quality, any distinctive markings
Be specific but honest. If you're uncertain, say so and explain what you can identify vs what's unclear.
Format your response as clean markdown with clear headings."""
# Running count of successful identifications; incremented by identify() and
# reported as "models_tested" by the /api/health endpoint.
identify_count = 0
class IdentifyResponse(BaseModel):
    """Response schema declared for the ``/api/identify`` endpoint."""

    # True when the model produced an identification.
    success: bool
    # Text returned by the model on success.
    result: str | None = None
    # Error description on failure.
    error: str | None = None
    # Name of the model that handled the request.
    model_used: str | None = None
    # Request duration in milliseconds.
    time_ms: float = 0
@app.get("/api/health")
async def health():
    """Report liveness, the default vision model and the identification count."""
    report = {"status": "ok"}
    report["model"] = VISION_MODEL
    report["models_tested"] = identify_count
    return report
def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds elapsed since *started* (a ``time.perf_counter`` reading)."""
    return round((time.perf_counter() - started) * 1000)


def _error_response(status_code: int, message: str, started: float | None = None) -> JSONResponse:
    """Build the JSON error body; ``time_ms`` is included only when *started* is given."""
    body = {"success": False, "error": message}
    if started is not None:
        body["time_ms"] = _elapsed_ms(started)
    return JSONResponse(status_code=status_code, content=body)


@app.post("/api/identify", response_model=IdentifyResponse)
async def identify(image: UploadFile = File(...), model: str | None = Form(None)):
    """Identify the miniature in an uploaded image via the LiteLLM endpoint.

    Args:
        image: Uploaded file; its content type must start with ``image/`` and
            its size must be between 1 byte and ``MAX_IMAGE_SIZE``.
        model: Optional model name overriding ``VISION_MODEL``.

    Returns:
        An ``IdentifyResponse`` on success, otherwise a ``JSONResponse`` with
        ``success: False`` and one of these status codes:

        * 400 - not an image, or an empty upload
        * 413 - image larger than ``MAX_IMAGE_SIZE``
        * 502 - the backend was unreachable, answered with an error status,
          or returned a body that could not be interpreted
        * 504 - the backend did not answer within ``REQUEST_TIMEOUT``
        * 500 - any other unexpected failure
    """
    global identify_count
    # perf_counter is monotonic, so durations are immune to wall-clock jumps.
    t0 = time.perf_counter()

    if not image.content_type or not image.content_type.startswith("image/"):
        return _error_response(400, "File must be an image (PNG, JPEG, WebP, etc.)")

    raw = await image.read()
    if not raw:
        # A zero-byte upload can never be identified; fail fast instead of
        # spending a model call on it.
        return _error_response(400, "Uploaded image is empty")
    if len(raw) > MAX_IMAGE_SIZE:
        return _error_response(413, f"Image too large ({len(raw)} bytes). Max {MAX_IMAGE_SIZE}.")

    b64 = base64.b64encode(raw).decode()
    # content_type is known to be non-empty here (validated above).
    data_url = f"data:{image.content_type};base64,{b64}"
    chosen_model = model or VISION_MODEL
    payload = {
        "model": chosen_model,
        "messages": [
            # The system prompt carries the identification instructions and
            # the expected answer format.
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Identify this Warhammer 40k miniature in detail:"},
                    {"type": "image_url", "image_url": {"url": data_url, "detail": "high"}},
                ],
            },
        ],
        "max_tokens": 1000,
    }

    try:
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
            resp = await client.post(
                LITELLM_URL,
                json=payload,
                headers={"Authorization": f"Bearer {LITELLM_API_KEY}", "Content-Type": "application/json"},
            )
        resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        body = e.response.text[:500]
        log.error("LiteLLM error %d: %s", e.response.status_code, body)
        return _error_response(502, f"Model error: {body}", t0)
    except httpx.TimeoutException:
        # Must precede RequestError: timeouts are a subclass of it.
        log.warning("LiteLLM request timed out after %ss", REQUEST_TIMEOUT)
        return _error_response(504, "Model request timed out", t0)
    except httpx.RequestError as e:
        # Connection refused, DNS failure, etc. - an upstream problem, not ours.
        log.error("LiteLLM unreachable: %r", e)
        return _error_response(502, f"Model backend unreachable: {e}", t0)
    except Exception as e:
        log.exception("Unexpected error")
        return _error_response(500, str(e), t0)

    try:
        data_out = resp.json()
        content = data_out["choices"][0]["message"]["content"]
    except (ValueError, LookupError, TypeError):
        # Non-JSON body, or JSON without the expected choices/message shape.
        log.error("LiteLLM returned an unexpected response: %s", resp.text[:500])
        return _error_response(502, "Model returned a malformed response", t0)
    if not isinstance(content, str):
        # e.g. "content": null
        log.error("LiteLLM returned no text content: %r", content)
        return _error_response(502, "Model returned no text content", t0)

    # "usage" may be absent or null; token counts are informational only.
    usage = data_out.get("usage")
    if not isinstance(usage, dict):
        usage = {}

    identify_count += 1
    elapsed = (time.perf_counter() - t0) * 1000
    log.info(
        "Identified model %d: %s in %.0fms (%d input, %d output tokens)",
        identify_count,
        content[:80].replace("\n", " "),
        elapsed,
        usage.get("prompt_tokens") or 0,
        usage.get("completion_tokens") or 0,
    )
    return IdentifyResponse(success=True, result=content, model_used=chosen_model, time_ms=round(elapsed))
# Frontend assets live in the "static" directory next to this module.
static_dir = Path(__file__).parent.joinpath("static")
# Create the directory if it is missing so the mount below has a target.
static_dir.mkdir(exist_ok=True, parents=True)
# Mounted at the root path, after the /api routes have been registered.
app.mount(
    "/",
    StaticFiles(html=True, directory=str(static_dir)),
    name="static",
)
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    # PORT may be overridden through the environment; 5173 is the default.
    port = int(os.getenv("PORT", 5173))
    log.info(
        "Starting Warhammer Vision on http://0.0.0.0:%d — using model: %s",
        port,
        VISION_MODEL,
    )
    uvicorn.run(app, port=port, host="0.0.0.0")