162 lines
5.7 KiB
Python
162 lines
5.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Warhammer Vision — Identify Warhammer 40k models using vision LLMs through LiteLLM."""
|
|
|
|
import base64
|
|
import io
|
|
import json
|
|
import logging
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from fastapi import FastAPI, File, Form, Request, UploadFile
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import FileResponse, JSONResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel
|
|
|
|
# Configure the root logger once at import: INFO and above, with a timestamp
# and level tag in front of every message.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger used by every handler in this file.
log = logging.getLogger(__name__)
|
|
|
|
# The ASGI application; routes below are registered on this instance.
app = FastAPI(title="Warhammer Vision")

# Fully permissive CORS: any origin, HTTP method, and request header is allowed.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
|
|
|
|
# Runtime configuration. Each value can be overridden through the environment
# variable of the same name; the literals are the fallbacks.

# Chat-completions endpoint that identification requests are POSTed to.
LITELLM_URL = os.getenv("LITELLM_URL", "http://localhost:4000/v1/chat/completions")
# Sent as the Bearer token on every upstream request.
LITELLM_API_KEY = os.getenv("LITELLM_API_KEY", "anything")
# Model name used when the caller does not supply one.
VISION_MODEL = os.getenv("VISION_MODEL", "gemma4-it:e4b")

# Largest accepted upload, in bytes (10 MB).
MAX_IMAGE_SIZE = 10 * 1024**2
# Passed as the httpx client timeout for the upstream call (seconds).
REQUEST_TIMEOUT = 120
|
|
|
|
# Instruction text describing what the vision model should report about a
# miniature and how to format it. Built line by line so each numbered item is
# easy to edit; empty strings are the blank lines between paragraphs.
SYSTEM_PROMPT = "\n".join(
    (
        "You are an expert Warhammer 40,000 miniature identification system.",
        "Analyze the provided image and identify the miniature with as much detail as possible.",
        "",
        "Identify the following when visible:",
        "1. **Faction** — e.g., Space Marines, Orks, Necrons, Aeldari, Tyranids, Chaos, Astra Militarum, etc.",
        "2. **Sub-faction / Chapter / Dynasty** — e.g., Ultramarines (blue + gold), Blood Angels (red), Death Guard (green + rot), Szarekhan Dynasty (bronze + green)",
        "3. **Unit/Model Name** — e.g., Intercessor, Skitarii Ranger, Necron Warrior, Boyz, Termagant, Plague Marine",
        "4. **Role** — Troops, Elite, Heavy Support, HQ, Fast Attack, etc.",
        "5. **Loadout** — visible weapons and equipment",
        "6. **Paint scheme notes** — colors used, quality, any distinctive markings",
        "",
        "Be specific but honest. If you're uncertain, say so and explain what you can identify vs what's unclear.",
        "",
        "Format your response as clean markdown with clear headings.",
    )
)
|
|
|
|
# Running total of successful identifications since the process started;
# incremented by the identify handler and reported by the health endpoint.
identify_count: int = 0
|
|
|
|
|
|
class IdentifyResponse(BaseModel):
    """Response schema for ``POST /api/identify``.

    On success ``result`` and ``model_used`` are populated; on failure
    ``error`` carries the reason. Only ``success`` is required.
    """

    # Whether the identification completed.
    success: bool
    # Text returned by the vision model (the prompt asks for markdown).
    result: str | None = None
    # Human-readable failure reason.
    error: str | None = None
    # Name of the model that produced ``result``.
    model_used: str | None = None
    # Handling time for the request, in milliseconds.
    time_ms: float = 0
|
|
|
|
|
|
@app.get("/api/health")
async def health():
    """Return a liveness report with the default model and the success counter."""
    report = {
        "status": "ok",
        "model": VISION_MODEL,
        # Number of identifications that have completed successfully so far.
        "models_tested": identify_count,
    }
    return report
|
|
|
|
|
|
def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds elapsed since *started* (a ``time.perf_counter()`` reading)."""
    return round((time.perf_counter() - started) * 1000)


def _failure(status_code: int, message: str, started: float | None = None) -> JSONResponse:
    """Build the JSON error body shared by every failure path of ``/api/identify``.

    ``time_ms`` is included only when *started* is given, i.e. for failures
    that happen after the upstream call was attempted.
    """
    body: dict[str, object] = {"success": False, "error": message}
    if started is not None:
        body["time_ms"] = _elapsed_ms(started)
    return JSONResponse(status_code=status_code, content=body)


@app.post("/api/identify", response_model=IdentifyResponse)
async def identify(image: UploadFile = File(...), model: str | None = Form(None)):
    """Identify the miniature in an uploaded image using a vision model.

    The image is base64-encoded into a data URL and sent, together with
    ``SYSTEM_PROMPT``, to the chat-completions endpoint at ``LITELLM_URL``.

    Args:
        image: Uploaded file; its content type must start with ``image/``.
        model: Optional model name overriding ``VISION_MODEL``.

    Returns:
        ``IdentifyResponse`` on success, otherwise a ``JSONResponse`` with
        ``success: False`` and one of these status codes:

        * 400 - not an image, or the upload is empty
        * 413 - larger than ``MAX_IMAGE_SIZE``
        * 502 - upstream returned an error status, was unreachable, or sent
          a response without text content
        * 504 - upstream timed out
        * 500 - any other unexpected failure
    """
    global identify_count
    # perf_counter is monotonic, so the reported duration cannot go negative
    # or jump when the system clock is adjusted.
    started = time.perf_counter()

    mime = image.content_type
    if not mime or not mime.startswith("image/"):
        return _failure(400, "File must be an image (PNG, JPEG, WebP, etc.)")

    raw = await image.read()
    if not raw:
        # Nothing to analyse; do not spend a model call on an empty payload.
        return _failure(400, "Uploaded image is empty")
    if len(raw) > MAX_IMAGE_SIZE:
        return _failure(413, f"Image too large ({len(raw)} bytes). Max {MAX_IMAGE_SIZE}.")

    data_url = f"data:{mime};base64,{base64.b64encode(raw).decode()}"

    chosen_model = model or VISION_MODEL
    payload = {
        "model": chosen_model,
        "messages": [
            # The system prompt carries the identification checklist and the
            # required output format.
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Identify this Warhammer 40k miniature in detail:"},
                    {"type": "image_url", "image_url": {"url": data_url, "detail": "high"}},
                ],
            },
        ],
        "max_tokens": 1000,
    }

    try:
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
            resp = await client.post(
                LITELLM_URL,
                json=payload,
                headers={"Authorization": f"Bearer {LITELLM_API_KEY}", "Content-Type": "application/json"},
            )
            resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        body = e.response.text[:500]
        log.error("LiteLLM error %d: %s", e.response.status_code, body)
        return _failure(502, f"Model error: {body}", started)
    except httpx.TimeoutException:
        # Must precede RequestError: TimeoutException is one of its subclasses.
        log.error("LiteLLM request timed out after %ss", REQUEST_TIMEOUT)
        return _failure(504, "Model request timed out", started)
    except httpx.RequestError as e:
        # Connection refused, DNS failure, etc.: the upstream is at fault, so
        # report a gateway error rather than an internal one.
        log.error("LiteLLM request failed: %s", e)
        return _failure(502, f"Could not reach model backend: {e}", started)
    except Exception as e:
        log.exception("Unexpected error")
        return _failure(500, str(e), started)

    try:
        data_out = resp.json()
        content = data_out["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        log.error("LiteLLM returned an unexpected response: %s", resp.text[:500])
        return _failure(502, "Model returned an unexpected response", started)
    if not isinstance(content, str):
        # e.g. "content": null when the model refuses or emits nothing.
        log.error("LiteLLM response has no text content: %r", content)
        return _failure(502, "Model returned no text content", started)

    identify_count += 1
    elapsed = (time.perf_counter() - started) * 1000

    # "usage" may be absent or null depending on the backend.
    usage = data_out.get("usage") or {}
    log.info(
        "Identified model %d: %s in %.0fms (%d input, %d output tokens)",
        identify_count,
        content[:80].replace("\n", " "),
        elapsed,
        usage.get("prompt_tokens", 0),
        usage.get("completion_tokens", 0),
    )

    return IdentifyResponse(success=True, result=content, model_used=chosen_model, time_ms=round(elapsed))
|
|
|
|
|
|
# Frontend assets are served from ./static next to this module. The folder is
# created on import if it is missing (presumably so the mount below cannot fail
# on a fresh checkout). The mount is registered after the /api routes above.
static_dir = Path(__file__).parent.joinpath("static")
static_dir.mkdir(exist_ok=True, parents=True)
app.mount("/", StaticFiles(html=True, directory=str(static_dir)), name="static")
|
|
|
|
|
|
if __name__ == "__main__":
    # uvicorn is imported here so it is only needed when running as a script.
    import uvicorn

    # Listen on all interfaces; the port comes from $PORT, defaulting to 5173.
    bind_host = "0.0.0.0"
    port = int(os.getenv("PORT", 5173))
    log.info("Starting Warhammer Vision on http://0.0.0.0:%d — using model: %s", port, VISION_MODEL)
    uvicorn.run(app, host=bind_host, port=port)
|