- docker-compose: swap postgres image to pgvector/pgvector:pg16
- postgres/init.sql: CREATE EXTENSION vector; image_embedding table
- plugins/embeddings.py: embed_images + search_images_semantic
(sentence-transformers all-MiniLM-L6-v2, lazy-loaded, pgvector <=> cosine)
- plugins/images.py: register_image kicks off background embed worker
- seed.py: seed_embeddings writes 4 embeddings for the mock images
- README: semantic image search section + T3 note
- 11 tests across 4 files, all green:
test_embeddings_plugin.py (4): schema, ordering, idempotency, stub
test_embeddings_real_model.py (3): real MiniLM, acceptance queries
test_register_image_hook.py (2): manifest row, end-to-end hook
test_seed_embeddings.py (2): writes 4, idempotent
- Includes T3 consistency plugin skeleton (4 stub tools)
248 lines
9.6 KiB
Python
248 lines
9.6 KiB
Python
"""
|
|
images plugin — MinIO-backed image recall with Neo4j entity links.
|
|
|
|
Demonstrates the "different DB for different purpose" pattern:
|
|
- Postgres holds image MANIFESTS (metadata, tags, captions) so the LLM
|
|
can decide which images to surface.
|
|
- MinIO holds the actual BYTES (PNGs, JPEGs).
|
|
- Neo4j holds the LINK from an image to the entity it depicts.
|
|
|
|
The LLM calls recall_images(entity_id=...) to get back a list of
|
|
{image_id, caption, tags, era, presigned_url} so it can either describe
|
|
the image (from caption) or fetch the bytes (from the presigned URL).
|
|
"""
|
|
import datetime as dt
|
|
import logging
|
|
import os
|
|
import threading
|
|
from urllib.parse import urlparse, urlunparse
|
|
|
|
from server import get_postgres, get_neo4j, get_minio, REGISTRY
|
|
|
|
# Module logger, named after this module.
LOG = logging.getLogger(__name__)

# Module-level state for the background-embedding hook.
# `_embed_thread_started` records whether `_start_embed_worker_once` has
# already spawned the "embed-worker" thread in this process; it is checked
# and set only while `_embed_thread_lock` is held, so concurrent
# register_image calls cannot start a second worker.
_embed_thread_started = False
_embed_thread_lock = threading.Lock()
|
|
|
|
|
|
def _start_embed_worker_once():
    """Start the background embedding worker thread if it is not running.

    The first call spawns one daemon thread named ``embed-worker``. The
    thread loops forever: it calls
    ``plugins.embeddings._do_embed_images(limit=50)``, logs the count when
    it is non-zero, logs (and survives) any exception raised by a pass,
    and sleeps two seconds between passes. While ``_embed_thread_started``
    is set, further calls return immediately.

    ``plugins.embeddings`` is imported inside the worker instead of at
    module top, so a problem with that plugin cannot break the import of
    this one. If the import fails, the worker logs the error and clears
    ``_embed_thread_started`` so a later call can retry, rather than
    leaving a dead thread behind a flag that claims it is running.
    """
    global _embed_thread_started
    with _embed_thread_lock:
        if _embed_thread_started:
            return

        def _worker():
            global _embed_thread_started
            import time

            try:
                from plugins.embeddings import _do_embed_images
            except Exception:
                LOG.exception("embed_worker: cannot import plugins.embeddings")
                # The starter sets the flag before releasing the lock, so
                # this reset always lands after it and is not overwritten.
                with _embed_thread_lock:
                    _embed_thread_started = False
                return

            while True:
                try:
                    n = _do_embed_images(limit=50)
                    if n:
                        LOG.info("embed_worker: wrote %s new embeddings", n)
                except Exception as e:
                    # Best-effort loop: one failed pass must not stop the
                    # worker, so log with traceback and try again later.
                    LOG.exception("embed_worker: %s", e)
                time.sleep(2)

        t = threading.Thread(target=_worker, name="embed-worker", daemon=True)
        t.start()
        _embed_thread_started = True
        LOG.info("started embed_worker background thread")
|
|
|
|
|
|
def _q_neo4j(query, params=None):
    """Run a Cypher query and return its records as a list of dicts.

    ``params`` supplies the query parameters; a falsy value is sent as an
    empty dict. Records are converted to dicts while the session is still
    open, so the returned list does not depend on the session.
    """
    bindings = params or {}
    with get_neo4j().session() as session:
        records = session.run(query, bindings)
        return list(map(dict, records))
|
|
|
|
|
|
def _q_pg(sql, params=None, fetch=True):
    """Execute one SQL statement against Postgres.

    Returns the result rows as a list of ``{column_name: value}`` dicts
    when ``fetch`` is true and the statement produced a result set, and an
    empty list otherwise. The connection obtained from ``get_postgres()``
    is closed before returning, whether or not the statement succeeded.

    NOTE(review): nothing here commits, so writes presumably rely on the
    connection being in autocommit mode — confirm in ``get_postgres``.
    """
    connection = get_postgres()
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, params or ())
            # No description means the statement returned no result set.
            if not fetch or not cursor.description:
                return []
            names = [column[0] for column in cursor.description]
            return [dict(zip(names, row)) for row in cursor.fetchall()]
    finally:
        connection.close()
|
|
|
|
|
|
def _presign(object_key: str) -> str:
    """Return a presigned GET URL (valid for one hour) for ``object_key``.

    Reads ``MINIO_URL`` and ``MINIO_BUCKET`` from the environment, plus the
    optional ``MINIO_PUBLIC_URL`` (defaults to ``MINIO_URL``).

    * When the public and internal URLs are identical, the shared MinIO
      client from ``get_minio()`` produces the URL.
    * Otherwise the URL is signed directly against the *public* endpoint
      with a boto3 S3 client (SigV4, credentials from ``MINIO_ACCESS_KEY``
      and ``MINIO_SECRET_KEY``), so the signature matches the host the
      client will actually contact. Presigning is computed locally; the
      public endpoint does not need to be reachable from this process.

    Raises ``KeyError`` if a required environment variable is missing.
    """
    internal = os.environ["MINIO_URL"]
    public = os.environ.get("MINIO_PUBLIC_URL", internal)
    bucket = os.environ["MINIO_BUCKET"]

    if public == internal:
        # Easy path: one endpoint for both signing and fetching.
        return get_minio().presigned_get_object(
            bucket, object_key, expires=dt.timedelta(hours=1)
        )

    # boto3 is only needed on this path, so import it here.
    import boto3
    from botocore.config import Config

    parsed = urlparse(public)
    host = parsed.hostname or ""
    if ":" in host:
        # urlparse strips the brackets from IPv6 literals; restore them.
        host = f"[{host}]"
    endpoint = f"{parsed.scheme}://{host}"
    if parsed.port:
        # Only add a port the URL spelled out. Forcing ":80" would turn
        # "https://example.com" into the unusable "https://example.com:80".
        endpoint = f"{endpoint}:{parsed.port}"

    s3 = boto3.client(
        "s3",
        endpoint_url=endpoint,
        aws_access_key_id=os.environ["MINIO_ACCESS_KEY"],
        aws_secret_access_key=os.environ["MINIO_SECRET_KEY"],
        region_name="us-east-1",
        config=Config(signature_version="s3v4"),
    )
    return s3.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": object_key},
        ExpiresIn=3600,
    )
|
|
|
|
|
|
@REGISTRY.tool(
    name="register_image",
    description="Register an image in the manifest. Idempotent on (image_id). The object must already be in MinIO at the given object_key.",
    input_schema={
        "type": "object",
        "properties": {
            "image_id": {"type": "string", "description": "Caller-chosen unique id"},
            "object_key": {"type": "string", "description": "MinIO object key, e.g. 'characters/aldric.png'"},
            "entity_id": {"type": "string", "description": "Linked Neo4j entity id (Person.id, Location.id, etc.)"},
            "entity_type": {"type": "string", "enum": ["Person", "Faction", "Location", "Item", "Event"]},
            "caption": {"type": "string", "description": "1-3 sentences describing the image for the LLM"},
            "tags": {"type": "array", "items": {"type": "string"}},
            "era": {"type": "string", "description": "Canonical era slug, e.g. '2nd_age'"},
            "width": {"type": "integer"},
            "height": {"type": "integer"},
            "bytes": {"type": "integer"},
        },
        "required": ["image_id", "object_key", "caption"],
    },
)
def register_image(args):
    """Upsert an image manifest row, link it in Neo4j, start the embedder.

    ``args`` is the tool-call payload described by ``input_schema``;
    ``image_id``, ``object_key`` and ``caption`` are required and a missing
    one raises ``KeyError``.

    Steps:
      1. Insert into ``image_manifest``. On an ``image_id`` conflict the
         row is updated, including ``entity_type`` so it cannot disagree
         with a changed ``entity_id``. ``width``/``height``/``bytes`` are
         replaced only when the call supplies them.
      2. When both ``entity_id`` and ``entity_type`` are given, merge an
         ``Image`` node and a ``DEPICTS`` relationship to the entity, and
         keep the node's caption/era in step with the manifest.
      3. Make sure the background embedding worker is running.

    Returns ``{"registered": True, "image_id": <image_id>}``.
    """
    _q_pg("""
        INSERT INTO image_manifest
            (image_id, object_key, entity_id, entity_type, caption, tags, era, width, height, bytes)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (image_id) DO UPDATE
            SET object_key = EXCLUDED.object_key,
                entity_id = EXCLUDED.entity_id,
                entity_type = EXCLUDED.entity_type,
                caption = EXCLUDED.caption,
                tags = EXCLUDED.tags,
                era = EXCLUDED.era,
                width = COALESCE(EXCLUDED.width, image_manifest.width),
                height = COALESCE(EXCLUDED.height, image_manifest.height),
                bytes = COALESCE(EXCLUDED.bytes, image_manifest.bytes)
    """, (
        args["image_id"], args["object_key"], args.get("entity_id"),
        args.get("entity_type"), args["caption"], args.get("tags", []),
        args.get("era"), args.get("width"), args.get("height"), args.get("bytes"),
    ), fetch=False)

    # Link in Neo4j so entity_context can see "this image depicts X".
    # The MATCH is by id only; entity_type merely gates whether a link is
    # attempted. If no node has that id the query matches nothing and no
    # Image node is created.
    # NOTE(review): a DEPICTS edge to a previously linked entity is not
    # removed when entity_id changes — confirm whether that is intended.
    if args.get("entity_id") and args.get("entity_type"):
        _q_neo4j("""
            MATCH (e {id: $entity_id})
            MERGE (img:Image {id: $image_id})
            ON CREATE SET img.caption = $caption, img.era = $era
            ON MATCH SET img.caption = $caption, img.era = $era
            MERGE (img)-[:DEPICTS]->(e)
        """, {
            "entity_id": args["entity_id"], "image_id": args["image_id"],
            "caption": args["caption"], "era": args.get("era"),
        })

    # Ensure the background embed worker exists. It polls on its own
    # schedule, so there is no explicit wake-up; this is a no-op once the
    # worker has been started.
    _start_embed_worker_once()
    return {"registered": True, "image_id": args["image_id"]}
|
|
|
|
|
|
@REGISTRY.tool(
    name="recall_images",
    description="Recall images for an entity. Returns a list of {image_id, caption, tags, era, presigned_url}.",
    input_schema={
        "type": "object",
        "properties": {
            "entity_id": {"type": "string", "description": "Person.id / Location.id / etc."},
            "tag": {"type": "string", "description": "Optional tag filter (e.g. 'portrait', 'battle')"},
            "limit": {"type": "integer", "default": 5},
        },
        "required": ["entity_id"],
    },
)
def recall_images(args):
    """Return the most recently uploaded images linked to an entity.

    ``args["entity_id"]`` selects the manifest rows; a truthy
    ``args["tag"]`` additionally requires that tag to be present, and
    ``args["limit"]`` (default 5) caps the row count. Every row is
    returned with a presigned URL in place of its object key.

    Returns ``{"entity_id": ..., "count": ..., "images": [...]}``.
    """
    wanted_tag = args.get("tag")
    if wanted_tag:
        sql = """
            SELECT image_id, caption, tags, era, object_key
            FROM image_manifest
            WHERE entity_id = %s AND %s = ANY(tags)
            ORDER BY uploaded_at DESC LIMIT %s
        """
        params = (args["entity_id"], args["tag"], args.get("limit", 5))
    else:
        sql = """
            SELECT image_id, caption, tags, era, object_key
            FROM image_manifest
            WHERE entity_id = %s
            ORDER BY uploaded_at DESC LIMIT %s
        """
        params = (args["entity_id"], args.get("limit", 5))

    images = [
        {
            "image_id": row["image_id"],
            "caption": row["caption"],
            "tags": row["tags"],
            "era": row["era"],
            "presigned_url": _presign(row["object_key"]),
        }
        for row in _q_pg(sql, params)
    ]
    return {"entity_id": args["entity_id"], "count": len(images), "images": images}
|
|
|
|
|
|
@REGISTRY.tool(
    name="search_images_by_caption",
    description="Find images whose caption or tags contain a substring. Use this when the LLM doesn't know the exact entity id.",
    input_schema={
        "type": "object",
        "properties": {
            "q": {"type": "string", "description": "Substring to search for in caption or tags"},
            "limit": {"type": "integer", "default": 5},
        },
        "required": ["q"],
    },
)
def search_images_by_caption(args):
    """Case-insensitive substring search over image captions and tags.

    ``args["q"]`` is matched literally: the ILIKE metacharacters ``%`` and
    ``_`` (and the escape character ``\\``) are escaped before the pattern
    is built, so a query such as ``"50%"`` or ``"north_gate"`` finds only
    rows containing exactly that text. ``args["limit"]`` (default 5) caps
    the row count; newest uploads come first.

    Returns ``{"q": ..., "count": ..., "images": [...]}`` where each image
    carries its manifest fields plus a presigned URL.
    """
    # Backslash is the default ILIKE escape character in Postgres; it has
    # to be escaped first so the escapes added next are not doubled.
    literal = (
        str(args["q"])
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    like = f"%{literal}%"
    rows = _q_pg("""
        SELECT image_id, entity_id, entity_type, caption, tags, era, object_key
        FROM image_manifest
        WHERE caption ILIKE %s OR EXISTS (SELECT 1 FROM unnest(tags) tag WHERE tag ILIKE %s)
        ORDER BY uploaded_at DESC LIMIT %s
    """, (like, like, args.get("limit", 5)))
    out = [
        {
            "image_id": r["image_id"],
            "entity_id": r["entity_id"],
            "entity_type": r["entity_type"],
            "caption": r["caption"],
            "tags": r["tags"],
            "era": r["era"],
            "presigned_url": _presign(r["object_key"]),
        }
        for r in rows
    ]
    return {"q": args["q"], "count": len(out), "images": out}
|
|
|
|
|
|
def register(registry):
    """Plugin registration hook; intentionally does nothing.

    The tools in this module attach themselves through the
    ``@REGISTRY.tool`` decorators when the module is imported, so there is
    nothing left to do with ``registry`` here.
    NOTE(review): presumably called by the plugin loader — confirm.
    """
    return None
|