- docker-compose: swap postgres image to pgvector/pgvector:pg16
- postgres/init.sql: CREATE EXTENSION vector; image_embedding table
- plugins/embeddings.py: embed_images + search_images_semantic
(sentence-transformers all-MiniLM-L6-v2, lazy-loaded, pgvector <=> cosine)
- plugins/images.py: register_image kicks off background embed worker
- seed.py: seed_embeddings writes 4 embeddings for the mock images
- README: semantic image search section + T3 note
- 11 tests across 4 files, all green:
test_embeddings_plugin.py (4): schema, ordering, idempotency, stub
test_embeddings_real_model.py (3): real MiniLM, acceptance queries
test_register_image_hook.py (2): manifest row, end-to-end hook
test_seed_embeddings.py (2): writes 4, idempotent
- Includes T3 consistency plugin skeleton (4 stub tools)
429 lines
19 KiB
Python
429 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate a high-fantasy mock world and load it into the POC stack.
|
|
|
|
Mock world: the realm of Arda, two eras (1st and 2nd Age), three factions,
|
|
ten people, four locations, four items, six events, five lineage edges,
|
|
a handful of trades, and four images.
|
|
|
|
This script can be run repeatedly — it's idempotent (uses MERGE in Neo4j,
|
|
ON CONFLICT in Postgres).
|
|
"""
|
|
import datetime as dt
|
|
import os
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from neo4j import GraphDatabase
|
|
import psycopg2
|
|
from minio import Minio
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
# ─── config (also used by docker-compose) ────────────────────────────────────
|
|
|
|
# Every setting can be overridden through the environment; the fallbacks
# point at a stack running on localhost.

# Neo4j (graph store)
NEO4J_URL = os.getenv("NEO4J_URL", "bolt://localhost:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASS = os.getenv("NEO4J_PASSWORD", "lore-dev-password")

# Postgres (trade log, image manifest, embeddings)
# NOTE(review): the password in this default reads "***", which looks like a
# masked value rather than a real credential — confirm before relying on it.
PG_URL = os.getenv("POSTGRES_URL", "postgresql://lore:***@localhost:5432/lore")

# MinIO (image bytes)
MINIO_URL = os.getenv("MINIO_URL", "http://localhost:9000")
MINIO_USER = os.getenv("MINIO_ACCESS_KEY", "lorelore")
MINIO_PASS = os.getenv("MINIO_SECRET_KEY", "lore-dev-password")
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "lore-images")
|
|
|
|
|
|
# ─── mock data ───────────────────────────────────────────────────────────────
|
|
|
|
# Person rows: (id, name, born, died, tier, culture).
# `died` is None for characters with no recorded death year.
PEOPLE = [
    ("theron",      "Theron Ashveil",     10,  120,  "noble",    "Valdorni"),
    ("maric",       "Maric Vyr",          85,  160,  "noble",    "Valdorni"),
    ("aldric",      "Aldric Raventhorne", 220, 285,  "noble",    "Valdorni"),
    ("elara",       "Elara Raventhorne",  220, None, "noble",    "Valdorni"),
    ("cael",        "Cael Vyr",           160, 240,  "noble",    "Valdorni"),
    ("yssa",        "Yssa Raventhorne",   165, None, "noble",    "Valdorni"),
    ("vex",         "Vex the Silent",     180, None, "commoner", "Mardsvillan"),
    ("alessia",     "Alessia Dusk",       190, None, "commoner", "Mardsvillan"),
    ("kael",        "General Kael",       200, None, "noble",    "Crimson Pact"),
    ("guildmaster", "Guildmaster Torren", 175, None, "noble",    "Mardsvillan"),
]
|
|
|
|
# Faction rows: (id, name, founded, dissolved).
# `dissolved` is None for every faction in the mock world.
FACTIONS = [
    ("house_vyr",    "House Vyr",        85,  None),
    ("crimson_pact", "The Crimson Pact", 150, None),
    ("merchants",    "Merchants Guild",  100, None),
]
|
|
|
|
# Location rows: (id, name).
LOCATIONS = [
    ("valdorn",     "Valdorn"),
    ("mardsville",  "Mardsville"),
    ("thornwall",   "Thornwall Keep"),
    ("black_spire", "Black Spire Pass"),
]
|
|
|
|
# Era rows: (slug, name, start, end, parent).
# `parent` is the slug of the enclosing era, or None for a top-level era.
ERAS = [
    ("1st_age",             "First Age",   0,   100, None),
    ("2nd_age",             "Second Age",  100, 300, None),
    ("2nd_age.age_of_iron", "Age of Iron", 150, 300, "2nd_age"),
]
|
|
|
|
# Event rows: (id, name, in_fiction_time, era_slug, location_id).
# NOTE(review): e2 is dated "2nd_age.year_85" while ERAS starts the Second
# Age at 100 — confirm whether this is intentional mock data.
EVENTS = [
    ("e1", "Battle of Black Spire",          "2nd_age.year_232", "2nd_age", "black_spire"),
    ("e2", "Founding of House Vyr",          "2nd_age.year_85",  "2nd_age", "valdorn"),
    ("e3", "Crimson Pact Founded",           "2nd_age.year_150", "2nd_age", "mardsville"),
    ("e4", "Aldric becomes lord",            "2nd_age.year_240", "2nd_age", "thornwall"),
    ("e5", "The Mardsville Heist",           "2nd_age.year_265", "2nd_age", "mardsville"),
    ("e6", "Crimson Pact attacks Thornwall", "2nd_age.year_280", "2nd_age", "thornwall"),
]
|
|
|
|
# Item rows: (id, name, kind).
ITEMS = [
    ("sword_eventide", "Sword of Eventide", "weapon"),
    ("pale_ledger",    "The Pale Ledger",   "document"),
    ("ruby_eye",       "Ruby Eye of Kael",  "artifact"),
    ("silver_locket",  "Elara's Locket",    "jewelry"),
]
|
|
|
|
# Time-bounded relations (the interesting ones — not just static)
|
|
# Relation rows:
#   (from_kind, from_id, rel, to_kind, to_id, valid_from, valid_until)
# `valid_from` / `valid_until` are in-fiction timestamps; None leaves that
# side of the validity window open.
RELATIONS = [
    # Family
    ("Person", "theron", "PARENT_OF", "Person", "maric",  "1st_age.year_50",  "2nd_age.year_120"),
    ("Person", "maric",  "PARENT_OF", "Person", "cael",   "2nd_age.year_180", None),
    ("Person", "cael",   "PARENT_OF", "Person", "aldric", "2nd_age.year_240", "2nd_age.year_285"),
    ("Person", "yssa",   "PARENT_OF", "Person", "aldric", "2nd_age.year_240", "2nd_age.year_285"),
    ("Person", "aldric", "SPOUSE_OF", "Person", "elara",  "2nd_age.year_250", None),
    # People and factions / rule
    ("Person", "theron", "FOUNDED",   "Faction",  "house_vyr",    "1st_age.year_85",  None),
    ("Person", "maric",  "MEMBER_OF", "Faction",  "house_vyr",    "2nd_age.year_100", "2nd_age.year_160"),
    ("Person", "aldric", "MEMBER_OF", "Faction",  "house_vyr",    "2nd_age.year_240", None),
    ("Person", "aldric", "RULES",     "Location", "thornwall",    "2nd_age.year_240", "2nd_age.year_285"),
    ("Person", "kael",   "MEMBER_OF", "Faction",  "crimson_pact", "2nd_age.year_200", None),
    # Faction politics
    ("Faction", "crimson_pact", "RULES",       "Location", "mardsville", "2nd_age.year_160", "2nd_age.year_232"),
    ("Faction", "house_vyr",    "ALLIED_WITH", "Faction",  "merchants",  "2nd_age.year_100", None),
    ("Faction", "crimson_pact", "ENEMY_OF",    "Faction",  "house_vyr",  "2nd_age.year_150", None),
    # Possessions
    ("Person", "aldric", "POSSESSES", "Item", "sword_eventide", "2nd_age.year_245", None),
    ("Person", "elara",  "POSSESSES", "Item", "silver_locket",  "2nd_age.year_250", None),
    # Geography (no validity window)
    ("Location", "thornwall",  "PART_OF", "Location", "valdorn", None, None),
    ("Location", "mardsville", "PART_OF", "Location", "valdorn", None, None),
    # Event participation (stored with the Event as the source node)
    ("Event", "e1", "PARTICIPATED_IN", "Person", "aldric", "2nd_age.year_232", "2nd_age.year_232"),
    ("Event", "e1", "PARTICIPATED_IN", "Person", "kael",   "2nd_age.year_232", "2nd_age.year_232"),
    ("Event", "e5", "PARTICIPATED_IN", "Person", "vex",    "2nd_age.year_265", "2nd_age.year_265"),
    ("Event", "e6", "PARTICIPATED_IN", "Person", "aldric", "2nd_age.year_280", "2nd_age.year_280"),
]
|
|
|
|
# Lineage group
|
|
# Lineage rows: (id, name, founder_person_id).
LINEAGES = [
    ("house_vyr_bloodline", "House Vyr (bloodline)", "theron"),
]
|
|
|
|
# Trade log entries (Postgres)
|
|
# Trade rows:
#   (buyer, seller, item, qty, unit, unit_price, in_fiction_time, location, notes)
TRADES = [
    (
        "aldric", "guildmaster", "pale_ledger", 1, "gp", 500,
        "2nd_age.year_265", "mardsville", "Aldric bought the Pale Ledger via Vex",
    ),
    (
        "elara", "guildmaster", "silver_locket", 1, "gp", 120,
        "2nd_age.year_255", "mardsville", "Gift for Elara",
    ),
    (
        "kael", "guildmaster", "ruby_eye", 1, "gp", 900,
        "2nd_age.year_270", "mardsville", "Crimson Pact acquisition",
    ),
]
|
|
|
|
# Images
|
|
# Image rows:
#   (image_id, object_key, entity_id, entity_type, caption, tags, era)
# `object_key` is the path inside the MinIO bucket; `caption` is both drawn
# onto the placeholder image and used as the text that gets embedded.
IMAGES = [
    (
        "img_aldric_portrait",
        "characters/aldric_portrait.png",
        "aldric",
        "Person",
        "Portrait of Aldric Raventhorne, Lord of Thornwall. Middle-aged, dark hair, a scar above the left eye.",
        ["portrait", "noble", "thornwall"],
        "2nd_age",
    ),
    (
        "img_vex_portrait",
        "characters/vex_portrait.png",
        "vex",
        "Person",
        "Vex the Silent, a hooded thief from the alleys of Mardsville. Face mostly in shadow.",
        ["portrait", "thief", "mardsville"],
        "2nd_age",
    ),
    (
        "img_thornwall",
        "places/thornwall.png",
        "thornwall",
        "Location",
        "Thornwall Keep at dawn. The banners of House Vyr fly from the battlements.",
        ["keep", "house_vyr", "dawn"],
        "2nd_age",
    ),
    (
        "img_battle",
        "events/battle_of_black_spire.png",
        "e1",
        "Event",
        "The Battle of Black Spire, where Aldric defeated General Kael. House Vyr's banners hold the ridge.",
        ["battle", "aldric", "kael", "house_vyr"],
        "2nd_age",
    ),
]
|
|
|
|
|
|
# ─── helpers ─────────────────────────────────────────────────────────────────
|
|
|
|
def load_neo4j():
    """Create a Neo4j driver and wait until the server accepts connections.

    Connectivity is checked up to 30 times with a 2-second pause after each
    failed attempt.

    Returns:
        The connected driver; the caller is responsible for closing it.

    Raises:
        RuntimeError: if the server is still unreachable after the last
            attempt. The last connection error is attached as the cause.
    """
    print(f"[neo4j] connecting to {NEO4J_URL}")
    d = GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASS))
    last_error = None
    # wait for neo4j
    for i in range(30):
        try:
            d.verify_connectivity()
            return d
        except Exception as e:  # broad on purpose: any failure means "not ready yet"
            last_error = e
            print(f"[neo4j] not ready ({e}); retry {i}")
            time.sleep(2)
    # Giving up: release the driver instead of leaking it.
    d.close()
    raise RuntimeError("neo4j never came up") from last_error
|
|
|
|
|
|
def load_postgres():
    """Open a Postgres connection, retrying while the server starts up.

    Up to 30 connection attempts are made, pausing 2 seconds after each
    failure.

    Returns:
        The open psycopg2 connection.

    Raises:
        RuntimeError: if no attempt succeeds.
    """
    print(f"[postgres] connecting to {PG_URL}")
    attempt = 0
    while attempt < 30:
        try:
            connection = psycopg2.connect(PG_URL)
        except Exception as err:
            print(f"[postgres] not ready ({err}); retry {attempt}")
            time.sleep(2)
            attempt += 1
        else:
            return connection
    raise RuntimeError("postgres never came up")
|
|
|
|
|
|
def load_minio():
    """Create a MinIO client and make sure the image bucket exists.

    MINIO_URL may be given as ``http://host:port``, ``https://host:port`` or a
    bare ``host:port``. TLS is enabled only for the ``https://`` form. Up to
    30 attempts are made, pausing 2 seconds after each failure.

    Returns:
        The MinIO client, with MINIO_BUCKET guaranteed to exist.

    Raises:
        RuntimeError: if no attempt succeeds.
    """
    print(f"[minio] connecting to {MINIO_URL}")
    # The client is given a bare "host:port" plus a TLS flag, so derive both
    # from the URL scheme.
    if MINIO_URL.startswith("https://"):
        endpoint, secure = MINIO_URL[len("https://"):], True
    else:
        endpoint, secure = MINIO_URL.replace("http://", ""), False
    endpoint = endpoint.rstrip("/")

    for i in range(30):
        try:
            c = Minio(endpoint,
                      access_key=MINIO_USER, secret_key=MINIO_PASS, secure=secure)
            # Make sure bucket exists
            if not c.bucket_exists(MINIO_BUCKET):
                c.make_bucket(MINIO_BUCKET)
            return c
        except Exception as e:  # broad on purpose: any failure means "not ready yet"
            print(f"[minio] not ready ({e}); retry {i}")
            time.sleep(2)
    raise RuntimeError("minio never came up")
|
|
|
|
|
|
# ─── seeder functions ────────────────────────────────────────────────────────
|
|
|
|
def seed_neo4j(driver):
    """Load the mock world into Neo4j.

    Creates uniqueness constraints, then writes eras, people, factions,
    locations, items, events, lineages and the time-bounded relations. Nodes
    and relationships are written with MERGE, so running the function again
    updates the existing graph rather than duplicating it.

    Args:
        driver: a connected Neo4j driver; one session is opened on it.
    """
    with driver.session() as s:
        # Constraints
        for label in ["Person", "Faction", "Location", "Item", "Event", "Era", "Lineage"]:
            s.run(f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:{label}) REQUIRE n.id IS UNIQUE")
        s.run("CREATE CONSTRAINT era_slug IF NOT EXISTS FOR (e:Era) REQUIRE e.slug IS UNIQUE")

        # Eras: create all nodes first, then link children to parents so the
        # parent is guaranteed to exist when the PART_OF edge is merged.
        for slug, name, start, end, parent in ERAS:
            s.run("""
                MERGE (e:Era {slug: $slug})
                SET e.name = $name, e.start = $start, e.end = $end, e.parent_slug = $parent
            """, slug=slug, name=name, start=start, end=end, parent=parent)
        for slug, _, _, _, parent in ERAS:
            if parent:
                s.run("""
                    MATCH (child:Era {slug: $slug}), (parent:Era {slug: $p})
                    MERGE (child)-[:PART_OF]->(parent)
                """, slug=slug, p=parent)
        print(f"[neo4j] seeded {len(ERAS)} eras")

        # People
        for pid, name, born, died, tier, culture in PEOPLE:
            s.run("""
                MERGE (p:Person {id: $pid})
                SET p.name = $name, p.born = $born, p.died = $died,
                    p.tier = $tier, p.culture = $culture
            """, pid=pid, name=name, born=born, died=died, tier=tier, culture=culture)
        print(f"[neo4j] seeded {len(PEOPLE)} people")

        # Factions
        for fid, name, founded, dissolved in FACTIONS:
            s.run("""
                MERGE (f:Faction {id: $fid})
                SET f.name = $name, f.founded = $founded, f.dissolved = $dissolved
            """, fid=fid, name=name, founded=founded, dissolved=dissolved)
        print(f"[neo4j] seeded {len(FACTIONS)} factions")

        # Locations
        for lid, name in LOCATIONS:
            s.run("MERGE (l:Location {id: $lid}) SET l.name = $name",
                  lid=lid, name=name)
        print(f"[neo4j] seeded {len(LOCATIONS)} locations")

        # Items
        for iid, name, kind in ITEMS:
            s.run("MERGE (i:Item {id: $iid}) SET i.name = $name, i.kind = $kind",
                  iid=iid, name=name, kind=kind)
        print(f"[neo4j] seeded {len(ITEMS)} items")

        # Events, linked to the era they occurred in and the place they occurred at
        for eid, name, when, era_slug, loc_id in EVENTS:
            s.run("""
                MERGE (e:Event {id: $eid})
                SET e.name = $name, e.in_fiction_time = $when
                WITH e
                MATCH (era:Era {slug: $era_slug})
                MERGE (e)-[:OCCURRED_DURING]->(era)
                WITH e
                MATCH (l:Location {id: $loc_id})
                MERGE (e)-[:OCCURRED_AT]->(l)
            """, eid=eid, name=name, when=when, era_slug=era_slug, loc_id=loc_id)
        print(f"[neo4j] seeded {len(EVENTS)} events")

        # Lineages
        # The member list is hard-coded and attached to every lineage in
        # LINEAGES (currently there is only one).
        vyr_members = {"theron", "maric", "cael", "aldric"}
        for lin_id, name, founder in LINEAGES:
            s.run("""
                MERGE (l:Lineage {id: $lin_id})
                SET l.name = $name
                WITH l
                MATCH (f:Person {id: $founder})
                MERGE (l)-[:FOUNDED_BY]->(f)
            """, lin_id=lin_id, name=name, founder=founder)
            # Add all Vyr-lineage people
            for pid, *_ in PEOPLE:
                if pid in vyr_members:
                    s.run("""
                        MATCH (l:Lineage {id: $lin_id}), (p:Person {id: $pid})
                        MERGE (p)-[:MEMBER_OF]->(l)
                    """, lin_id=lin_id, pid=pid)
        print(f"[neo4j] seeded {len(LINEAGES)} lineages")

        # Time-bounded relations
        # Labels and relationship types cannot be Cypher parameters, so they
        # are interpolated (backtick-quoted) from the module's own constants.
        # Matching on the label keeps the lookup on the per-label unique
        # constraint and prevents hitting a same-id node of another label.
        for fk, fid, rel, tk, tid, vf, vu in RELATIONS:
            s.run(f"""
                MATCH (a:`{fk}` {{id: $fid}})
                MATCH (b:`{tk}` {{id: $tid}})
                MERGE (a)-[r:`{rel}`]->(b)
                SET r.valid_from = $vf, r.valid_until = $vu
            """, fid=fid, tid=tid, vf=vf, vu=vu)
        print(f"[neo4j] seeded {len(RELATIONS)} time-bounded relations")
|
|
|
|
|
|
def seed_postgres(conn):
    """Insert every TRADES row into ``trade_log`` and commit.

    ``total_price`` is derived here as quantity * unit price.

    NOTE(review): ``ON CONFLICT DO NOTHING`` only suppresses duplicates if
    ``trade_log`` has a unique constraint covering these rows — the table
    definition is not visible here, confirm against init.sql.

    Args:
        conn: an open psycopg2 connection.
    """
    insert_sql = """
        INSERT INTO trade_log
        (buyer_id, seller_id, item_id, quantity, unit, unit_price, total_price,
        location_id, in_fiction_time, notes)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON CONFLICT DO NOTHING
    """
    with conn.cursor() as cur:
        for trade in TRADES:
            buyer, seller, item, qty, unit, price, when, loc, notes = trade
            total = qty * price
            params = (buyer, seller, item, qty, unit, price, total, loc, when, notes)
            cur.execute(insert_sql, params)
    conn.commit()
    print(f"[postgres] seeded {len(TRADES)} trade_log rows")
|
|
|
|
|
|
def make_placeholder_image(text: str, color: tuple) -> Image.Image:
    """Generate a simple 512x768 placeholder image with text on a colored background.

    The text is greedily wrapped at roughly 24 characters per line. At most
    six lines are drawn, centred horizontally, and a fixed footer label is
    added near the bottom.

    Args:
        text: caption to render.
        color: RGB background colour.

    Returns:
        The rendered PIL image.
    """
    img = Image.new("RGB", (512, 768), color=color)
    d = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf", 36)
    except Exception:
        # Best effort: fall back to PIL's built-in font when DejaVu is missing.
        font = ImageFont.load_default()

    # Wrap text roughly
    lines = []
    line = ""
    for word in text.split():
        # Only break when the current line has content; otherwise an
        # over-long first word would push an empty line into the output and
        # waste one of the six drawn rows.
        if line and len(line) + len(word) + 1 > 24:
            lines.append(line)
            line = word
        else:
            line = (line + " " + word).strip()
    if line:
        lines.append(line)

    y = 280
    for ln in lines[:6]:
        bbox = d.textbbox((0, 0), ln, font=font)
        text_width = bbox[2] - bbox[0]
        d.text(((512 - text_width) // 2, y), ln, fill=(255, 255, 255), font=font)
        y += 60
    d.text((20, 720), "lore-engine-poc mock", fill=(180, 180, 180), font=font)
    return img
|
|
|
|
|
|
def seed_minio(client, pg_conn):
    """Upload a placeholder PNG per IMAGES row, register it, then embed it.

    For each image: render the placeholder, upload it to MINIO_BUCKET, and
    upsert its row in ``image_manifest``. On conflict only ``object_key``,
    ``caption`` and ``tags`` are refreshed. After committing, embeddings are
    computed for any manifest rows that lack one.

    Args:
        client: MinIO client used for the uploads.
        pg_conn: open psycopg2 connection for the manifest and embeddings.
    """
    import tempfile  # function-scope so the file's import block is untouched

    palette = {
        "Person": (60, 40, 90),     # purple
        "Location": (40, 70, 50),   # dark green
        "Event": (110, 40, 30),     # dark red
        "Item": (110, 90, 20),      # gold
        "Faction": (50, 50, 80),    # slate
    }
    tmp_dir = Path(tempfile.gettempdir())
    with pg_conn.cursor() as cur:
        for image_id, object_key, entity_id, entity_type, caption, tags, era in IMAGES:
            # 1. Generate + upload the image bytes
            img = make_placeholder_image(caption, palette.get(entity_type, (50, 50, 50)))
            tmp = tmp_dir / f"{image_id}.png"
            try:
                img.save(str(tmp), "PNG")
                size = tmp.stat().st_size
                client.fput_object(MINIO_BUCKET, object_key, str(tmp), content_type="image/png")
            finally:
                # Remove the scratch file even when saving or uploading fails.
                if tmp.exists():
                    tmp.unlink()
            # 2. Register manifest in Postgres
            cur.execute("""
                INSERT INTO image_manifest
                (image_id, object_key, entity_id, entity_type, caption, tags, era, width, height, bytes)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                ON CONFLICT (image_id) DO UPDATE
                SET object_key = EXCLUDED.object_key,
                    caption = EXCLUDED.caption,
                    tags = EXCLUDED.tags
            """, (image_id, object_key, entity_id, entity_type, caption, tags, era,
                  img.width, img.height, size))
    pg_conn.commit()
    print(f"[minio+postgres] seeded {len(IMAGES)} images")
    # 3. Compute and store embeddings for the mock images so
    #    `search_images_semantic` works out of the box.
    #    (Must use this function's own connection: `pg` exists only in main().)
    seed_embeddings(pg_conn)
|
|
|
|
|
|
def seed_embeddings(pg_conn):
    """Compute and store an embedding for each manifest row that lacks one.

    Idempotent: rows that already have an ``image_embedding`` entry are
    skipped. Captions are encoded with sentence-transformers
    ``all-MiniLM-L6-v2`` and stored in a ``vector(384)`` column. If
    sentence-transformers is not installed the function prints a notice and
    does nothing. The model is loaded only when there is something to embed.

    Args:
        pg_conn: open psycopg2 connection; committed by this function.
    """
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        print("[embeddings] sentence-transformers not installed — skipping")
        return

    with pg_conn.cursor() as cur:
        # Ensure the embedding table exists (mirrors init.sql).
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
        cur.execute("""
            CREATE TABLE IF NOT EXISTS image_embedding (
                image_id TEXT PRIMARY KEY REFERENCES image_manifest(image_id) ON DELETE CASCADE,
                embedding vector(384) NOT NULL,
                embedded_at TIMESTAMPTZ NOT NULL DEFAULT now()
            );
        """)
        # Manifest rows with no matching embedding row yet.
        cur.execute("""
            SELECT m.image_id, m.caption
            FROM image_manifest m
            LEFT JOIN image_embedding e ON e.image_id = m.image_id
            WHERE e.image_id IS NULL
        """)
        rows = cur.fetchall()
    # Commit the DDL now so it persists (and the transaction is closed) even
    # on the early-return path below.
    pg_conn.commit()

    if not rows:
        print("[embeddings] all images already embedded")
        return

    # Load the model only once we know there is work to do.
    print("[embeddings] loading model all-MiniLM-L6-v2 (~80MB, one-time)...")
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    image_ids = [r[0] for r in rows]
    captions = [r[1] for r in rows]
    vectors = model.encode(captions, convert_to_numpy=True, show_progress_bar=False)

    with pg_conn.cursor() as cur:
        for image_id, vec in zip(image_ids, vectors):
            # Text form "[x1,x2,...]" that is cast to vector in the INSERT.
            vec_str = "[" + ",".join(f"{x:.6f}" for x in vec.tolist()) + "]"
            cur.execute(
                "INSERT INTO image_embedding (image_id, embedding) VALUES (%s, %s::vector) "
                "ON CONFLICT (image_id) DO UPDATE SET embedding = EXCLUDED.embedding, embedded_at = now();",
                (image_id, vec_str),
            )
    pg_conn.commit()
    print(f"[embeddings] wrote {len(rows)} embeddings")
|
|
|
|
|
|
# ─── main ────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Connect to Neo4j, Postgres and MinIO, then seed each in turn.

    The Postgres connection and the Neo4j driver are closed even when a
    seeding step raises. The success banner is printed only when every step
    completed.
    """
    driver = load_neo4j()
    try:
        pg = load_postgres()
        try:
            minio = load_minio()

            seed_neo4j(driver)
            seed_postgres(pg)
            seed_minio(minio, pg)
        finally:
            pg.close()
    finally:
        driver.close()
    print("\n✅ mock world loaded — try the MCP gateway at http://localhost:8765/mcp")
|
|
|
|
|
|
# Run the seeder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|