Files
lore-engine-poc/seed.py
Hermes add264eb04 T2: pgvector image embeddings — plugin, schema, seed, hook, tests
- docker-compose: swap postgres image to pgvector/pgvector:pg16
- postgres/init.sql: CREATE EXTENSION vector; image_embedding table
- plugins/embeddings.py: embed_images + search_images_semantic
  (sentence-transformers all-MiniLM-L6-v2, lazy-loaded, pgvector <=> cosine)
- plugins/images.py: register_image kicks off background embed worker
- seed.py: seed_embeddings writes 4 embeddings for the mock images
- README: semantic image search section + T3 note
- 11 tests across 4 files, all green:
    test_embeddings_plugin.py (4): schema, ordering, idempotency, stub
    test_embeddings_real_model.py (3): real MiniLM, acceptance queries
    test_register_image_hook.py (2): manifest row, end-to-end hook
    test_seed_embeddings.py (2): writes 4, idempotent
- Includes T3 consistency plugin skeleton (4 stub tools)
2026-06-16 14:30:10 +00:00

429 lines
19 KiB
Python

#!/usr/bin/env python3
"""
Generate a high-fantasy mock world and load it into the POC stack.
Mock world: the realm of Arda, two eras (1st and 2nd Age, plus the Age of
Iron sub-era), three factions, ten people, four locations, four items, six
events, one lineage, twenty-one time-bounded relations, three trades, and
four images.
This script can be run repeatedly — it's idempotent (uses MERGE in Neo4j,
ON CONFLICT in Postgres).
"""
import datetime as dt
import os
import sys
import time
from pathlib import Path
from neo4j import GraphDatabase
import psycopg2
from minio import Minio
from PIL import Image, ImageDraw, ImageFont
# ─── config (also used by docker-compose) ────────────────────────────────────
# Each setting is read from the environment, falling back to a default that
# points at a stack running on localhost.

# Neo4j (graph store)
NEO4J_URL = os.getenv("NEO4J_URL", "bolt://localhost:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASS = os.getenv("NEO4J_PASSWORD", "lore-dev-password")

# Postgres (trade log, image manifest, embeddings)
# NOTE(review): the password in the default URL reads "***", which looks like
# a redacted placeholder — confirm the intended default.
PG_URL = os.getenv("POSTGRES_URL", "postgresql://lore:***@localhost:5432/lore")

# MinIO (image object storage)
MINIO_URL = os.getenv("MINIO_URL", "http://localhost:9000")
MINIO_USER = os.getenv("MINIO_ACCESS_KEY", "lorelore")
MINIO_PASS = os.getenv("MINIO_SECRET_KEY", "lore-dev-password")
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "lore-images")
# ─── mock data ───────────────────────────────────────────────────────────────
# People, one tuple each: (id, name, born, died, tier, culture).
# `died` is None for several entries.
PEOPLE = [
    ("theron",      "Theron Ashveil",      10,  120,  "noble",    "Valdorni"),
    ("maric",       "Maric Vyr",           85,  160,  "noble",    "Valdorni"),
    ("aldric",      "Aldric Raventhorne",  220, 285,  "noble",    "Valdorni"),
    ("elara",       "Elara Raventhorne",   220, None, "noble",    "Valdorni"),
    ("cael",        "Cael Vyr",            160, 240,  "noble",    "Valdorni"),
    ("yssa",        "Yssa Raventhorne",    165, None, "noble",    "Valdorni"),
    ("vex",         "Vex the Silent",      180, None, "commoner", "Mardsvillan"),
    ("alessia",     "Alessia Dusk",        190, None, "commoner", "Mardsvillan"),
    ("kael",        "General Kael",        200, None, "noble",    "Crimson Pact"),
    ("guildmaster", "Guildmaster Torren",  175, None, "noble",    "Mardsvillan"),
]
# Factions, one tuple each: (id, name, founded, dissolved).
# `dissolved` is None for every faction listed here.
FACTIONS = [
    ("house_vyr",    "House Vyr",        85,  None),
    ("crimson_pact", "The Crimson Pact", 150, None),
    ("merchants",    "Merchants Guild",  100, None),
]
# Locations, one tuple each: (id, name).
LOCATIONS = [
    ("valdorn",     "Valdorn"),
    ("mardsville",  "Mardsville"),
    ("thornwall",   "Thornwall Keep"),
    ("black_spire", "Black Spire Pass"),
]
# Eras, one tuple each: (slug, name, start, end, parent).
# `parent` is the slug of the enclosing era, or None for a top-level era.
ERAS = [
    ("1st_age",             "First Age",   0,   100, None),
    ("2nd_age",             "Second Age",  100, 300, None),
    ("2nd_age.age_of_iron", "Age of Iron", 150, 300, "2nd_age"),
]
# Events, one tuple each: (id, name, in_fiction_time, era_slug, location_id).
# NOTE(review): e2 is dated "2nd_age.year_85", yet ERAS starts the Second Age
# at 100 and RELATIONS dates Theron founding House Vyr as "1st_age.year_85" —
# confirm which one is intended.
EVENTS = [
    ("e1", "Battle of Black Spire",          "2nd_age.year_232", "2nd_age", "black_spire"),
    ("e2", "Founding of House Vyr",          "2nd_age.year_85",  "2nd_age", "valdorn"),
    ("e3", "Crimson Pact Founded",           "2nd_age.year_150", "2nd_age", "mardsville"),
    ("e4", "Aldric becomes lord",            "2nd_age.year_240", "2nd_age", "thornwall"),
    ("e5", "The Mardsville Heist",           "2nd_age.year_265", "2nd_age", "mardsville"),
    ("e6", "Crimson Pact attacks Thornwall", "2nd_age.year_280", "2nd_age", "thornwall"),
]
# Items, one tuple each: (id, name, kind).
ITEMS = [
    ("sword_eventide", "Sword of Eventide", "weapon"),
    ("pale_ledger",    "The Pale Ledger",   "document"),
    ("ruby_eye",       "Ruby Eye of Kael",  "artifact"),
    ("silver_locket",  "Elara's Locket",    "jewelry"),
]
# Time-bounded relations (the interesting ones — not just static).
# One tuple each:
#   (from_kind, from_id, rel, to_kind, to_id, valid_from, valid_until)
# Either bound may be None.
RELATIONS = [
    # Family
    ("Person",   "theron",       "PARENT_OF",       "Person",   "maric",          "1st_age.year_50",  "2nd_age.year_120"),
    ("Person",   "maric",        "PARENT_OF",       "Person",   "cael",           "2nd_age.year_180", None),
    ("Person",   "cael",         "PARENT_OF",       "Person",   "aldric",         "2nd_age.year_240", "2nd_age.year_285"),
    ("Person",   "yssa",         "PARENT_OF",       "Person",   "aldric",         "2nd_age.year_240", "2nd_age.year_285"),
    ("Person",   "aldric",       "SPOUSE_OF",       "Person",   "elara",          "2nd_age.year_250", None),
    # Membership and rule
    ("Person",   "theron",       "FOUNDED",         "Faction",  "house_vyr",      "1st_age.year_85",  None),
    ("Person",   "maric",        "MEMBER_OF",       "Faction",  "house_vyr",      "2nd_age.year_100", "2nd_age.year_160"),
    ("Person",   "aldric",       "MEMBER_OF",       "Faction",  "house_vyr",      "2nd_age.year_240", None),
    ("Person",   "aldric",       "RULES",           "Location", "thornwall",      "2nd_age.year_240", "2nd_age.year_285"),
    ("Person",   "kael",         "MEMBER_OF",       "Faction",  "crimson_pact",   "2nd_age.year_200", None),
    ("Faction",  "crimson_pact", "RULES",           "Location", "mardsville",     "2nd_age.year_160", "2nd_age.year_232"),
    # Faction politics
    ("Faction",  "house_vyr",    "ALLIED_WITH",     "Faction",  "merchants",      "2nd_age.year_100", None),
    ("Faction",  "crimson_pact", "ENEMY_OF",        "Faction",  "house_vyr",      "2nd_age.year_150", None),
    # Possessions
    ("Person",   "aldric",       "POSSESSES",       "Item",     "sword_eventide", "2nd_age.year_245", None),
    ("Person",   "elara",        "POSSESSES",       "Item",     "silver_locket",  "2nd_age.year_250", None),
    # Geography
    ("Location", "thornwall",    "PART_OF",         "Location", "valdorn",        None,               None),
    ("Location", "mardsville",   "PART_OF",         "Location", "valdorn",        None,               None),
    # Event participation
    ("Event",    "e1",           "PARTICIPATED_IN", "Person",   "aldric",         "2nd_age.year_232", "2nd_age.year_232"),
    ("Event",    "e1",           "PARTICIPATED_IN", "Person",   "kael",           "2nd_age.year_232", "2nd_age.year_232"),
    ("Event",    "e5",           "PARTICIPATED_IN", "Person",   "vex",            "2nd_age.year_265", "2nd_age.year_265"),
    ("Event",    "e6",           "PARTICIPATED_IN", "Person",   "aldric",         "2nd_age.year_280", "2nd_age.year_280"),
]
# Lineage groups, one tuple each: (id, name, founder person id).
LINEAGES = [
    (
        "house_vyr_bloodline",
        "House Vyr (bloodline)",
        "theron",
    ),
]
# Trade log entries (stored in Postgres), one tuple each:
#   (buyer, seller, item, qty, unit, unit_price, in_fiction_time, location, notes)
TRADES = [
    (
        "aldric", "guildmaster", "pale_ledger",
        1, "gp", 500,
        "2nd_age.year_265", "mardsville",
        "Aldric bought the Pale Ledger via Vex",
    ),
    (
        "elara", "guildmaster", "silver_locket",
        1, "gp", 120,
        "2nd_age.year_255", "mardsville",
        "Gift for Elara",
    ),
    (
        "kael", "guildmaster", "ruby_eye",
        1, "gp", 900,
        "2nd_age.year_270", "mardsville",
        "Crimson Pact acquisition",
    ),
]
# Images, one tuple each:
#   (image_id, object_key, entity_id, entity_type, caption, tags, era)
# `object_key` is the path inside the MinIO bucket; `tags` is a list of strings.
IMAGES = [
    (
        "img_aldric_portrait",
        "characters/aldric_portrait.png",
        "aldric",
        "Person",
        "Portrait of Aldric Raventhorne, Lord of Thornwall. Middle-aged, dark hair, a scar above the left eye.",
        ["portrait", "noble", "thornwall"],
        "2nd_age",
    ),
    (
        "img_vex_portrait",
        "characters/vex_portrait.png",
        "vex",
        "Person",
        "Vex the Silent, a hooded thief from the alleys of Mardsville. Face mostly in shadow.",
        ["portrait", "thief", "mardsville"],
        "2nd_age",
    ),
    (
        "img_thornwall",
        "places/thornwall.png",
        "thornwall",
        "Location",
        "Thornwall Keep at dawn. The banners of House Vyr fly from the battlements.",
        ["keep", "house_vyr", "dawn"],
        "2nd_age",
    ),
    (
        "img_battle",
        "events/battle_of_black_spire.png",
        "e1",
        "Event",
        "The Battle of Black Spire, where Aldric defeated General Kael. House Vyr's banners hold the ridge.",
        ["battle", "aldric", "kael", "house_vyr"],
        "2nd_age",
    ),
]
# ─── helpers ─────────────────────────────────────────────────────────────────
def load_neo4j():
    """Create a Neo4j driver and wait until the server accepts connections.

    Connectivity is verified up to 30 times with a two-second pause after
    each failure.

    Returns:
        The driver, once ``verify_connectivity()`` succeeds.

    Raises:
        RuntimeError: If connectivity could not be verified in 30 attempts.
    """
    print(f"[neo4j] connecting to {NEO4J_URL}")
    driver = GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASS))
    attempt = 0
    while attempt < 30:
        try:
            driver.verify_connectivity()
        except Exception as exc:
            # Server not accepting connections yet: report, pause, try again.
            print(f"[neo4j] not ready ({exc}); retry {attempt}")
            time.sleep(2)
        else:
            return driver
        attempt += 1
    raise RuntimeError("neo4j never came up")
def load_postgres():
    """Connect to Postgres, retrying until the server is reachable.

    Up to 30 connection attempts are made with a two-second pause after
    each failure.

    NOTE(review): the log line prints PG_URL verbatim, including any password
    embedded in the URL.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: If no attempt succeeded.
    """
    print(f"[postgres] connecting to {PG_URL}")
    attempt = 0
    while attempt < 30:
        try:
            connection = psycopg2.connect(PG_URL)
        except Exception as exc:
            print(f"[postgres] not ready ({exc}); retry {attempt}")
            time.sleep(2)
            attempt += 1
        else:
            return connection
    raise RuntimeError("postgres never came up")
def load_minio():
    """Connect to MinIO, make sure the image bucket exists, and return the client.

    The client needs a bare ``host:port`` endpoint plus a TLS flag. Both are
    derived from ``MINIO_URL``: an ``https://`` URL turns TLS on, an
    ``http://`` URL (or a bare ``host:port``) leaves it off, and any path
    component is dropped.

    Up to 30 attempts are made with a two-second pause after each failure.

    Returns:
        A ``Minio`` client whose ``MINIO_BUCKET`` bucket exists.

    Raises:
        RuntimeError: If no attempt succeeded.
    """
    from urllib.parse import urlsplit

    print(f"[minio] connecting to {MINIO_URL}")
    if "://" in MINIO_URL:
        parts = urlsplit(MINIO_URL)
        endpoint = parts.netloc
        secure = parts.scheme.lower() == "https"
    else:
        # No scheme given: treat the value as a plain-HTTP host:port endpoint.
        endpoint = MINIO_URL
        secure = False

    for i in range(30):
        try:
            c = Minio(endpoint, access_key=MINIO_USER, secret_key=MINIO_PASS,
                      secure=secure)
            # Make sure bucket exists
            if not c.bucket_exists(MINIO_BUCKET):
                c.make_bucket(MINIO_BUCKET)
            return c
        except Exception as e:
            print(f"[minio] not ready ({e}); retry {i}")
            time.sleep(2)
    raise RuntimeError("minio never came up")
# ─── seeder functions ────────────────────────────────────────────────────────
def seed_neo4j(driver):
    """Load the mock graph (eras, entities, lineages, relations) into Neo4j.

    Nodes and relationships are written with MERGE and constraints with
    IF NOT EXISTS, so the function can be run repeatedly.

    Args:
        driver: A connected Neo4j driver; one session is opened for the whole
            run.
    """
    with driver.session() as s:
        # Constraints: unique `id` per entity label, unique `slug` for eras.
        for label in ["Person", "Faction", "Location", "Item", "Event", "Era", "Lineage"]:
            s.run(f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:{label}) REQUIRE n.id IS UNIQUE")
        s.run("CREATE CONSTRAINT era_slug IF NOT EXISTS FOR (e:Era) REQUIRE e.slug IS UNIQUE")

        # Eras: create every node first, then link children to parents, so a
        # child may be listed before its parent.
        for slug, name, start, end, parent in ERAS:
            s.run("""
                MERGE (e:Era {slug: $slug})
                SET e.name = $name, e.start = $start, e.end = $end, e.parent_slug = $parent
            """, slug=slug, name=name, start=start, end=end, parent=parent)
        for slug, _, _, _, parent in ERAS:
            if parent:
                s.run("""
                    MATCH (child:Era {slug: $slug}), (parent:Era {slug: $p})
                    MERGE (child)-[:PART_OF]->(parent)
                """, slug=slug, p=parent)
        print(f"[neo4j] seeded {len(ERAS)} eras")

        # People
        for pid, name, born, died, tier, culture in PEOPLE:
            s.run("""
                MERGE (p:Person {id: $pid})
                SET p.name = $name, p.born = $born, p.died = $died,
                    p.tier = $tier, p.culture = $culture
            """, pid=pid, name=name, born=born, died=died, tier=tier, culture=culture)
        print(f"[neo4j] seeded {len(PEOPLE)} people")

        # Factions
        for fid, name, founded, dissolved in FACTIONS:
            s.run("""
                MERGE (f:Faction {id: $fid})
                SET f.name = $name, f.founded = $founded, f.dissolved = $dissolved
            """, fid=fid, name=name, founded=founded, dissolved=dissolved)
        print(f"[neo4j] seeded {len(FACTIONS)} factions")

        # Locations
        for lid, name in LOCATIONS:
            s.run("MERGE (l:Location {id: $lid}) SET l.name = $name",
                  lid=lid, name=name)
        print(f"[neo4j] seeded {len(LOCATIONS)} locations")

        # Items
        for iid, name, kind in ITEMS:
            s.run("MERGE (i:Item {id: $iid}) SET i.name = $name, i.kind = $kind",
                  iid=iid, name=name, kind=kind)
        print(f"[neo4j] seeded {len(ITEMS)} items")

        # Events, each linked to its era and its location.
        for eid, name, when, era_slug, loc_id in EVENTS:
            s.run("""
                MERGE (e:Event {id: $eid})
                SET e.name = $name, e.in_fiction_time = $when
                WITH e
                MATCH (era:Era {slug: $era_slug})
                MERGE (e)-[:OCCURRED_DURING]->(era)
                WITH e
                MATCH (l:Location {id: $loc_id})
                MERGE (e)-[:OCCURRED_AT]->(l)
            """, eid=eid, name=name, when=when, era_slug=era_slug, loc_id=loc_id)
        print(f"[neo4j] seeded {len(EVENTS)} events")

        # Lineages: link each lineage to its founder, then attach the
        # hard-coded Vyr bloodline members.
        vyr_bloodline = {"theron", "maric", "cael", "aldric"}
        for lin_id, name, founder in LINEAGES:
            s.run("""
                MERGE (l:Lineage {id: $lin_id})
                SET l.name = $name
                WITH l
                MATCH (f:Person {id: $founder})
                MERGE (l)-[:FOUNDED_BY]->(f)
            """, lin_id=lin_id, name=name, founder=founder)
            # Add all Vyr-lineage people
            for pid, *_ in PEOPLE:
                if pid in vyr_bloodline:
                    s.run("""
                        MATCH (l:Lineage {id: $lin_id}), (p:Person {id: $pid})
                        MERGE (p)-[:MEMBER_OF]->(l)
                    """, lin_id=lin_id, pid=pid)
        print(f"[neo4j] seeded {len(LINEAGES)} lineages")

        # Time-bounded relations. Endpoints are matched by label AND id: the
        # uniqueness constraints above are per label, so an id on its own does
        # not identify a node, and a label-less MATCH cannot use the
        # constraint's index. Labels and relationship types cannot be passed
        # as Cypher parameters, so they are interpolated (backtick-quoted)
        # from the constant RELATIONS table.
        for from_kind, from_id, rel, to_kind, to_id, valid_from, valid_until in RELATIONS:
            s.run(f"""
                MATCH (a:`{from_kind}` {{id: $fid}})
                MATCH (b:`{to_kind}` {{id: $tid}})
                MERGE (a)-[r:`{rel}`]->(b)
                SET r.valid_from = $vf, r.valid_until = $vu
            """, fid=from_id, tid=to_id, vf=valid_from, vu=valid_until)
        print(f"[neo4j] seeded {len(RELATIONS)} time-bounded relations")
def seed_postgres(conn):
    """Write the mock TRADES into the ``trade_log`` table and commit.

    ``total_price`` is computed as quantity times unit price. Rows that hit a
    conflict are skipped (``ON CONFLICT DO NOTHING``).

    Args:
        conn: Open Postgres connection.
    """
    insert_sql = """
        INSERT INTO trade_log
        (buyer_id, seller_id, item_id, quantity, unit, unit_price, total_price,
        location_id, in_fiction_time, notes)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON CONFLICT DO NOTHING
    """
    # Reorder each trade into column order and add the derived total.
    rows = [
        (buyer, seller, item, qty, unit, price, qty * price, loc, when, notes)
        for buyer, seller, item, qty, unit, price, when, loc, notes in TRADES
    ]
    with conn.cursor() as cursor:
        for row in rows:
            cursor.execute(insert_sql, row)
    conn.commit()
    print(f"[postgres] seeded {len(TRADES)} trade_log rows")
def make_placeholder_image(text: str, color: tuple) -> Image.Image:
    """Generate a simple 512x768 placeholder image with text on a colored background.

    The text is wrapped to lines of at most 24 characters (a longer single
    word gets a line of its own), and the first six lines are drawn centred
    in white. A grey "lore-engine-poc mock" footer is drawn near the bottom.

    Args:
        text: Caption to render.
        color: RGB background colour.

    Returns:
        The rendered image.
    """
    import textwrap

    img = Image.new("RGB", (512, 768), color=color)
    d = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf", 36)
    except Exception:
        # Best effort: fall back to PIL's built-in font when DejaVu cannot be
        # loaded.
        font = ImageFont.load_default()

    # Greedy word wrap. Whitespace is normalised first so the result depends
    # only on the words. Words are never split, and unlike a hand-rolled loop
    # this cannot emit a blank first line when the first word is itself 24 or
    # more characters long.
    lines = textwrap.wrap(
        " ".join(text.split()),
        width=24,
        break_long_words=False,
        break_on_hyphens=False,
    )

    y = 280
    for ln in lines[:6]:
        left, _, right, _ = d.textbbox((0, 0), ln, font=font)
        line_width = right - left
        d.text(((512 - line_width) // 2, y), ln, fill=(255, 255, 255), font=font)
        y += 60
    d.text((20, 720), "lore-engine-poc mock", fill=(180, 180, 180), font=font)
    return img
def seed_minio(client, pg_conn):
    """Upload a placeholder PNG per mock image and register it in Postgres.

    For every entry in IMAGES the function renders a placeholder, uploads it
    to ``MINIO_BUCKET`` under the entry's object key, and upserts a row into
    ``image_manifest``. After committing, it computes embeddings for the
    manifest rows via ``seed_embeddings``.

    Args:
        client: MinIO client used for the uploads.
        pg_conn: Open Postgres connection, used for the manifest and then
            handed on to ``seed_embeddings``.
    """
    palette = {
        "Person": (60, 40, 90),     # purple
        "Location": (40, 70, 50),   # dark green
        "Event": (110, 40, 30),     # dark red
        "Item": (110, 90, 20),      # gold
        "Faction": (50, 50, 80),    # slate
    }
    with pg_conn.cursor() as cur:
        for image_id, object_key, entity_id, entity_type, caption, tags, era in IMAGES:
            # 1. Generate + upload the image bytes
            img = make_placeholder_image(caption, palette.get(entity_type, (50, 50, 50)))
            tmp = f"/tmp/{image_id}.png"
            try:
                img.save(tmp, "PNG")
                size = Path(tmp).stat().st_size
                client.fput_object(MINIO_BUCKET, object_key, tmp, content_type="image/png")
            finally:
                # Remove the scratch file even when saving or uploading fails.
                if os.path.exists(tmp):
                    os.unlink(tmp)
            # 2. Register manifest in Postgres
            cur.execute("""
                INSERT INTO image_manifest
                (image_id, object_key, entity_id, entity_type, caption, tags, era, width, height, bytes)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                ON CONFLICT (image_id) DO UPDATE
                SET object_key = EXCLUDED.object_key,
                    caption = EXCLUDED.caption,
                    tags = EXCLUDED.tags
            """, (image_id, object_key, entity_id, entity_type, caption, tags, era,
                  img.width, img.height, size))
    pg_conn.commit()
    print(f"[minio+postgres] seeded {len(IMAGES)} images")
    # 3. Compute and store embeddings for the mock images so
    #    `search_images_semantic` works out of the box. The connection must be
    #    this function's own `pg_conn` parameter; no `pg` name exists here.
    seed_embeddings(pg_conn)
def seed_embeddings(pg_conn):
    """Compute and store an embedding for every manifest row that lacks one.

    Idempotent: rows that already have an ``image_embedding`` entry are left
    alone. Requires sentence-transformers; if it is not installed the function
    prints a notice and returns without touching the database. The model
    (all-MiniLM-L6-v2, ~80MB) is loaded only when there is something to embed.

    Args:
        pg_conn: Open Postgres connection.
    """
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        print("[embeddings] sentence-transformers not installed — skipping")
        return

    with pg_conn.cursor() as cur:
        # Ensure the embedding table exists (mirrors init.sql).
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
        cur.execute("""
            CREATE TABLE IF NOT EXISTS image_embedding (
                image_id TEXT PRIMARY KEY REFERENCES image_manifest(image_id) ON DELETE CASCADE,
                embedding vector(384) NOT NULL,
                embedded_at TIMESTAMPTZ NOT NULL DEFAULT now()
            );
        """)
        cur.execute("""
            SELECT m.image_id, m.caption
            FROM image_manifest m
            LEFT JOIN image_embedding e ON e.image_id = m.image_id
            WHERE e.image_id IS NULL
        """)
        rows = cur.fetchall()
    # Commit now so the DDL is kept and the connection is not left inside an
    # open transaction if we return early below.
    pg_conn.commit()

    if not rows:
        print("[embeddings] all images already embedded")
        return

    print("[embeddings] loading model all-MiniLM-L6-v2 (~80MB, one-time)...")
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    image_ids = [r[0] for r in rows]
    captions = [r[1] for r in rows]
    vectors = model.encode(captions, convert_to_numpy=True, show_progress_bar=False)

    with pg_conn.cursor() as cur:
        for image_id, vec in zip(image_ids, vectors):
            # The vector is sent as a bracketed text literal and cast with
            # ::vector in the statement.
            vec_str = "[" + ",".join(f"{x:.6f}" for x in vec.tolist()) + "]"
            cur.execute(
                "INSERT INTO image_embedding (image_id, embedding) VALUES (%s, %s::vector) "
                "ON CONFLICT (image_id) DO UPDATE SET embedding = EXCLUDED.embedding, embedded_at = now();",
                (image_id, vec_str),
            )
    pg_conn.commit()
    print(f"[embeddings] wrote {len(rows)} embeddings")
# ─── main ────────────────────────────────────────────────────────────────────
def main():
    """Connect to Neo4j, Postgres and MinIO, then load the mock world.

    The Postgres connection and the Neo4j driver are closed even when a
    seeding step raises, so a failed run does not leak connections.
    """
    driver = load_neo4j()
    try:
        pg = load_postgres()
        try:
            minio = load_minio()
            seed_neo4j(driver)
            seed_postgres(pg)
            seed_minio(minio, pg)
        finally:
            pg.close()
    finally:
        driver.close()
    print("\n✅ mock world loaded — try the MCP gateway at http://localhost:8765/mcp")


if __name__ == "__main__":
    main()