Files
lore-engine-poc/tests/test_seed_embeddings.py
Hermes add264eb04 T2: pgvector image embeddings — plugin, schema, seed, hook, tests
- docker-compose: swap postgres image to pgvector/pgvector:pg16
- postgres/init.sql: CREATE EXTENSION vector; image_embedding table
- plugins/embeddings.py: embed_images + search_images_semantic
  (sentence-transformers all-MiniLM-L6-v2, lazy-loaded, pgvector <=> cosine)
- plugins/images.py: register_image kicks off background embed worker
- seed.py: seed_embeddings writes 4 embeddings for the mock images
- README: semantic image search section + T3 note
- 11 tests across 4 files, all green:
    test_embeddings_plugin.py (4): schema, ordering, idempotency, stub
    test_embeddings_real_model.py (3): real MiniLM, acceptance queries
    test_register_image_hook.py (2): manifest row, end-to-end hook
    test_seed_embeddings.py (2): writes 4, idempotent
- Includes T3 consistency plugin skeleton (4 stub tools)
2026-06-16 14:30:10 +00:00

104 lines
3.9 KiB
Python

"""
Tests for seed.py's embedding step. Verifies the seed function is idempotent
and writes the expected 4 embeddings against a live pgvector DB.
"""
import os
import sys
import pytest
import psycopg2
# Project root: the parent of the directory that contains this test file.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Expose gateway/ and plugins/ as import roots, leaving sys.path untouched for
# any of them that is already listed.
for _subdir in ("gateway", "plugins"):
    _import_root = os.path.join(ROOT, _subdir)
    if _import_root in sys.path:
        continue
    sys.path.insert(0, _import_root)

# seed.py is a plain module under ROOT rather than part of a package, so ROOT
# itself is pushed to the front of sys.path (unconditionally) to make
# `import seed` resolve.
sys.path.insert(0, ROOT)
# Skip every test in this module when the sentence_transformers package cannot
# be imported. NOTE(review): presumably seed_embeddings needs it to compute the
# embeddings -- confirm against seed.py / plugins/embeddings.py.
pytest.importorskip("sentence_transformers")
# Connection string for the Postgres instance these tests run against. The
# TEST_PG_PGVECTOR_URL environment variable takes precedence over the default.
PG_PGVECTOR_URL = os.getenv(
    "TEST_PG_PGVECTOR_URL",
    "postgresql://lore:***@localhost:5433/lore",
)
# (image_id, caption) pairs for the four mock images. The seed_pg fixture
# upserts one image_manifest row per entry.
CAPTIONS = [
    (
        "img_aldric_portrait",
        "Portrait of Aldric Raventhorne, Lord of Thornwall. Middle-aged, dark hair, a scar above the left eye.",
    ),
    (
        "img_vex_portrait",
        "Vex the Silent, a hooded thief from the alleys of Mardsville. Face mostly in shadow.",
    ),
    (
        "img_thornwall",
        "Thornwall Keep at dawn. The banners of House Vyr fly from the battlements.",
    ),
    (
        "img_battle",
        "The Battle of Black Spire, where Aldric defeated General Kael. House Vyr's banners hold the ridge.",
    ),
]
@pytest.fixture(scope="module")
def seed_pg():
    """Yield a psycopg2 connection to a database prepared for the seed tests.

    Setup creates the ``vector`` extension and the ``image_manifest`` and
    ``image_embedding`` tables when they are missing, upserts one manifest row
    per ``CAPTIONS`` entry (refreshing the caption of rows that already
    exist), and commits. The single connection is shared by every test in
    this module and is closed on teardown.

    Yields:
        The open connection to ``PG_PGVECTOR_URL``.
    """
    conn = psycopg2.connect(PG_PGVECTOR_URL)
    # try/finally so the connection is also released when a setup statement
    # fails before the yield, not only after the tests have run.
    try:
        with conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS image_manifest (
                    id BIGSERIAL PRIMARY KEY,
                    image_id TEXT NOT NULL UNIQUE,
                    object_key TEXT NOT NULL,
                    entity_id TEXT, entity_type TEXT,
                    caption TEXT NOT NULL, tags TEXT[],
                    era TEXT, uploaded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
                    width INT, height INT, bytes BIGINT
                );
            """)
            cur.execute("""
                CREATE TABLE IF NOT EXISTS image_embedding (
                    image_id TEXT PRIMARY KEY,
                    embedding vector(384) NOT NULL,
                    embedded_at TIMESTAMPTZ NOT NULL DEFAULT now()
                );
            """)
            # Upsert so a manifest row left over from an earlier run picks up
            # the current caption instead of raising on the UNIQUE constraint.
            for image_id, caption in CAPTIONS:
                cur.execute(
                    "INSERT INTO image_manifest (image_id, object_key, caption) VALUES (%s,%s,%s) ON CONFLICT (image_id) DO UPDATE SET caption = EXCLUDED.caption;",
                    (image_id, f"k/{image_id}", caption),
                )
        conn.commit()
        yield conn
    finally:
        conn.close()
def test_seed_embeddings_writes_four(seed_pg):
    """After a fresh seed, every mock image in CAPTIONS has an embedding row."""
    from seed import seed_embeddings

    # Single source of truth for the mock image ids; sorted so the comparison
    # below does not depend on row order.
    expected_ids = sorted(image_id for image_id, _ in CAPTIONS)

    # Wipe first to make sure we test the full write path.
    with seed_pg.cursor() as cur:
        cur.execute("DELETE FROM image_embedding;")
    seed_pg.commit()

    seed_embeddings(seed_pg)

    with seed_pg.cursor() as cur:
        # Look only at the mock images: other tests may have left additional
        # manifest rows behind. psycopg2 adapts the list to a SQL ARRAY.
        cur.execute(
            "SELECT image_id FROM image_embedding WHERE image_id = ANY(%s)",
            (expected_ids,),
        )
        # Sort in Python rather than with ORDER BY so the expected order does
        # not depend on the database collation.
        found_ids = sorted(row[0] for row in cur.fetchall())
    assert found_ids == expected_ids, found_ids
def test_seed_embeddings_is_idempotent(seed_pg):
    """Re-running seed_embeddings leaves exactly one embedding row per mock image.

    NOTE(review): where image_id is the primary key of image_embedding (as in
    the table definition used by the seed_pg fixture), a per-id count can
    never exceed 1. What this test effectively guards is that a re-run does
    not raise and that every mock image still has its row. It does not prove
    that existing embeddings are left un-recomputed.
    """
    from seed import seed_embeddings

    mock_ids = [image_id for image_id, _ in CAPTIONS]

    # Seed twice so the second call is a genuine re-run even when this test is
    # executed on its own or before the other tests in this module.
    seed_embeddings(seed_pg)
    seed_embeddings(seed_pg)

    with seed_pg.cursor() as cur:
        # psycopg2 adapts the Python list to a SQL ARRAY for = ANY(...).
        cur.execute(
            "SELECT image_id, count(*) FROM image_embedding"
            " WHERE image_id = ANY(%s) GROUP BY image_id",
            (mock_ids,),
        )
        counts = dict(cur.fetchall())
    # Every mock image is present ...
    assert set(counts) == set(mock_ids), counts
    # ... and none of them has more than one embedding row.
    assert all(n == 1 for n in counts.values()), counts