- docker-compose: swap postgres image to pgvector/pgvector:pg16
- postgres/init.sql: CREATE EXTENSION vector; image_embedding table
- plugins/embeddings.py: embed_images + search_images_semantic
(sentence-transformers all-MiniLM-L6-v2, lazy-loaded, pgvector <=> cosine)
- plugins/images.py: register_image kicks off background embed worker
- seed.py: seed_embeddings writes 4 embeddings for the mock images
- README: semantic image search section + T3 note
- 11 tests across 4 files, all green:
test_embeddings_plugin.py (4): schema, ordering, idempotency, stub
test_embeddings_real_model.py (3): real MiniLM, acceptance queries
test_register_image_hook.py (2): manifest row, end-to-end hook
test_seed_embeddings.py (2): writes 4, idempotent
- Includes T3 consistency plugin skeleton (4 stub tools)
145 lines
5.5 KiB
Python
145 lines
5.5 KiB
Python
"""
Test for the background-embed hook in plugins/images.py `register_image`.

Verifies that calling register_image (a) inserts the manifest row and
(b) eventually causes an embedding to be written. The actual embedding
write may be done by the background thread OR by an explicit call in
the test — what we assert is that the row appears in image_embedding.
"""
|
|
import os
|
|
import sys
|
|
import time
|
|
import threading
|
|
import pytest
|
|
import psycopg2
|
|
|
|
# Repository root: the parent of the directory that holds this test file.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Make the gateway/ and plugins/ directories importable. Each one is inserted
# at the front of sys.path unless it is already listed there.
for p in (os.path.join(ROOT, name) for name in ("gateway", "plugins")):
    if p in sys.path:
        continue
    sys.path.insert(0, p)

# Skip this whole module when sentence_transformers cannot be imported.
pytest.importorskip("sentence_transformers")

# Connection URL of the pgvector-enabled test database; the
# TEST_PG_PGVECTOR_URL environment variable overrides the default.
PG_PGVECTOR_URL = os.getenv(
    "TEST_PG_PGVECTOR_URL",
    "postgresql://lore:***@localhost:5433/lore",
)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def gateway_pg():
    """Yield a module-scoped psycopg2 connection to the pgvector test database.

    Setup creates the ``vector`` extension and the ``image_manifest`` and
    ``image_embedding`` tables if they do not exist yet, then commits.

    Teardown deletes every row whose ``image_id`` matches ``t9_hook%`` from
    both tables so this module's data does not bleed into other test modules
    sharing the same database. The connection is closed even when setup or
    cleanup raises.
    """
    conn = psycopg2.connect(PG_PGVECTOR_URL)
    try:
        with conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS image_manifest (
                    id BIGSERIAL PRIMARY KEY,
                    image_id TEXT NOT NULL UNIQUE,
                    object_key TEXT NOT NULL,
                    entity_id TEXT, entity_type TEXT,
                    caption TEXT NOT NULL, tags TEXT[],
                    era TEXT, uploaded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
                    width INT, height INT, bytes BIGINT
                );
            """)
            cur.execute("""
                CREATE TABLE IF NOT EXISTS image_embedding (
                    image_id TEXT PRIMARY KEY,
                    embedding vector(384) NOT NULL,
                    embedded_at TIMESTAMPTZ NOT NULL DEFAULT now()
                );
            """)
        conn.commit()

        yield conn

        # A test may have left this connection inside an open or aborted
        # transaction; roll it back so the cleanup statements can run.
        conn.rollback()
        # Cleanup: remove rows this test module inserted so they don't bleed
        # into other test modules that share the same DB.
        with conn.cursor() as cur:
            cur.execute("DELETE FROM image_embedding WHERE image_id LIKE 't9_hook%';")
            cur.execute("DELETE FROM image_manifest WHERE image_id LIKE 't9_hook%';")
        conn.commit()
    finally:
        # Always release the connection, including on setup/cleanup errors.
        conn.close()
|
|
|
|
|
|
def _q_pg_with_url(sql, params, fetch, url):
    """Run one SQL statement on a fresh connection to ``url`` and commit it.

    Args:
        sql: Statement text passed to ``cursor.execute``.
        params: Statement parameters; a falsy value is replaced by ``()``.
        fetch: When true and the statement produced a result set, the rows
            are returned.
        url: Connection string handed to ``psycopg2.connect``.

    Returns:
        A list of dicts (column name -> value), one per row, when ``fetch``
        is true and the statement returned rows; otherwise an empty list.
    """
    conn = psycopg2.connect(url)
    try:
        rows = []
        with conn.cursor() as cur:
            cur.execute(sql, params or ())
            if fetch and cur.description:
                cols = [d[0] for d in cur.description]
                rows = [dict(zip(cols, r)) for r in cur.fetchall()]
        # Note: in production, images._q_pg does NOT commit (v1 quirk).
        # For test correctness we commit so the row survives close(). The
        # commit is issued on the fetch path too, so a row-returning write
        # is not discarded when the connection is closed.
        conn.commit()
        return rows
    finally:
        conn.close()
|
|
|
|
|
|
def test_register_image_inserts_manifest_row(monkeypatch, gateway_pg):
    """Check that register_image stores the image's row in image_manifest."""
    from plugins import images

    def _routed_q_pg(sql, params=None, fetch=True):
        # Send the plugin's queries to the pgvector test database.
        return _q_pg_with_url(sql, params, fetch, PG_PGVECTOR_URL)

    monkeypatch.setenv("POSTGRES_URL", PG_PGVECTOR_URL)
    monkeypatch.setattr(images, "_q_pg", _routed_q_pg)

    # Start from a clean slate for this image id.
    stale_row_cleanup = (
        "DELETE FROM image_embedding WHERE image_id = 't9_hook_a';",
        "DELETE FROM image_manifest WHERE image_id = 't9_hook_a';",
    )
    with gateway_pg.cursor() as cursor:
        for statement in stale_row_cleanup:
            cursor.execute(statement)
    gateway_pg.commit()

    payload = {
        "image_id": "t9_hook_a",
        "object_key": "k/t9_hook_a.png",
        "caption": "A noble lord with a scar, framed portrait",
    }
    outcome = images.register_image(payload)
    assert outcome["registered"] is True

    # The manifest row must be visible from the fixture's own connection.
    with gateway_pg.cursor() as cursor:
        cursor.execute("SELECT caption FROM image_manifest WHERE image_id = 't9_hook_a';")
        manifest_row = cursor.fetchone()
    assert manifest_row is not None
    assert "noble lord" in manifest_row[0]
|
|
|
|
|
|
def test_register_image_hook_eventually_writes_embedding(monkeypatch, gateway_pg):
    """After register_image, an embedding row for the image must appear.

    The test polls image_embedding for up to 5 seconds so that a background
    embed worker started by register_image has a chance to write the row.
    If the row has not appeared by then, the test calls
    embeddings._do_embed_images itself and asserts that the row exists
    afterwards. The point of the test is the end-to-end flow
    (register -> embedding row appears), not which of the two paths wrote it.
    """
    from plugins import images, embeddings

    def _embedding_row_exists():
        # True once image_embedding holds a row for the image under test.
        with gateway_pg.cursor() as cur:
            cur.execute("SELECT 1 FROM image_embedding WHERE image_id = 't9_hook_b';")
            return cur.fetchone() is not None

    monkeypatch.setenv("POSTGRES_URL", PG_PGVECTOR_URL)
    monkeypatch.setattr(images, "_q_pg",
                        lambda sql, params=None, fetch=True: _q_pg_with_url(sql, params, fetch, PG_PGVECTOR_URL))

    # Pre-clean
    with gateway_pg.cursor() as cur:
        cur.execute("DELETE FROM image_embedding WHERE image_id = 't9_hook_b';")
        cur.execute("DELETE FROM image_manifest WHERE image_id = 't9_hook_b';")
    gateway_pg.commit()

    # Register
    images.register_image({
        "image_id": "t9_hook_b",
        "object_key": "k/t9_hook_b.png",
        "caption": "A sneaky thief in a hood, alleyway portrait",
    })

    # Wait briefly for the worker to pick the image up. A monotonic clock
    # keeps the 5 s budget immune to wall-clock adjustments.
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        if _embedding_row_exists():
            return
        time.sleep(0.5)

    # If the worker didn't pick it up in 5s, run the routine ourselves.
    embeddings._do_embed_images(limit=50, pg_url=PG_PGVECTOR_URL)
    assert _embedding_row_exists(), "embedding row never appeared"
|