Some checks failed
test / contract-and-unit (push) Failing after 15s
Adds the performance metrics endpoint and React Query hooks for the dashboard. Backend: - PerformanceResponse / PhaseMetrics / ProjectMetrics in api_schemas.py - GET /v1/performance?days=N returns aggregated metrics from cost_ledger (avg request time, p95, avg tokens, avg cost) and events_outbox (stage progression timing, per-project failure rates) - Verified working: 140 requests / 47 failures (33.6%), spec p95 9409s, build p95 3374s, mindmaps 26.8% failure rate Frontend: - usePerformance() hook with TypeScript interfaces - Ready for widget creation (PerfPhaseTable, PerfStageProgression, PerfFailureRates, PerfTokenSparkline) — pending UI build Build/test infra: - Dockerfile and docker-compose.yml updates for the perf schema
306 lines
13 KiB
YAML
306 lines
13 KiB
YAML
services:
|
||
db:
  # Primary Postgres for the stack; the orchestrator, scheduler and API
  # services in this file point DAMASCUS_PG_HOST at it.
  image: postgres:16
  restart: unless-stopped
  environment:
    # NOTE(review): default credentials are committed here. Tolerable only
    # while the port below stays loopback-bound — confirm before exposing.
    POSTGRES_USER: damascus
    POSTGRES_PASSWORD: damascus
    POSTGRES_DB: damascus
  volumes:
    - dbdata:/var/lib/postgresql/data
  # Expose DB only on loopback for the E2E test suite (host runs pytest)
  ports:
    - "127.0.0.1:5432:5432"
  healthcheck:
    test: ["CMD", "pg_isready", "-U", "damascus", "-d", "damascus"]
    interval: 5s
    timeout: 5s
    retries: 20
  # Self-heal a tainted dbdata volume on bootstrap (skill pitfall
  # "Stack drift after a compose-swap PR merge is the silent test-killer",
  # 2026-06-23). After an engine-swap PR (e.g. MySQL→Postgres), the named
  # `dbdata` volume may still hold the old engine's data, which makes
  # `initdb` error with `directory exists but is not empty`. Detect the
  # tainted state (non-empty AND no PG_VERSION) and wipe the directory
  # before the entrypoint runs initdb. A fresh volume is empty and an
  # initialised cluster keeps PG_VERSION, so the check does nothing then.
  #
  # Passed as an explicit argv list with a literal (`|`) block scalar so the
  # script's newlines — and the `\` line continuation — reach bash intact.
  # In a folded (`>`) scalar, equally-indented lines are joined with spaces,
  # which turns `\<newline>` into an escaped space and breaks the `[` test.
  # `$$` is Compose's escape for a literal `$`.
  command:
    - bash
    - "-c"
    - |
      if [ -n "$$(ls -A /var/lib/postgresql/data 2>/dev/null)" ] \
        && [ ! -f /var/lib/postgresql/data/PG_VERSION ]; then
        echo "[db] tainted data dir detected (no PG_VERSION); wiping /var/lib/postgresql/data/* before initdb"
        rm -rf /var/lib/postgresql/data/* /var/lib/postgresql/data/.[!.]*
      fi
      exec docker-entrypoint.sh postgres
|
||
|
||
redis:
  # Redis 7; the orchestrator services in this file address it as
  # redis://redis:6379 via DAMASCUS_REDIS_URL.
  image: redis:7
  restart: unless-stopped
  healthcheck:
    # Probe with `redis-cli ping` every 5s; each probe may take up to 5s.
    test:
      - CMD
      - redis-cli
      - ping
    interval: 5s
    timeout: 5s
    retries: 20
|
||
|
||
# Test-only Postgres for the pytest suite. The tests/conftest.py
# autouse `reset_state` fixture must NEVER touch the production DB
# (port 5432, holds live orchestrator state). Connect to `db-test:5432`
# from inside the orchestrator container, or `127.0.0.1:5433` from the
# host. Separate volume, separate credentials.
db-test:
  image: postgres:16
  restart: unless-stopped
  environment:
    POSTGRES_USER: damascus_test
    POSTGRES_PASSWORD: damascus_test
    POSTGRES_DB: damascus_test
  volumes:
    - dbtestdata:/var/lib/postgresql/data
  # Loopback-only; host port 5433 maps to the container's 5432.
  ports:
    - "127.0.0.1:5433:5432"
  healthcheck:
    test: ["CMD", "pg_isready", "-U", "damascus_test", "-d", "damascus_test"]
    interval: 5s
    timeout: 5s
    retries: 20
  # Wipe the data directory when it is non-empty but has no PG_VERSION
  # (i.e. it does not hold an initialised Postgres cluster), then hand off
  # to the image's normal entrypoint.
  #
  # Passed as an explicit argv list with a literal (`|`) block scalar so the
  # script's newlines — and the `\` line continuation — reach bash intact.
  # In a folded (`>`) scalar, equally-indented lines are joined with spaces,
  # which turns `\<newline>` into an escaped space and breaks the `[` test.
  # `$$` is Compose's escape for a literal `$`.
  command:
    - bash
    - "-c"
    - |
      if [ -n "$$(ls -A /var/lib/postgresql/data 2>/dev/null)" ] \
        && [ ! -f /var/lib/postgresql/data/PG_VERSION ]; then
        echo "[db-test] tainted data dir detected (no PG_VERSION); wiping /var/lib/postgresql/data/* before initdb"
        rm -rf /var/lib/postgresql/data/* /var/lib/postgresql/data/.[!.]*
      fi
      exec docker-entrypoint.sh postgres
|
||
|
||
orchestrator:
  # Taskiq worker. Builds the damascus-orchestrator image, which the
  # orchestrator-scheduler and damascus-api services also run.
  build: .
  image: damascus-orchestrator:latest
  restart: unless-stopped
  depends_on:
    db:
      condition: service_healthy
    redis:
      condition: service_healthy
  extra_hosts:
    - "host.docker.internal:host-gateway"  # reach host's LiteLLM / Gitea
  environment:
    DAMASCUS_PG_HOST: db
    DAMASCUS_PG_PORT: "5432"
    DAMASCUS_PG_USER: damascus
    DAMASCUS_PG_PASSWORD: damascus
    DAMASCUS_PG_DB: damascus

    # Taskiq broker transport (BullMQ-equivalent)
    DAMASCUS_REDIS_URL: redis://redis:6379

    # LLM proxy on the host (default port 4000)
    DAMASCUS_LLM_BASE_URL: http://host.docker.internal:4000
    DAMASCUS_LLM_API_KEY: sk-dummy
    DAMASCUS_LLM_MODEL: minimax-m3
    # Build phase cap (bumped 2026-06-27: 80 → 120 → 140 → 180 → 220 → 280;
    # Shape 1c escape — 13+ rows hit cap simultaneously, worktrees have real
    # partial code).
    # NOTE(review): the recorded history ends at 280 but the value is 320 —
    # confirm 320 is intended and extend the history.
    DAMASCUS_CLAUDE_MAX_TURNS: "320"

    # Gitea on the host network (loopback-only API)
    DAMASCUS_GITEA_URL: https://git.homelab.local
    # Secret — never commit the value. Compose substitutes it from the
    # invoking shell or the project's .env file and refuses to start when it
    # is unset. The token that used to be hard-coded on this line has been
    # exposed in version control and must be revoked and re-issued.
    DAMASCUS_GITEA_TOKEN: "${DAMASCUS_GITEA_TOKEN:?set DAMASCUS_GITEA_TOKEN in the environment or .env}"

    # Discord relay (optional)
    DAMASCUS_DISCORD_WEBHOOK: ""

    # External concurrency id (override per host for multi-tick parallelism)
    DAMASCUS_CONCURRENCY_ID: orch-1
    DAMASCUS_MAX_CONCURRENT: "10"

    # BMAD + wiki live inside the image at /opt/damascus/{bmad,llm-wiki}
    DAMASCUS_BMAD_DIR: /opt/damascus/bmad
    DAMASCUS_WIKI_DIR: /opt/damascus/llm-wiki
  volumes:
    - orchdata:/data
    - worktrees:/workspace/worktrees
    - projects:/workspace/projects
    - ./specs:/data/specs
    # Shared wiki (Karpathy-style LLM-wiki) — bind-mounted so workers can mount it
    - ./wiki:/opt/damascus/llm-wiki
    # Mount the host's BMAD output dirs under /opt/damascus/bmad/<project>/
    - /root/restitution/_bmad-output:/opt/damascus/bmad/restitution/_bmad-output:ro
    - /root/mindmaps-prds/_bmad-output:/opt/damascus/bmad/mindmaps/_bmad-output:ro
    - /root/damascus-roadmap/_bmad-output:/opt/damascus/bmad/damascus-roadmap/_bmad-output:ro
    # Lore Engine × GraphMCP substrate merge (Phase 4 epic — 7 phases)
    # Tracked as #29: bind-mount per project is a config liability.
    - /root/lore-engine-merge-prds/_bmad-output:/opt/damascus/bmad/lore-engine-merge/_bmad-output:ro
    # Damascus Bug Fixes Q4 2026 (ADR-004 + ADR-005 — Quick Flow work)
    - /root/damascus-bugfixes-q4-2026-prds/_bmad-output:/opt/damascus/bmad/damascus-bugfixes-q4-2026/_bmad-output:ro
    # BMAD kit — templates, samples, and reference docs. Ships with the
    # orchestrator repo at bmad/_kit/. Read-only.
    - ./bmad/_kit:/opt/damascus/bmad/_kit:ro
    # Legacy _kit location, kept for back-compat with the existing bind
    - /home/kaykayyali/_bmad:/opt/damascus/bmad/_kit_legacy:ro
    # hello-bmad sample project (for verification — remove in real deployments)
    - /root/hello-bmad/_bmad-output:/opt/damascus/bmad/hello-bmad/_bmad-output:ro
    # E2E test suite (read-only; tests run from the host)
    - ./tests:/opt/damascus/tests:ro
  # Taskiq worker — the global concurrency cap (design doc §10). For sync
  # tasks (run_cycle), --max-threadpool-threads is the parallelism knob.
  # Both pool sizes were bumped 2026-06-27 from 1 to 10 to match
  # DAMASCUS_MAX_CONCURRENT=10 (taskiq 0.12.4 floor is 2).
  command:
    - "taskiq"
    - "worker"
    - "damascus.tasks:broker"
    - "--use-process-pool"
    - "--max-process-pool-processes"
    - "10"
    - "--max-threadpool-threads"
    - "10"
|
||
orchestrator-scheduler:
  # Runs the Taskiq scheduler from the damascus-orchestrator image. There is
  # no `build:` key here, so the image must already exist locally (the
  # `orchestrator` and `damascus-api` services build it).
  image: damascus-orchestrator:latest
  restart: unless-stopped
  depends_on:
    redis:
      condition: service_healthy
  environment:
    # Postgres connection settings (same values as the orchestrator service).
    DAMASCUS_PG_HOST: db
    DAMASCUS_PG_PORT: "5432"
    DAMASCUS_PG_USER: damascus
    DAMASCUS_PG_PASSWORD: damascus
    DAMASCUS_PG_DB: damascus
    # Redis connection string.
    DAMASCUS_REDIS_URL: redis://redis:6379
  # Exactly one scheduler enqueues the recurring run_cycle task on the cron.
  # The path must point at the TaskiqScheduler instance, not the broker.
  command:
    - taskiq
    - scheduler
    - "damascus.tasks:scheduler"
|
||
|
||
sidecar-status:
  # Serves the contents of the `orchdata` volume (mounted at /data in the
  # orchestrator service) over HTTP using Python's stdlib http.server.
  image: python:3.12-slim
  restart: unless-stopped
  depends_on:
    - db
  volumes:
    # Read-only: this container only serves files and never needs to write.
    - orchdata:/data:ro
  working_dir: /data
  command: ["python", "-m", "http.server", "9100", "--bind", "0.0.0.0"]
  ports:
    # Published on every host interface, unlike the loopback-only ports on
    # the other services in this file.
    # NOTE(review): http.server has no authentication, so everything on the
    # volume is readable by anyone who can reach host port 9100 — confirm.
    - "9100:9100"
  # Visit http://<host>:9100/status/active.json for the external concurrency view.
|
||
|
||
damascus-api:
  # P2 entry point: FastAPI service exposing the contract §2 endpoints.
  # Same image as `orchestrator` (single-image-multiple-entrypoints); only
  # the command differs. `env_file` loads /root/.hermes/.env so that
  # DAMASCUS_API_TOKEN is available as a process env var; the file itself is
  # not mounted into the container.
  build: .
  image: damascus-orchestrator:latest
  restart: unless-stopped
  depends_on:
    db:
      condition: service_healthy
  env_file:
    - /root/.hermes/.env
  environment:
    DAMASCUS_PG_HOST: db
    DAMASCUS_PG_PORT: "5432"
    DAMASCUS_PG_USER: damascus
    DAMASCUS_PG_PASSWORD: damascus
    DAMASCUS_PG_DB: damascus

    # Pool sizing (contract §6).
    DAMASCUS_API_POOL_MIN: "2"
    DAMASCUS_API_POOL_MAX: "5"

    # Rate limits (contract §4). Override per-host if needed.
    # Bumped 2026-06-27: 30→300 write, 120→1200 read to match the worker
    # pool expansion to 10 procs × 10 threads (the per-IP bucket is shared).
    DAMASCUS_WRITE_RATE_PER_MIN: "300"
    DAMASCUS_READ_RATE_PER_MIN: "1200"

    # UI bundle path (P4 ships the Vite build here). Empty dir → mount
    # is a no-op per the contract.
    DAMASCUS_UI_DIR: /opt/damascus/ui
  volumes:
    # P4 ships the UI bundle into the named `damascus_ui` volume;
    # mount it read-only into the API container at the same path
    # P2's StaticFiles looks at. Empty volume → API serves the API
    # only, no crash.
    - damascus_ui:/opt/damascus/ui:ro
    # damascus-ntfy-bridge state (see skill devops/damascus-ntfy-bridge):
    # the high-water mark of events_outbox ids the bridge has already
    # pushed. Mounted as a named volume so it survives container
    # recreates (otherwise a redeploy would re-ping for events the
    # phone already received). Bind-mount the bridge script itself so
    # it survives image rebuilds without a re-`docker cp`.
    - damascus_ntfy_state:/var/lib/damascus-ntfy
    - /root/.hermes/scripts/damascus-ntfy-bridge.py:/usr/local/bin/damascus-ntfy-bridge.py:ro
  ports:
    # LAN-only by contract §4 (Traefik terminates the public hostname
    # separately). The port is published on the host's loopback interface
    # only, so nothing off-host can connect to it directly. Publishing does
    # not affect container-to-container traffic: services on the compose
    # network can still reach damascus-api:9110.
    - "127.0.0.1:9110:9110"
  healthcheck:
    # Exec form (no shell, so no nested quote escaping). Healthy when
    # /healthz answers within 2s with a JSON object whose "status" is "ok";
    # any exception (connection refused, bad JSON) exits non-zero.
    test:
      - CMD
      - python
      - "-c"
      - |
        import json, sys, urllib.request
        with urllib.request.urlopen("http://127.0.0.1:9110/healthz", timeout=2) as resp:
            sys.exit(0 if json.load(resp).get("status") == "ok" else 1)
    interval: 10s
    timeout: 5s
    retries: 6
  command: ["damascus", "serve", "--host", "0.0.0.0", "--port", "9110"]
|
||
|
||
# damascus-ui-build (P4) — one-shot build of the Vite SPA bundle.
#
# Builds the React 19 + Vite 6 + MUI 6 dashboard image, whose static output
# lives at /bundle, and copies that output into the named volume
# `damascus_ui`. The `damascus-api` service (FastAPI on :9110) mounts the
# same volume read-only at /opt/damascus/ui and serves the bundle from /
# via StaticFiles.
#
# Running `docker compose up damascus-ui-build` does the build and copy,
# then the container exits 0. `docker compose up damascus-api` afterward
# sees the bundle on the volume.
#
# The VITE_API_BASE_URL build arg points the bundle at the in-network API
# for ad-hoc preview from a developer's host browser. Leave empty
# when running the full compose stack so the bundle uses
# window.location.origin (same-origin via the API).
damascus-ui-build:
  build:
    context: ./ui
    dockerfile: Dockerfile
    args:
      VITE_API_BASE_URL: ""
  image: damascus-ui:latest
  volumes:
    # The volume is deliberately NOT mounted at /bundle: mounting a volume
    # over the path the image wrote the bundle to would hide the in-image
    # files for the container's lifetime. It is mounted at a parallel path
    # instead and the command below copies across:
    #   /bundle      (in-image build output)
    #   /bundle-out  (named volume, initially empty)
    - damascus_ui:/bundle-out
  # `restart: "no"` ensures compose doesn't loop this one-shot container.
  restart: "no"
  command:
    - sh
    - "-c"
    - |
      # Stop at the first failing command so a failed copy produces a
      # non-zero exit instead of being masked by the trailing sleep.
      set -e
      mkdir -p /bundle-out
      cp -a /bundle/. /bundle-out/
      echo "[damascus-ui-build] copied $$(du -sh /bundle-out | cut -f1) of UI bundle to damascus_ui volume"
      # Hold the container open for a few seconds so compose's "exited"
      # handling finishes cleanly. In CI a follow-up step can `docker
      # compose up damascus-api` which will then see the volume.
      sleep 5
|
||
|
||
# Named volumes. Every entry uses the default driver and options; the empty
# mapping `{}` states that explicitly instead of leaving a bare null value.
volumes:
  # Postgres data directories for the `db` and `db-test` services.
  dbdata: {}
  dbtestdata: {}

  # Orchestrator volumes: /data, /workspace/worktrees, /workspace/projects.
  orchdata: {}
  worktrees: {}
  projects: {}

  # Carries the built UI bundle from the damascus-ui-build one-shot into
  # the (P2) damascus-api container. Same volume, two services: build
  # writes, api reads. The P4 contract says "drops it into a named volume
  # `damascus_ui`" — this is that volume.
  damascus_ui: {}

  # Persistent state for the damascus-ntfy-bridge running inside the
  # damascus-api container. Holds the bridge's high-water mark in
  # state.json so container recreates don't re-ping for events the
  # phone already received. See skill devops/damascus-ntfy-bridge.
  damascus_ntfy_state: {}