Files
hermes-kanban 79e3e59ab5 feat(verify): P6a manual verification recipe + verify.sh
scripts/verify.sh — bash E2E smoke that proves 'v1 works' without a browser.
8 sections (preflight, stack-up, mcp-stdio, ingest-via-mcp, ui-shows-it,
drive-cycle, cleanup, summary); exits non-zero on first failure. Drives
phase transitions via direct SQL to bypass the orchestrator worker's claim
loop. Cleans up its own rows so re-runs are idempotent.

scripts/_verify_mcp_helper.py — Python MCP stdio helper used by verify.sh.
Drives python -m damascus.mcp_server via the official mcp SDK client and
frames the JSON-RPC handshake + tools/list + ingest_story so bash does
not have to manage Content-Length headers or heredoc framing.

docs/VERIFICATION.md — <1 page runnable-by-hand recipe plus architecture
notes (token source, MCP upstream DNS, why direct SQL, failure modes).

Verified end-to-end: bash scripts/verify.sh exits 0 against the live stack
(7/7 sections green; log at .hermes/evidence/p6a/verify.log, gitignored).
tests/contract + tests/unit still 56/56 green.
2026-06-26 07:03:45 +00:00

318 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# Damascus Entry Points v1 — manual verification recipe (P6a).
#
# End-to-end smoke that proves "v1 works" without a browser. Each
# section gates the next; the script exits non-zero on the first
# failure so it can be wired into a deploy gate later.
#
# Usage:
# bash scripts/verify.sh
#
# Sections (in order):
# 1. preflight — stack healthy + API reachable
# 2. stack-up — bring up db / damascus-api / damascus-ui-build (idempotent)
# 3. mcp-stdio — MCP server handshake + 7 tools visible
# 4. ingest-via-mcp — create one item via MCP ingest_story
# 5. ui-shows-it — GET /v1/items reflects the new item, phase=spec
# 6. drive-cycle — spec → build → review → merged via direct SQL
# 7. cleanup — DELETE the verify-smoke rows so re-runs stay tidy
# 8. summary — green/red checklist
#
# Assumes:
# - /root/damascus-orchestrator is the project root
# - /root/.hermes/.env contains DAMASCUS_API_TOKEN
# - docker compose is on PATH and the damascus stack is registered
# - python3 (with `mcp` and `httpx` installed) is on PATH
set -uo pipefail
# --- paths & config ---------------------------------------------------------
# Project root; an already-set, non-empty REPO_ROOT in the environment wins.
: "${REPO_ROOT:=/root/damascus-orchestrator}"
# Base URL of the HTTP API; DAMASCUS_API_BASE overrides the default.
API_BASE="${DAMASCUS_API_BASE:-http://127.0.0.1:9110}"

# Files inside the repository that the script depends on.
COMPOSE_FILE="${REPO_ROOT}/docker-compose.yml"
MCP_HELPER="${REPO_ROOT}/scripts/_verify_mcp_helper.py"

# Evidence directory and the log path advertised at the end of a green run.
EVIDENCE_DIR="${REPO_ROOT}/.hermes/evidence/p6a"
LOG_FILE="${EVIDENCE_DIR}/verify.log"

# Project name stamped on smoke rows (cleanup deletes by this value) and the
# container names used with `docker exec`.
VERIFY_PROJECT="verify-smoke"
DB_CONTAINER="damascus-orchestrator-db-1"
API_CONTAINER="damascus-orchestrator-damascus-api-1"
# --- bash output helpers ----------------------------------------------------
# Print all arguments (joined by spaces) as one bold line.
bold() {
  printf '\033[1m%s\033[0m\n' "$*"
}
# Print a green "ok" marker followed by the arguments.
green() {
  printf ' \033[32mok\033[0m %s\n' "$*"
}
# Print a red "FAIL" marker followed by the arguments.
red() {
  printf ' \033[31mFAIL\033[0m %s\n' "$*"
}
# Outcome log consumed by the summary checklist. Each element is a single
# string of the form "name|exit_code|note".
RESULTS=()
# Name of the section currently executing; used to label failures.
CURRENT_SECTION=""

# Remember the section name ($1) and print a blank bold line plus a bold
# "[name]" banner.
_section_start() {
  CURRENT_SECTION="$1"
  bold ""
  bold "[${CURRENT_SECTION}]"
}

# Append one pre-formatted "name|exit_code|note" entry ($1) to RESULTS.
_record() {
  local entry="$1"
  RESULTS+=("${entry}")
}
# --- failure handler --------------------------------------------------------
# Report a failure for the section in progress and stop the run.
# Arguments: free-form note; all arguments are joined with spaces.
# Effects:   prints a red "<section>: <note>" line, records a
#            "<section>|1|<note>" entry, then exits with status 1.
# NOTE(review): no EXIT trap is installed in the visible script, so the
# summary checklist is not printed after a failure.
_fail() {
  red "${CURRENT_SECTION}: $*"
  _record "${CURRENT_SECTION}|1|$*"
  exit 1
}
# --- prerequisites ----------------------------------------------------------
# Make sure the evidence directory exists, then verify that every external
# tool and repository file the sections below rely on is available.
mkdir -p "${EVIDENCE_DIR}"
for required_tool in docker curl python3; do
  command -v "${required_tool}" >/dev/null 2>&1 \
    || _fail "${required_tool} not on PATH"
done
[[ -r "${COMPOSE_FILE}" ]] || _fail "compose file not readable: ${COMPOSE_FILE}"
[[ -r "${MCP_HELPER}" ]] || _fail "MCP helper not readable: ${MCP_HELPER}"
# ===========================================================================
# 1. preflight
# ===========================================================================
_section_start "1. preflight"
# `docker compose ps <service>` prints a header row followed by container
# rows; keep only the first data row. `|| true` keeps a failing pipeline from
# being treated as anything other than "no row found".
API_LINE=$(docker compose -f "${COMPOSE_FILE}" ps damascus-api 2>/dev/null | tail -n +2 | head -1 || true)
if [[ -z "${API_LINE}" ]]; then
  # _fail exits immediately, so the later stack-up section will NOT run;
  # tell the operator how to start the stack instead of implying it will.
  _fail "damascus-api not running; bring it up first (docker compose -f ${COMPOSE_FILE} up -d db damascus-api damascus-ui-build)"
fi
# Match the parenthesised "(healthy)" marker as a fixed string. A bare
# "healthy" pattern would also match "(unhealthy)" and let a broken
# container through the gate.
if ! grep -qF "(healthy)" <<<"${API_LINE}"; then
  _fail "damascus-api is not healthy: ${API_LINE}"
fi
green "docker compose ps damascus-api -> healthy"
# /healthz must answer with exactly the expected JSON body.
HEALTHZ_BODY=$(curl -fsS "${API_BASE}/healthz" 2>/dev/null) || _fail "/healthz request failed"
[[ "${HEALTHZ_BODY}" == '{"status":"ok"}' ]] || _fail "/healthz body unexpected: ${HEALTHZ_BODY}"
green "${API_BASE}/healthz -> {\"status\":\"ok\"}"
# Only the HTTP status code of /v1/items matters here; the body is discarded.
ITEMS_STATUS=$(curl -s -o /dev/null -w '%{http_code}' "${API_BASE}/v1/items")
[[ "${ITEMS_STATUS}" == "200" ]] || _fail "/v1/items returned ${ITEMS_STATUS}"
green "${API_BASE}/v1/items -> 200"
_record "1. preflight|0|stack healthy + API reachable"
# ===========================================================================
# 2. stack-up
# ===========================================================================
_section_start "2. stack-up"
# Re-issuing `up -d` against services that are already running is harmless.
# damascus-ui-build is a one-shot (restart: "no") that copies the Vite bundle
# into the named volume; when the bundle is already present it simply exits 0
# again, which is an acceptable side effect on re-runs.
if ! docker compose -f "${COMPOSE_FILE}" up -d db damascus-api damascus-ui-build >/dev/null 2>&1; then
  _fail "docker compose up failed"
fi
# Poll /healthz once per second for at most 30 attempts (covers a cold start).
# WAITED ends up holding the number of one-second sleeps performed.
HEALTHZ_BODY=""
for (( WAITED = 0; WAITED < 30; WAITED++ )); do
  HEALTHZ_BODY=$(curl -fsS "${API_BASE}/healthz" 2>/dev/null || true)
  [[ "${HEALTHZ_BODY}" == '{"status":"ok"}' ]] && break
  sleep 1
done
if [[ "${HEALTHZ_BODY}" != '{"status":"ok"}' ]]; then
  _fail "/healthz not ok after ${WAITED}s"
fi
green "stack up; /healthz ok (waited ${WAITED}s)"
_record "2. stack-up|0|db + api + ui-build up; healthz responsive"
# ===========================================================================
# 3. mcp-stdio
# ===========================================================================
_section_start "3. mcp-stdio"
# Handshake: the helper prints a JSON object on stdout. On a non-zero exit the
# helper is invoked once more with only stderr captured so the error text can
# be included in the failure note (both helper commands here are read-only).
INIT_JSON=$(python3 "${MCP_HELPER}" initialize 2>/dev/null) \
  || { INIT_ERR=$(python3 "${MCP_HELPER}" initialize 2>&1 >/dev/null); _fail "MCP initialize failed: ${INIT_ERR}"; }
SERVER_NAME=$(printf '%s' "${INIT_JSON}" | python3 -c "import sys, json; print(json.load(sys.stdin)['server_name'])") \
  || _fail "MCP initialize output has no readable server_name: ${INIT_JSON}"
# Plain quoting in the message: "${VAR!r}" is Python f-string syntax and is a
# "bad substitution" in Bash, which would abort before _fail could report.
[[ "${SERVER_NAME}" == "damascus-mcp" ]] || _fail "MCP server name='${SERVER_NAME}' (expected damascus-mcp)"
green "initialize -> server_name=${SERVER_NAME}"
# Tool listing: expect exactly 7 tools and show their names.
TOOLS_JSON=$(python3 "${MCP_HELPER}" list-tools 2>/dev/null) \
  || { TOOLS_ERR=$(python3 "${MCP_HELPER}" list-tools 2>&1 >/dev/null); _fail "MCP list-tools failed: ${TOOLS_ERR}"; }
TOOL_COUNT=$(printf '%s' "${TOOLS_JSON}" | python3 -c "import sys, json; print(json.load(sys.stdin)['tool_count'])") \
  || _fail "MCP list-tools output has no readable tool_count: ${TOOLS_JSON}"
[[ "${TOOL_COUNT}" == "7" ]] || _fail "MCP tool_count=${TOOL_COUNT} (expected 7)"
TOOL_NAMES=$(printf '%s' "${TOOLS_JSON}" | python3 -c "import sys, json; print(', '.join(json.load(sys.stdin)['tool_names']))") \
  || _fail "MCP list-tools output has no readable tool_names: ${TOOLS_JSON}"
green "tools/list -> ${TOOL_COUNT} tools: ${TOOL_NAMES}"
_record "3. mcp-stdio|0|handshake + 7 tools visible"
# ===========================================================================
# 4. ingest-via-mcp
# ===========================================================================
_section_start "4. ingest-via-mcp"
# Story id combines epoch seconds and this shell's PID so each run is distinct.
STORY_ID="VERIFY-$(date +%s)-$$"
TITLE="P6a smoke (auto-generated)"
PRIORITY=100
# Run the helper exactly once: stdout is captured as JSON and stderr goes to a
# temp file. Re-running a write operation just to collect its error text could
# ingest the story a second time and report a different error than the one
# that actually occurred.
INGEST_ERR_FILE=$(mktemp) || _fail "mktemp failed"
if ! INGEST_JSON=$(python3 "${MCP_HELPER}" ingest-story "${VERIFY_PROJECT}" "${STORY_ID}" "${TITLE}" "${PRIORITY}" 2>"${INGEST_ERR_FILE}"); then
  INGEST_ERR=$(<"${INGEST_ERR_FILE}")
  rm -f -- "${INGEST_ERR_FILE}"
  _fail "MCP ingest_story failed: ${INGEST_ERR}"
fi
rm -f -- "${INGEST_ERR_FILE}"
# Pull phase and id out of payload.item; a missing key is reported explicitly.
INGEST_PHASE=$(printf '%s' "${INGEST_JSON}" | python3 -c "import sys, json; print(json.load(sys.stdin)['payload']['item']['phase'])") \
  || _fail "ingest_story response has no readable payload.item.phase: ${INGEST_JSON}"
INGEST_ID=$(printf '%s' "${INGEST_JSON}" | python3 -c "import sys, json; print(json.load(sys.stdin)['payload']['item']['id'])") \
  || _fail "ingest_story response has no readable payload.item.id: ${INGEST_JSON}"
# The id is later interpolated into URLs and SQL, so it must not be empty.
[[ -n "${INGEST_ID}" ]] || _fail "ingest_story returned an empty item id"
[[ "${INGEST_PHASE}" == "spec" ]] || _fail "ingest phase=${INGEST_PHASE} (expected spec)"
green "ingest_story -> id=${INGEST_ID}, phase=${INGEST_PHASE}, project=${VERIFY_PROJECT}, story_id=${STORY_ID}"
_record "4. ingest-via-mcp|0|story=${STORY_ID} phase=spec"
# ===========================================================================
# 5. ui-shows-it
# ===========================================================================
_section_start "5. ui-shows-it"
ITEMS_JSON=$(curl -fsS "${API_BASE}/v1/items" 2>/dev/null) || _fail "/v1/items failed"
# Inline Python matcher: find the item by id and print a compact JSON summary.
# The response body is fed on stdin rather than through an environment
# variable: a single environment string is size-limited by the kernel at exec
# time, so a large item list would make python3 fail to start at all.
# Exit codes: 0 = found, 2 = not found, anything else = parse/runtime error.
MATCHED=$(printf '%s' "${ITEMS_JSON}" | ITEM_ID="${INGEST_ID}" python3 -c '
import json, os, sys

target = os.environ["ITEM_ID"]
data = json.load(sys.stdin)
for item in data.get("items", []):
    if item.get("id") == target:
        print(json.dumps({
            "id": item["id"],
            "phase": item["phase"],
            "project": item["project"],
            "story_id": item["story_id"],
        }))
        raise SystemExit(0)
raise SystemExit(2)
')
MATCH_RC=$?
case "${MATCH_RC}" in
  0) ;;
  2) _fail "item ${INGEST_ID} not found in /v1/items" ;;
  *) _fail "could not process /v1/items response (matcher exit ${MATCH_RC})" ;;
esac
MATCH_PHASE=$(printf '%s' "${MATCHED}" | python3 -c "import sys, json; print(json.load(sys.stdin)['phase'])")
[[ "${MATCH_PHASE}" == "spec" ]] || _fail "matched item phase=${MATCH_PHASE} (expected spec)"
green "/v1/items -> row visible: ${MATCHED}"
_record "5. ui-shows-it|0|/v1/items reflects new row at phase=spec"
# ===========================================================================
# 6. drive-cycle
# ===========================================================================
_section_start "6. drive-cycle"
# We drive phase transitions via direct SQL on the db container (matches
# the pattern in tests/e2e/test_entry_points_e2e.py::phase3). Rationale:
# the orchestrator worker is running and could race a `state.set_phase`
# call, so the SQL UPDATE bypasses claim semantics entirely. We also
# null out claimed_* and stamp merged_at so the row matches the shape
# of one that the cycle actually produced.
#
# IMPORTANT: these test rows race the live orchestrator cycle. The
# orchestrator may have already moved this item from `spec` to a
# different phase by the time we get here — e.g. it may already be
# `blocked` with a `spec_wrong` verdict. We assert that the *transition*
# succeeds at the SQL level and that the API reflects each new phase, but
# we tolerate the case where the row is already past spec.
# Force one work item into a phase via SQL, then confirm it through the API.
# Arguments: $1 - phase to set
#            $2 - id of the work_items row
# Effects:   UPDATEs the row (clearing claimed_by/claimed_at, bumping
#            updated_at, and stamping merged_at only for phase "merged"),
#            then reads /v1/items/<id> and compares the reported phase.
#            Calls _fail on any error or mismatch.
drive_one() {
  local new_phase="$1"
  local row_id="$2"
  # Only the "merged" transition also stamps merged_at.
  local merged_clause=""
  if [[ "${new_phase}" == "merged" ]]; then
    merged_clause=" merged_at=NOW(),"
  fi
  local sql="UPDATE work_items SET phase='${new_phase}', claimed_by=NULL, claimed_at=NULL,${merged_clause} updated_at=NOW() WHERE id='${row_id}'"
  if ! docker exec "${DB_CONTAINER}" psql -U damascus -d damascus -v ON_ERROR_STOP=1 -q \
    -c "${sql}" >/dev/null 2>&1; then
    _fail "psql UPDATE to phase=${new_phase} failed"
  fi
  # Declared separately so the pipeline's exit status is not masked by `local`.
  local seen_phase
  if ! seen_phase=$(curl -fsS "${API_BASE}/v1/items/${row_id}" 2>/dev/null \
    | python3 -c "import sys, json; print(json.load(sys.stdin)['item']['phase'])"); then
    _fail "/v1/items/${row_id} failed after UPDATE to ${new_phase}"
  fi
  if [[ "${seen_phase}" != "${new_phase}" ]]; then
    _fail "phase after UPDATE = ${seen_phase} (expected ${new_phase})"
  fi
  green " -> phase=${seen_phase} (via API)"
}
# Walk the item through build -> review -> merged, pausing one second between
# consecutive transitions (no pause after the final one).
for next_phase in build review merged; do
  drive_one "${next_phase}" "${INGEST_ID}"
  [[ "${next_phase}" == "merged" ]] || sleep 1
done
# Sanity: merged_at must be populated on the merged row. psql -tA prints the
# boolean as a bare "t" or "f".
MERGED_AT=$(docker exec "${DB_CONTAINER}" psql -U damascus -d damascus -tA \
  -c "SELECT merged_at IS NOT NULL FROM work_items WHERE id='${INGEST_ID}'")
if [[ "${MERGED_AT}" != "t" ]]; then
  _fail "merged_at not set on item ${INGEST_ID}"
fi
green " -> merged_at populated"
_record "6. drive-cycle|0|spec->build->review->merged, merged_at set"
# ===========================================================================
# 7. cleanup
# ===========================================================================
_section_start "7. cleanup"
# Delete every row belonging to the smoke project and collect the returned ids.
DELETED=$(docker exec "${DB_CONTAINER}" psql -U damascus -d damascus -tA \
  -c "DELETE FROM work_items WHERE project='${VERIFY_PROJECT}' RETURNING id")
# Count only output lines that look like a 36-character id made of lowercase
# hex digits and dashes; any other line psql prints is ignored.
id_pattern='^[0-9a-f-]{36}$'
DELETED_COUNT=0
while IFS= read -r deleted_line; do
  if [[ "${deleted_line}" =~ ${id_pattern} ]]; then
    DELETED_COUNT=$((DELETED_COUNT + 1))
  fi
done <<<"${DELETED}"
if [[ "${DELETED_COUNT}" -lt 1 ]]; then
  _fail "cleanup DELETE removed ${DELETED_COUNT} rows (expected >=1)"
fi
green "DELETE FROM work_items WHERE project='${VERIFY_PROJECT}' -> ${DELETED_COUNT} row(s) removed"
_record "7. cleanup|0|verify-smoke rows purged (${DELETED_COUNT})"
# ===========================================================================
# 8. summary
# ===========================================================================
bold ""
bold "[8. summary]"
# Replay every recorded "name|exit_code|note" entry as a green or red line and
# tally the two outcomes.
GREEN_COUNT=0
RED_COUNT=0
for entry in "${RESULTS[@]}"; do
  section_name="${entry%%|*}"   # text before the first '|'
  remainder="${entry#*|}"       # everything after the first '|'
  status_code="${remainder%%|*}"
  detail="${remainder#*|}"
  case "${status_code}" in
    0)
      green "${section_name} ${detail}"
      GREEN_COUNT=$((GREEN_COUNT + 1))
      ;;
    *)
      red "${section_name} ${detail}"
      RED_COUNT=$((RED_COUNT + 1))
      ;;
  esac
done
bold ""
bold "verify.sh: ${GREEN_COUNT} passed, ${RED_COUNT} failed"
# Any red entry makes the whole run fail.
(( RED_COUNT == 0 )) || exit 1
# The script does not write the log itself; point at the tee recipe.
printf '%s\n' "evidence: ${LOG_FILE}"
printf '%s\n' " (re-run with: bash scripts/verify.sh 2>&1 | tee ${LOG_FILE})"
exit 0