Files
lore-engine-poc/examples/run_questions.sh
Hermes cfc555925d v2.T4: LLM consumer driving the 16-tool MCP gateway end-to-end
- examples/llm_consumer.py: raw httpx + urllib driver — discovers tools
  via tools/list, runs the tool-use loop against LiteLLM (minimax-m3), saves
  per-question JSON traces. No agent framework per task scope.
- examples/system_prompt.txt: 5 question types + tool protocol (per
  lore-engine/docs/07-reasoning-harness.md).
- examples/run_questions.sh: bash driver — exits 0 iff all 5 questions pass
  hand-verified correctness against the seed data.
- examples/results/*.json: traces from a real end-to-end run, all 5 PASS.
- examples/REPORT.md: per-question ground truth vs answer, with tool-call
  audit. The model used 9 distinct tools across 5 questions (requirement
  was >=4); every factual claim is grounded in a tool result; no
  fabrication.
2026-06-16 22:47:52 +00:00

60 lines
2.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# run_questions.sh — drive all 5 question types end-to-end via llm_consumer.py.
#
# Saves JSON traces under examples/results/. Prints a one-line PASS/FAIL summary.
# Exits 0 only if all 5 questions pass their hand-verified evaluation.
# Unset variables are errors and a pipeline fails if any stage fails.
# errexit (-e) is not enabled, so each critical step below checks its own status.
set -uo pipefail

# Work from the script's own directory so the relative paths used below
# (llm_consumer.py, results/) resolve regardless of the caller's cwd. Abort if
# that fails instead of running the rest of the script from the wrong place.
cd -- "$(dirname -- "$0")" \
  || { echo "FAIL: cannot cd to script directory"; exit 1; }
mkdir -p results \
  || { echo "FAIL: cannot create results directory"; exit 1; }

# Endpoint/model settings, overridable from the environment and resolved once.
# Defaults match llm_consumer.py's defaults; export them so the consumer
# doesn't have to be re-invoked with flags on every change.
export GATEWAY_URL="${GATEWAY_URL:-http://localhost:8765/mcp}"
export LITELLM_URL="${LITELLM_URL:-http://localhost:4000/v1}"
export LITELLM_MODEL="${LITELLM_MODEL:-minimax-m3}"

# Pre-flight: gateway + LiteLLM reachable.
# NOTE: curl runs without --fail, so these checks only catch connection-level
# problems (refused, DNS, 5s timeout); an HTTP error status still passes.
curl -s --max-time 5 -X POST "$GATEWAY_URL" \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' >/dev/null \
  || { echo "FAIL: gateway unreachable at $GATEWAY_URL"; exit 1; }
curl -s --max-time 5 "$LITELLM_URL/models" >/dev/null \
  || { echo "FAIL: LiteLLM unreachable at $LITELLM_URL"; exit 1; }
# Question table. Each entry is "<question id>|<question text>"; it is split at
# the first '|' into the parallel arrays IDS (ids, also used as trace file
# names) and QS (the prompt text), so the two arrays share indices.
IDS=()
QS=()
for entry in \
  "q1_who_is_aldric|Who is Aldric Raventhorne? Give a brief bio and his known relations." \
  "q2_was_allied_230|Was House Vyr allied with the Merchants Guild at 2nd_age.year_230?" \
  "q3_aldric_ancestors|What is the lineage / ancestry of Aldric Raventhorne? Walk back as far as you can." \
  "q4_images_of_aldric|Show me images of Aldric Raventhorne — portraits or otherwise." \
  "q5_consistency_issues|What are the open consistency issues in the world graph right now? Check contradictions, anachronisms, orphans, and ontology violations."; do
  IDS+=("${entry%%|*}") # everything before the first '|'
  QS+=("${entry#*|}")   # everything after the first '|'
done
unset entry
# IDS and QS are parallel arrays indexed together; refuse to run if they have
# drifted out of sync rather than silently skipping or mis-pairing questions.
total=${#IDS[@]}
if ((total != ${#QS[@]})); then
  echo "FAIL: ${total} question ids but ${#QS[@]} question texts"
  exit 1
fi

# Run every question even if an earlier one fails. A question counts as failed
# when llm_consumer.py exits non-zero; `overall` becomes the exit status.
overall=0
fail_count=0
for i in "${!IDS[@]}"; do
  id="${IDS[$i]}"
  q="${QS[$i]}"
  echo "============================================================"
  echo "[$((i + 1))/${total}] $id"
  echo "============================================================"
  if ! python3 llm_consumer.py --question-id "$id" --question "$q" \
    --out "results/${id}.json"; then
    overall=1
    fail_count=$((fail_count + 1))
  fi
done

echo
echo "============================================================"
echo "DONE — results in examples/results/"
ls -1 results/
echo "============================================================"

# One-line overall verdict, as promised by the file header.
if ((overall == 0)); then
  echo "PASS: all ${total} questions passed"
else
  echo "FAIL: ${fail_count}/${total} questions failed"
fi
exit "$overall"