feat: docs pass, test fixes, advanced review
Some checks failed
tests / Unit tests (Node 22) (push) Failing after 28s

This commit is contained in:
2026-06-19 16:15:06 +00:00
parent e2c92e854f
commit fbd991a2b0
22 changed files with 590 additions and 552 deletions

143
README.md
View File

@@ -2,7 +2,7 @@
A Discord-native, LLM-driven D&D encounter system for the Land of Mardonar.
Discord threads are encounter sessions. The LLM narrates, voices NPCs, tracks
hidden goals, emits skill checks, and logs everything to Neo4j.
hidden goals, emits skill checks, and logs everything to GraphMCP.
---
@@ -12,9 +12,9 @@ hidden goals, emits skill checks, and logs everything to Neo4j.
|---|---|
| Discord bot | discord.js v14 |
| Language | TypeScript (Node.js, ESM) |
| LLM | gemma4-it:e2b via Ollama |
| LLM | gemma4-it:e2b via Ollama (with LiteLLM as optional primary) |
| Session cache | Redis (ioredis) |
| Persistence | Neo4j 5 (neo4j-driver) |
| NPC memory / events | GraphMCP (external JSON-RPC server) |
| Schema validation | Zod |
| Test runner | Vitest |
@@ -23,9 +23,10 @@ hidden goals, emits skill checks, and logs everything to Neo4j.
## Prerequisites
- Node.js 20+
- Docker + Docker Compose (for local Redis and Neo4j)
- Docker + Docker Compose (for local Redis)
- Ollama running on your network with `gemma4-it:e2b` pulled
- A Discord bot token and application ID
- A reachable GraphMCP JSON-RPC server (separate stack — see `docs/architecture.md` §2)
---
@@ -53,15 +54,9 @@ DISCORD_CLIENT_ID=your_discord_application_id
REDIS_URL=redis://localhost:6379
NEO4J_URI=bolt://localhost:7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=mardonardev
# Point at your Ollama node — can be a LAN IP
OLLAMA_BASE_URL=http://192.168.1.x:11434
OLLAMA_MODEL=gemma4-it:e2b
LOG_LEVEL=debug
```
### 3. Start local services
@@ -70,8 +65,8 @@ LOG_LEVEL=debug
docker compose -f docker-compose.dev.yml up -d
```
This starts Redis on `localhost:6379` and Neo4j on `localhost:7687`.
Neo4j browser UI is available at `http://localhost:7474` (login: neo4j / mardonardev).
This starts Redis on `localhost:6379`. GraphMCP is expected to be reachable
on the `mardonar-internal` Docker network — see `docs/deployment-guide.md`.
### 4. Register Discord slash commands
@@ -97,48 +92,63 @@ npm run start # run compiled output
mardonar-bot/
├── src/
│ ├── bot/
│ │ ├── commands/
│ │ │ ├── dndname.ts # /dndname set|show|clear
│ │ │ └── encounter.ts # /encounter start|status|end
│ │ ├── embeds/
│ │ │ ├── skillCheck.ts # Skill check embed builder
│ │ │ ├── playerGate.ts # "Please register your name" embed
│ │ │ └── resolution.ts # Encounter complete embed
│ │ └── handlers/
│ │ └── messageRouter.ts # Main event loop for encounter threads
│ ├── session/
│ │ ├── playerRegistry.ts # Redis: discordId → dndName
│ │ └── sessionManager.ts # Redis: threadId → SessionState
│ ├── harness/
│ │ ├── promptBuilder.ts # System prompt assembly
│ │ ├── contextAssembler.ts # History + token budget management
│ │ ├── ollamaClient.ts # Ollama API client
│ │ ├── toolParser.ts # tool_call block detection and parsing
│ │ └── toolDispatcher.ts # Routes parsed tool calls to handlers
│ ├── mcp/
│ │ ├── server.ts # MCP server setup (@modelcontextprotocol/sdk)
│ │ └── tools/
│ │ ├── skillCheckEmit.ts
│ │ ├── skillCheckResolve.ts
│ │ ├── eventLogAppend.ts
│ │ ├── npcMemoryRead.ts
│ │ ├── npcMemoryWrite.ts
│ │ └── encounterResolve.ts
│ │ ├── commands/ # Slash command modules (data + execute)
│ │ │ ├── dndname.ts # /dndname set|show|clear
│ │ │ ├── encounter.ts # /encounter start|status|end
│ │ │ ├── character.ts # /character register|show|clear
│ │ │ ├── roll.ts # /roll <dice>
│ │ │ ├── actions.ts # /action <verb>
│ │ │ ├── turn.ts # /turn pass|list
│ │ │ ├── xp.ts # /xp grant|show
│ │ │ └── encounters.ts # /encounters list|info
│ │ ├── embeds/ # Discord embed builders
│ │ │ ├── skillCheck.ts
│ │ │ ├── playerGate.ts
│ │ │ ├── resolution.ts
│ │ │ ├── encounterDiscovery.ts
│ │ │ └── loreAnswer.ts
│ │ ├── handlers/ # Event handlers and queues
│ │ │ └── messageRouter.ts # Main event loop for encounter threads
│ │ └── index.ts # discord.js client + startup
│ ├── session/ # Redis-backed registries and state
│ │ ├── playerRegistry.ts
│ │ ├── characterRegistry.ts
│ │ ├── sessionManager.ts
│ │ ├── encounterLog.ts
│ │ └── xpAwarder.ts
│ ├── harness/ # LLM orchestration
│ │ ├── promptBuilder.ts
│ │ ├── contextAssembler.ts
│ │ ├── llmClient.ts # LiteLLM primary, Ollama fallback
│ │ ├── litellmClient.ts
│ │ ├── ollamaClient.ts
│ │ ├── toolParser.ts
│ │ ├── toolDispatcher.ts
│ │ ├── toolRegistry.ts
│ │ └── tools/ # Tool plugin implementations
│ ├── graphmcp/ # GraphMCP JSON-RPC client (NPC lore, events)
│ │ ├── client.ts
│ │ ├── ingest.ts
│ │ ├── loreResolver.ts
│ │ └── vocabularyResolver.ts
│ ├── vtt/ # Optional Foundry VTT relay
│ │ ├── foundryClient.ts
│ │ └── relaySession.ts
│ ├── db/
│ │ ├── redis.ts # ioredis singleton
│ │ └── neo4j.ts # neo4j-driver singleton + runQuery helper
│ ├── spec/
│ │ └── loader.ts # YAML spec loader + Zod validation
│ ├── config.ts # Zod-validated env vars
│ └── types/
│ └── index.ts # All shared TypeScript interfaces
│ │ └── redis.ts # ioredis singleton
│ ├── spec/loader.ts # YAML spec loader + Zod validation
│ ├── persona/loader.ts # persona.yaml loader for @mention
│ ├── lib/
│ │ ├── logger.ts # Custom plaintext stdout logger
│ │ └── historyTrim.ts # Shared chat-history trimmer
│ ├── scripts/deploy-commands.ts # Slash command registration (REST v10)
│ ├── config.ts # Zod-validated env vars
│ └── types/index.ts # All shared TypeScript types
├── specs/
│ └── market-thief.yaml # Example encounter spec
│ └── market-thief.yaml # Example encounter spec
├── tests/
│ ├── unit/
│ └── integration/
├── scripts/
│ └── deploy-commands.ts # Registers slash commands with Discord
│ ├── unit/ # 33 unit test files
│ └── integration/ # Phase1 integration tests
├── docker-compose.dev.yml
├── package.json
├── tsconfig.json
@@ -184,7 +194,7 @@ See `specs/market-thief.yaml` for a fully annotated example.
### Key fields
```yaml
encounterId: # Unique ID — used as Neo4j node key
encounterId: # Unique ID — used as the encounter session key
title: # Display name shown in Discord embeds
setting: # location, mood, ambientNpcs (all strings)
openingNarrative: # The scene-setting text posted at session start
@@ -208,7 +218,7 @@ skillChecks: # Named DCs e.g. chase_dc: 13
5. Ollama response is parsed:
- Narrative text → posted to the thread
- tool_call block → dispatched silently
6. Tool results (skill check embeds, Neo4j writes) happen automatically
6. Tool results (skill check embeds, GraphMCP writes) happen automatically
7. When a goal is reached, LLM calls encounter_resolve
8. Bot posts the resolution embed and archives the thread
```
@@ -249,10 +259,11 @@ block are always preserved.
## NPC Memory
Named NPCs with a `memoryKey` in their spec have persistent memory in Neo4j.
Named NPCs with a `memoryKey` in their spec have persistent memory in the
GraphMCP-backed graph (long-term NPC lore, prior encounter history, etc.).
At session start, their memory facts are loaded and injected into the system
prompt. At encounter resolution, any `npc_memory_write` tool calls are
committed to the graph.
prompt. At encounter resolution, the encounter is logged through GraphMCP
(`log_encounter`), which commits new facts to the graph.
This means a NPC like Miriam can remember that your party helped her
in a previous encounter — or that you let the thief go.
@@ -267,7 +278,8 @@ npm run test:unit # unit tests only (no external services)
npm run test:int # integration tests (requires Docker services running)
```
Integration tests require live Redis and Neo4j. Start them first:
Integration tests require live Redis and a reachable GraphMCP endpoint. Start
Redis first:
```bash
docker compose -f docker-compose.dev.yml up -d
@@ -279,9 +291,10 @@ npm run test:int
## Adding a New Encounter
1. Copy `specs/market-thief.yaml` to `specs/your-encounter.yaml`
2. Fill in all required fields
3. Test spec loading: `npm run validate-spec your-encounter`
2. Fill in all required fields (see `EncounterSpec` in `src/spec/loader.ts`)
3. Run `npm run build` to confirm the spec passes Zod validation
4. Run `/encounter start your-encounter` in Discord
5. Re-deploy slash commands: `npm run deploy-commands`
---
@@ -289,7 +302,7 @@ npm run test:int
The bot is a single Node.js process. It connects to:
- Redis (ioredis) for session and player registry
- Neo4j (neo4j-driver) for NPC memory and event logs
- GraphMCP JSON-RPC server for NPC memory and event logs
- Ollama over HTTP for LLM inference
- Discord over WebSocket (discord.js)
@@ -309,7 +322,11 @@ node dist/bot/index.js
## Architecture Documents
Full design rationale and phased build plan are in `docs/`:
Full design rationale and the phased build plan are in `docs/`:
- `docs/mardonar-encounter-engine.md` — system overview and key decisions
- `docs/mardonar-build-plan.md` — phased build plan with packages and test guidance
- `docs/architecture.md` — system overview, design decisions, drift log
- `docs/development-guide.md` — local development + test conventions
- `docs/deployment-guide.md` — Docker, env vars, deploy-commands flow
- `docs/api-contracts.md` — slash command and tool interface contracts
> **Note:** `Docs/mardonar-encounter-engine.md` and `Docs/mardonar-build-plan.md` (capital `D`) are historical documents from an earlier Go-based design and are not kept in sync. They are retained as a record of the project's evolution.

View File

@@ -27,7 +27,7 @@ The Mardonar Encounter Engine is a Discord bot that runs structured D&D encounte
| Lore / NPC memory | GraphMCP HTTP JSON-RPC | (env: `GRAPHMCP_URL`) | 6 RPC tools exposed |
| Foundry VTT | VTT relay HTTPS | (env: `VTT_RELAY_URL`) | Optional, requires API key |
| Validation | Zod | 3.24 | env + encounter spec |
| Logging | pino + pino-pretty | 9.6 / 13 | structured JSON in prod |
| Logging | custom (src/lib/logger.ts) | — | plaintext stdout; no env-driven level filter |
| Testing | Vitest | 3.1 | `tests/unit` + `tests/integration` |
| Build | tsc → dist/ | 5.8 | multi-stage Dockerfile |
@@ -93,7 +93,7 @@ mardonar-bot/
│ ├── db/redis.ts # ioredis singleton (lazy connect)
│ ├── spec/loader.ts # YAML loader + Zod schema
│ ├── persona/loader.ts # persona.yaml loader for @mention
│ ├── lib/logger.ts # pino wrapper
│ ├── lib/logger.ts # custom tag+message logger (plaintext stdout)
│ ├── config.ts # Zod env schema + parsed config singleton
│ ├── scripts/deploy-commands.ts # Slash command registration (REST v10)
│ └── types/index.ts # Shared interfaces + CONTEXT_BUDGET const
@@ -122,7 +122,7 @@ mardonar-bot/
├── persona.yaml # Zalram Cloudwalker (bot's @mention persona)
├── prd.md # Active PRD: Dynamic Goal Registration
├── Dockerfile # Multi-stage node:22-alpine
├── docker-compose.dev.yml # Local Redis + Neo4j
├── docker-compose.dev.yml # Builds the bot image; expects Redis + GraphMCP on the external `mardonar-internal` network
├── package.json
├── tsconfig.json
└── vitest.config.ts
@@ -331,7 +331,7 @@ Defined in `src/harness/tools/` and registered at module load. Each spec filters
### 6.1 Local development
```bash
docker compose -f docker-compose.dev.yml up -d # Redis + Neo4j
docker compose -f docker-compose.dev.yml up -d # Builds + runs bot; relies on Redis + GraphMCP already running on the `mardonar-internal` Docker network (see `docs/deployment-guide.md`)
npm install
npm run deploy-commands # registers slash commands with Discord
npm run dev # tsx watch mode
@@ -354,7 +354,7 @@ npm run dev # tsx watch mode
- Session state has a 12h TTL by default — stale encounters auto-expire
- Bot connects to Redis on `main()` startup (`redis.connect()`)
- VTT relay auto-spins up a headless Foundry session on connection failure (RSA-OAEP encrypted handshake)
- `LOG_LEVEL=info` in prod; pino writes structured JSON
- Logging: `src/lib/logger.ts` writes plaintext to stdout. No `LOG_LEVEL` env knob; callers pick the level per-call. (Earlier docs claimed pino + structured JSON — that was aspirational; the pino deps were unused and have been removed.)
---
@@ -374,11 +374,11 @@ npm run dev # tsx watch mode
### 7.2 Test coverage
- 21 unit test files in `tests/unit/`
- 33 unit test files in `tests/unit/` (393 tests, 2 skipped)
- 1 integration test (`tests/integration/phase1.test.ts`)
- `tests/fixtures/spec.ts` — shared encounter spec fixture
Notable test surfaces: `promptBuilder`, `contextAssembler`, `toolParser`, `toolDispatcher`, `sessionManager`, `playerRegistry`, `characterRegistry`, `specLoader`, `rollHandler`, `rollDetection`, `responseFilter`, `queueCap`, `generationQueue`, `reactionManager`, `encounterLog`, `encounterDiscoveryEmbed`, `loreAnswerEmbed`, `skillCheckEmbed`, `graphmcpClient`, `foundryClientRetry`, `foundryClientFormatters`, `goalRegister`, `relaySession`.
Notable test surfaces: `promptBuilder`, `contextAssembler`, `historyTrim`, `toolParser`, `toolDispatcher`, `toolRegistry`, `sessionManager`, `playerRegistry`, `characterRegistry`, `specLoader`, `rollHandler`, `rollDetection`, `responseFilter`, `queueCap`, `generationQueue`, `reactionManager`, `encounterLog`, `encounterDiscoveryEmbed`, `loreAnswerEmbed`, `skillCheckEmbed`, `graphmcpClient`, `foundryClientRetry`, `foundryClientFormatters`, `goalRegister`, `relaySession`, `litellmClient`, `ollamaClient`, `personaLoader`, `foundryReward`, `xpAwarder`, `redisErrorPath`, `messageRouterRunLLMTurn`, `specsToolsConsistency` (the last is a structural-consistency guard, not a module surface).
---
@@ -404,14 +404,16 @@ Notable test surfaces: `promptBuilder`, `contextAssembler`, `toolParser`, `toolD
Items the deep scan surfaced that aren't bugs but should be tracked:
- **Drift: `Docs/mardonar-encounter-engine.md` describes a Go bot with an embedded MCP layer; the actual code is TypeScript with an external JSON-RPC GraphMCP server.** Treat the doc as historical/aspirational.
- **Drift: `README.md`'s "Project Structure" tree references `src/mcp/` and the old `src/bot/commands/{dndname,encounter}.ts` layout.** Update README, or trim it to a pointer to the index.
- **Duplicate `trimHistory` logic** in `src/session/sessionManager.ts` and `src/harness/contextAssembler.ts` (identical body). Could be extracted to `src/lib/historyTrim.ts`.
- **Resolved 2026-06-19 — `README.md`'s "Project Structure" tree referenced `src/mcp/` and the old 2-command layout.** README now reflects the actual 8-command structure, `src/graphmcp/` (Neo4j/`src/mcp/` retired), and includes a callout noting `Docs/mardonar-encounter-engine.md` is historical.
- **Resolved 2026-06-19 — Duplicate `trimHistory` logic** in `src/session/sessionManager.ts` and `src/harness/contextAssembler.ts` was extracted to `src/lib/historyTrim.ts`. `tests/unit/historyTrim.test.ts` covers the shared module at 100%.
- **No production compose file** — only `docker-compose.dev.yml`. The Dockerfile is production-ready but deployment is ad-hoc.
- **No CI/CD** — `.github/workflows/` does not exist.
- **Resolved 2026-06-19 — No CI/CD** — `.gitea/workflows/test.yml` runs `tsc --noEmit`, `npm run test:unit`, and `npm run test:coverage` on push/PR to `main` (Node 22, cached npm).
- **`DISCORD_ALLOWED_USERS` is empty by default → anyone in allowed channels can run `/encounter start`.** The access control is channel-scoped, not user-scoped; admins need to set the env var explicitly.
- **`OLLAMA_BASE_URL` defaults to `localhost`** — fine for dev, but production needs the LAN IP or proxy URL set.
- **Spec tool list must be kept in sync** — `specs/*.yaml` declare `tools: [...]`, but no test verifies every referenced tool is registered. A stale spec name silently filters to no active tools.
- **Schema mismatch risk:** `types/index.ts` `EncounterSpec` and `spec/loader.ts` Zod schema have diverged slightly — `EncounterSpec` is missing `tone`, `tools`, `randomizable`, and `npcs.nameKey`. `assembleContext` reads `spec.tone`; `loader` doesn't validate it. Consider regenerating `types/index.ts` from the Zod schema via `z.infer`.
- **Resolved 2026-06-19 — Spec tool list must be kept in sync** — `tests/unit/specsToolsConsistency.test.ts` walks every `specs/*.yaml`, asserts each entry in `tools: [...]` is registered in the tool plugin registry, and fails loudly with the file and unknown name if drift appears. Also asserts every registered tool is referenced by at least one spec.
- **Resolved 2026-06-19 — Schema mismatch risk:** `src/types/index.ts` now re-exports `EncounterSpec` (and its sub-shapes) derived from `z.infer<typeof EncounterSpecSchema>`. The static type and the runtime validator are now the same source of truth — drift is structurally impossible. Side effect: `loadSpec` now also validates `xpReward` as a number (was previously typed but unenforced).
- **Resolved 2026-06-19 — Logging drift:** the architecture previously claimed `pino + pino-pretty` + structured JSON. The actual logger is the custom `src/lib/logger.ts` (plaintext stdout, no env-driven level filter). The unused `pino` and `pino-pretty` dependencies were removed from `package.json`; §2.1, §2.2, and §6.3 now describe reality.
- **Resolved 2026-06-19 — README drift:** `README.md` was significantly out of date: it told new contributors to set a no-op `LOG_LEVEL=debug`, run the non-existent `npm run validate-spec`, and look at `src/mcp/` (renamed to `src/graphmcp/`) and `src/db/neo4j.ts` (no Neo4j in the project). It also linked `Docs/mardonar-encounter-engine.md` (Go architecture, historical) as the current architecture doc. The dead top-level `scripts/deploy-commands.ts` — a stale duplicate of `src/scripts/deploy-commands.ts` that only knew about 2 of 8 commands — was removed. The README now reflects the actual layout, command set, and persistence layer.
---

View File

@@ -114,21 +114,20 @@
| Type | File | Purpose |
|---|---|---|
| `EncounterSpec` | `types/index.ts` | Spec shape (note: diverged slightly from Zod schema — see architecture.md §9) |
| `NpcPersona` | `types/index.ts` | NPC definition |
| `EncounterGoal` / `EncounterGoals` | `types/index.ts` | Primary/secondary goals |
| `EncounterSpec` | `types/index.ts` (re-exported from `spec/loader.ts`) | Spec shape — derived from the Zod schema via `z.infer`; cannot drift from the runtime validator |
| `NpcPersona` | `types/index.ts` (re-exported from `spec/loader.ts`) | NPC definition |
| `EncounterGoal` / `EncounterGoals` | `types/index.ts` (re-exported from `spec/loader.ts`) | Primary/secondary goals |
| `SessionState` | `types/index.ts` | Full session shape |
| `ChatMessage` | `types/index.ts` | History turn (with `pinned` flag) |
| `HeldMessage` | `types/index.ts` | Pre-registration messages |
| `ToolCallBlock` / `LLMResponse` | `types/index.ts` | LLM tool surface |
| `ToolName` | `types/index.ts` | Discriminated union of valid tools |
| `*Args` per tool | `types/index.ts` | Per-tool arg types |
| `NpcNode` / `EncounterNode` / `EncounterEventNode` | `types/index.ts` | Neo4j graph node types |
| `ToolName` | `types/index.ts` | String alias — the actual set of valid tools is enforced at runtime by the plugin registry (see `harness/toolRegistry.ts`) |
| `*Args` per tool | `harness/tools/<name>.ts` | Per-tool arg types live next to the plugin that uses them |
| `CONTEXT_BUDGET` (const) | `types/index.ts` | Hard token budget zones |
## Config & logging
| Component | File | Role |
|---|---|---|
| `config` (singleton) | `config.ts` | Zod-validated env (Discord, Redis, LiteLLM, Ollama, GraphMCP, VTT, persona, logging) |
| `log` (pino wrapper) | `lib/logger.ts` | Structured logging with `pino-pretty` in dev |
| `config` (singleton) | `config.ts` | Zod-validated env (Discord, Redis, LiteLLM, Ollama, GraphMCP, VTT, persona) |
| `log` | `lib/logger.ts` | Custom plaintext logger writing to stdout. No `pino-pretty`; no env-driven level filter — callers pick the level per call. |

View File

@@ -2,7 +2,7 @@
> Persistent and transient data shapes in the Mardonar Encounter Engine. Generated 2026-06-19.
The bot's data lives in three places: Redis (transient session state), the filesystem (`data/`, runtime artifacts), and the GraphMCP-backed Neo4j graph (long-term NPC memory + encounter history). The bot does not query Neo4j directly — it goes through the GraphMCP JSON-RPC client.
The bot's data lives in three places: Redis (transient session state), the filesystem (`data/`, runtime artifacts), and the GraphMCP-backed graph — Neo4j, accessed through GraphMCP JSON-RPC. The bot itself does not query Neo4j directly.
## Encounter spec (YAML → Zod → TypeScript)
@@ -10,7 +10,7 @@ Defined by `EncounterSpecSchema` in `src/spec/loader.ts`. Loaded by `/encounter
```ts
{
encounterId: string, // unique ID — also Neo4j node key
encounterId: string, // unique ID — encounter session key in Redis
title: string, // display name in Discord embeds
tone?: string, // "tense" | "comedic" | ... optional flavor block
setting: {
@@ -126,9 +126,9 @@ System messages are emitted by the harness for tool results, filter corrections,
}
```
## Neo4j graph (via GraphMCP)
## GraphMCP graph
The bot does not directly define the Neo4j schema — it consumes whatever GraphMCP returns. The conceptual model based on the GraphMCP client types and the legacy design doc:
The bot does not directly define the graph schema — it consumes whatever GraphMCP returns. The conceptual model based on the GraphMCP client types and the legacy design doc:
```
(:NPC {id, name, persona_summary, memory: [], last_seen_encounter})
@@ -177,21 +177,26 @@ type LLMResponse = {
}
```
Tool names (`src/types/index.ts`):
Tool names:
```ts
type ToolName =
| 'skill_check_emit'
| 'skill_check_resolve' // (defined in types but no longer registered — see architecture.md §9)
| 'event_log_append' // (defined in types but no longer registered)
| 'npc_memory_read' // (defined in types but no longer registered)
| 'npc_memory_write' // (defined in types but no longer registered)
| 'encounter_resolve'
| 'goal_register'
| 'context_recall'
| 'foundry_lookup'
| 'foundry_reward';
```
The `ToolName` type in `src/types/index.ts` is `string` (a string alias), not a
discriminated union — the actual set of valid tools is enforced at runtime by
the plugin registry (`src/harness/toolRegistry.ts`, populated by side-effect
imports in `src/harness/tools/index.ts`). `tests/unit/specsToolsConsistency.test.ts`
catches drift between the registry and `specs/*.yaml` `tools:` lists.
Currently registered tools (`src/harness/tools/`):
| Name | File | Purpose |
|---|---|---|
| `skill_check_emit` | `skillCheckEmit.ts` | Posts a skill-check embed and updates `pendingSkillCheck` |
| `encounter_resolve` | `encounterResolve.ts` | Writes the encounter summary and archives the thread |
| `context_recall` | `contextRecall.ts` | Returns canonical facts from `resolvedContext` |
| `goal_register` | `goalRegister.ts` | Adds a dynamic goal mid-encounter |
| `foundry_lookup` | `foundryLookup.ts` | Live VTT actor data |
| `foundry_reward` | `foundryReward.ts` | XP / item grant to a VTT actor |
Removed in earlier refactors: `skill_check_resolve`, `event_log_append`, `npc_memory_read`, `npc_memory_write` — see `docs/architecture.md §9`. Their work is now handled by the per-encounter event log + GraphMCP `log_encounter` and `query_as_npc` RPC methods.
The four `*_resolve / *_read / *_write` entries are **dead** in the current implementation — replaced by GraphMCP `log_encounter` and other RPC calls. They should be removed from the type union (or actually re-implemented) to avoid confusion.

View File

@@ -139,9 +139,6 @@ GRAPHMCP_INGEST_STREAM=raw.messages
# Persisted state
DATA_DIR=/app/data # or wherever you mount the volume
# Logging
LOG_LEVEL=info
```
> ⚠ **Security note:** `DISCORD_ALLOWED_CHANNELS` is **empty by default**, which means the bot will respond in **no channels**. This is secure-by-default but easy to misconfigure. Set it explicitly.
@@ -168,14 +165,14 @@ A simple `docker` healthcheck using Discord WebSocket isn't trivially scriptable
## Logging
The bot uses pino. In dev, `pino-pretty` formats to a human-readable stream. In prod, pino emits structured JSON to stdout — pipe to your log shipper (Loki, CloudWatch, etc.).
The bot logs to stdout via a small custom logger (`src/lib/logger.ts`): plaintext lines of the form `[tag] message key=value key=value`. There is no `LOG_LEVEL` env knob and no env-driven level filter — callers pick the level per call (`log.info`, `log.warn`, `log.error`, `log.debug`). Pipe container stdout to your log shipper (Loki, CloudWatch, etc.) if you need aggregation.
Useful fields to index:
Useful tokens to index/filter on:
- `level`, `time`, `msg`
- `threadId`, `encounterId` (for encounter-specific queries)
- `latencyMs` (for LLM and tool latency)
- `error` (for failure analysis)
- The `[tag]` prefix (e.g. `[bot]`, `[redis]`, `[encounter]`) identifies the subsystem.
- `threadId`, `encounterId` fields (for encounter-specific queries).
- `latencyMs` (for LLM and tool latency).
- `error` (for failure analysis).
## Operational runbook
@@ -215,5 +212,5 @@ These are real but not blockers:
- **No production compose file** — only `docker-compose.dev.yml`. Production deploy is ad-hoc.
- **No CD** — CI exists (`.gitea/workflows/test.yml` runs type-checking and unit tests on push/PR to `main`), but build and deploy are still manual.
- **No health endpoint** — no HTTP probe target.
- **No metrics export** — pino logs are the only observability surface.
- **No metrics export** — the plaintext stdout logs are the only observability surface.
- **`docker-compose.dev.yml` references an external Docker network (`mardonar-internal`)** — fine for the dev stack it's designed for, but a fresh deployment needs to either join the same network or remove the reference.

View File

@@ -53,7 +53,7 @@ npm run build # compile TypeScript to dist/
npm run start # run the compiled output
```
The bot logs to stdout (pino with `pino-pretty` in dev). Set `LOG_LEVEL=debug` for verbose output.
The bot logs to stdout via a small custom logger (`src/lib/logger.ts`): plaintext `[tag] message key=value` lines. There is no `LOG_LEVEL` env knob — callers pick the level per call (`log.info` / `log.warn` / `log.error` / `log.debug`).
## Testing
@@ -138,7 +138,6 @@ The tool's `args` schema (string / number / boolean) is surfaced to the LLM via
| `VTT_USERNAME` | empty | Foundry username |
| `VTT_PASSWORD` | empty | Foundry password (encrypted with RSA-OAEP for handoff) |
| `VTT_WORLD` | empty | Foundry world to launch |
| `LOG_LEVEL` | `info` | `trace` / `debug` / `info` / `warn` / `error` |
## Common tasks
@@ -158,7 +157,7 @@ In Discord, in an encounter thread: `/encounter status`
`/encounter audit` (DMs the file) — or read `data/summaries/` directly
### Tail the bot log
With pino-pretty in dev, logs are pretty-printed to stdout. In prod, pipe container stdout to your log shipper.
The custom logger writes plaintext `[tag] message key=value` lines to stdout. In prod, pipe container stdout to your log shipper.
### Reset Redis state
```bash

View File

@@ -92,14 +92,20 @@ Plus `specs/` (8 encounter YAML files), `tests/` (22 test files), `data/` (runti
## Known drift and open issues
- `Docs/mardonar-encounter-engine.md` describes a Go bot with embedded MCP — superseded by `docs/architecture.md` but still referenced by the README.
- `README.md`'s project-structure tree is out of date (mentions `src/mcp/`, missing commands).
- `src/types/index.ts` `EncounterSpec` diverged from `src/spec/loader.ts` Zod schema (missing `tone`, `tools`, `randomizable`, `nameKey`).
- Duplicate `trimHistory` between `sessionManager.ts` and `contextAssembler.ts`.
- No production `docker-compose.yml`, no CI/CD, no HTTP health endpoint.
- `DISCORD_ALLOWED_USERS` empty by default — channel-scoped access only.
Resolved in the 2026-06-19 /loop improvement pass (see `docs/architecture.md §9` for the full list with dates):
See `docs/architecture.md §9` for full drift list.
- ~~`Docs/mardonar-encounter-engine.md` describes a Go bot with embedded MCP — superseded by `docs/architecture.md` but still referenced by the README.~~ README now points to the right doc and notes the historical status of `Docs/mardonar-encounter-engine.md`.
- ~~`README.md`'s project-structure tree is out of date (mentions `src/mcp/`, missing commands).~~ README tree now reflects the actual 8-command layout and `src/graphmcp/` (Neo4j/old `src/mcp/` were both retired).
- ~~`src/types/index.ts` `EncounterSpec` diverged from `src/spec/loader.ts` Zod schema (missing `tone`, `tools`, `randomizable`, `nameKey`).~~ `EncounterSpec` is now `z.infer<typeof EncounterSpecSchema>` — the static type and the runtime validator cannot drift. `src/types/index.ts` re-exports it.
- ~~Duplicate `trimHistory` between `sessionManager.ts` and `contextAssembler.ts`.~~ Extracted to `src/lib/historyTrim.ts`; `tests/unit/historyTrim.test.ts` covers the shared module at 100%.
Still open:
- No production `docker-compose.yml` and no HTTP health endpoint. (This item originally also listed "no CI/CD"; CI was added on 2026-06-19 via `.gitea/workflows/test.yml`.)
- `DISCORD_ALLOWED_USERS` empty by default — channel-scoped access only.
- Two tracked dead-code files at the project root: `index.ts` and `promptBuilder.ts`. These are stale duplicates from before the project was reorganized into `src/`; `tsconfig.json` has `rootDir: "src"` so they are never compiled or imported. `git rm` is pending user approval.
See `docs/architecture.md §9` for the canonical drift log.
## When you're ready to plan new features

View File

@@ -0,0 +1,92 @@
{
"workflow_version": "1.2.0",
"timestamps": {
"started": "2026-06-19T05:15:07Z",
"last_updated": "2026-06-19T05:35:00Z",
"completed": "2026-06-19T05:35:00Z"
},
"mode": "initial_scan",
"scan_level": "deep",
"project_root": "/home/kaykayyali/hosting/mardonar-npcs",
"project_knowledge": "/home/kaykayyali/hosting/mardonar-npcs/docs",
"completed_steps": [
{ "step": "step_1", "status": "completed", "timestamp": "2026-06-19T05:16:00Z", "summary": "Classified as monolith with 1 part (backend) — Discord.js v14 bot on Node 20 TypeScript with Neo4j + Redis + Ollama" },
{ "step": "step_2", "status": "completed", "timestamp": "2026-06-19T05:17:00Z", "summary": "Found 12 existing docs" },
{ "step": "step_3", "status": "completed", "timestamp": "2026-06-19T05:20:00Z", "summary": "Stack: Node 22 / TS ESM / discord.js v14 / LiteLLM primary + Ollama fallback / ioredis / neo4j-driver / GraphMCP JSON-RPC / Zod / pino / Vitest" },
{ "step": "step_4", "status": "completed", "timestamp": "2026-06-19T05:25:00Z", "summary": "Conditional analysis: 8 slash commands, 6 LLM tools, 5 embeds, 7 event handlers" },
{ "step": "step_5", "status": "completed", "timestamp": "2026-06-19T05:27:00Z", "summary": "Source tree written" },
{ "step": "step_6", "status": "completed", "timestamp": "2026-06-19T05:28:00Z", "summary": "Dev + deployment guides written" },
{ "step": "step_7", "status": "skipped", "timestamp": "2026-06-19T05:29:00Z", "summary": "Skipped — single-part project" },
{ "step": "step_8", "status": "completed", "timestamp": "2026-06-19T05:30:00Z", "summary": "Architecture written" },
{ "step": "step_9", "status": "completed", "timestamp": "2026-06-19T05:32:00Z", "summary": "Supporting docs written" },
{ "step": "step_10", "status": "completed", "timestamp": "2026-06-19T05:33:00Z", "summary": "Master index written" },
{ "step": "step_11", "status": "completed", "timestamp": "2026-06-19T05:34:00Z", "summary": "Validation: no incomplete markers; all 9 docs present and internally linked" },
{ "step": "step_12", "status": "completed", "timestamp": "2026-06-19T05:35:00Z", "summary": "Workflow finalized by user" }
],
"current_step": "completed",
"findings": {
"project_classification": "monolith, 1 part, backend",
"primary_tech": "TypeScript / Node.js 22 ESM, discord.js v14, LiteLLM (gemma4-it:e2b via Ollama), ioredis, neo4j-driver, GraphMCP JSON-RPC, Zod, pino, Vitest, Docker",
"project_type_id": "backend",
"existing_docs_count": 12,
"user_context": "Proceed as-is, cross-reference existing docs and specs",
"tech_stack": {
"language": "TypeScript 5.8 (ESM, NodeNext modules)",
"runtime": "Node.js 22 (Dockerfile: node:22-alpine)",
"framework": "discord.js v14 (Discord bot)",
"llm_primary": "LiteLLM proxy (LITELLM_BASE_URL)",
"llm_fallback": "Ollama via ollama npm + direct HTTP",
"session_cache": "Redis (ioredis)",
"graph_db": "Neo4j (via GraphMCP JSON-RPC)",
"lore_memory": "GraphMCP HTTP JSON-RPC server",
"foundry_integration": "VTT relay (https://vtt-relay.damascusfront.net)",
"validation": "Zod (env + spec)",
"logging": "pino + pino-pretty",
"testing": "Vitest 3 (tests/unit + tests/integration)",
"build": "tsc → dist/, multi-stage Dockerfile",
"container": "Docker Compose (docker-compose.dev.yml for local Redis/Neo4j)"
},
"architecture_pattern": "Layered backend with plug-in tool registry",
"source_files": 56,
"test_files": 24,
"specs": 8
},
"project_parts": [
{
"part_id": "mardonar-bot",
"display_name": "Mardonar Encounter Engine",
"root_path": "/home/kaykayyali/hosting/mardonar-npcs",
"project_type_id": "backend"
}
],
"outputs_generated": [
"project-scan-report.json",
"project-overview.md",
"architecture.md",
"source-tree-analysis.md",
"component-inventory.md",
"development-guide.md",
"deployment-guide.md",
"api-contracts.md",
"data-models.md",
"index.md"
],
"verification_summary": "Read 13 source files (key modules: bot/index, messageRouter, toolDispatcher, toolParser, promptBuilder, llmClient, graphmcpClient, sessionManager, spec/loader, relaySession, skillCheckEmit, encounterLog, types/index). Inventoried all 8 slash commands via setName() scan. Inventoried all 6 tool plugins by side-effect imports. Verified all 9 generated docs are present and internally cross-linked. No incomplete-marker strings found in index.md.",
"open_risks": [
"Drift in pre-existing Docs/mardonar-encounter-engine.md (describes Go architecture) — flagged in architecture.md §9",
"Drift in README.md project-structure tree — flagged in architecture.md §9",
"EncounterSpec type vs Zod schema divergence (tone/tools/randomizable/nameKey) — flagged in architecture.md §9",
"Duplicate trimHistory between sessionManager.ts and contextAssembler.ts — flagged in architecture.md §9",
"No production docker-compose, no CI/CD, no HTTP health endpoint — flagged in architecture.md §9 and deployment-guide.md",
"Dead ToolName entries (skill_check_resolve, event_log_append, npc_memory_read, npc_memory_write) still in types/index.ts — flagged in data-models.md",
"DISCORD_ALLOWED_USERS empty by default — deployment guide calls this out"
],
"next_checks": [
"Before merging any change: run `npm run test:unit` (24 unit tests) and `npm run build`",
"Before deploying: re-run `npm run deploy-commands` after any src/bot/commands/* change",
"Before trusting a new spec: validate against the Zod schema in src/spec/loader.ts; add a fixture to tests/fixtures/",
"When adding a new tool: write a unit test in tests/unit/ before merging",
"When changing env: re-read src/config.ts — schema is the source of truth"
],
"resume_instructions": "Workflow complete. To deep-dive, set mode='deep_dive' in this file and current_step='step_13'."
}

276
package-lock.json generated
View File

@@ -17,8 +17,6 @@
"js-yaml": "^4.1.0",
"ollama": "^0.5.0",
"openai": "^6.39.0",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"zod": "^3.24.0"
},
"devDependencies": {
@@ -736,12 +734,6 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@pinojs/redact": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz",
"integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==",
"license": "MIT"
},
"node_modules/@pkgjs/parseargs": {
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -1410,15 +1402,6 @@
"js-tokens": "^10.0.0"
}
},
"node_modules/atomic-sleep": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz",
"integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==",
"license": "MIT",
"engines": {
"node": ">=8.0.0"
}
},
"node_modules/balanced-match": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
@@ -1508,12 +1491,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/colorette": {
"version": "2.0.20",
"resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz",
"integrity": "sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==",
"license": "MIT"
},
"node_modules/cross-spawn": {
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
@@ -1529,15 +1506,6 @@
"node": ">= 8"
}
},
"node_modules/dateformat": {
"version": "4.6.3",
"resolved": "https://registry.npmjs.org/dateformat/-/dateformat-4.6.3.tgz",
"integrity": "sha512-2P0p0pFGzHS5EMnhdxQi7aJN+iMheud0UhG4dlE1DLAlvL8JHjJJTX/CSm4JXwV0Ka5nGk3zC5mcb5bUQUxxMA==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -1655,15 +1623,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/end-of-stream": {
"version": "1.4.5",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
"license": "MIT",
"dependencies": {
"once": "^1.4.0"
}
},
"node_modules/es-module-lexer": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz",
@@ -1733,24 +1692,12 @@
"node": ">=12.0.0"
}
},
"node_modules/fast-copy": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/fast-copy/-/fast-copy-4.0.3.tgz",
"integrity": "sha512-58apWr0GUiDFM8+3afrO6eYwJBn9ZAhDOzG3L+/9llab/haCARS2UIfffmOurYLwbgDRs8n0rfr6qAAPEAuAQw==",
"license": "MIT"
},
"node_modules/fast-deep-equal": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
"license": "MIT"
},
"node_modules/fast-safe-stringify": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/fast-safe-stringify/-/fast-safe-stringify-2.1.1.tgz",
"integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==",
"license": "MIT"
},
"node_modules/fdir": {
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
@@ -1894,12 +1841,6 @@
"node": ">=8"
}
},
"node_modules/help-me": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/help-me/-/help-me-5.0.0.tgz",
"integrity": "sha512-7xgomUX6ADmcYzFik0HzAxh/73YlKR9bmFzf51CZwR+b6YtzU2m0u49hQCqV6SvlqIqsaxovfwdvbnsw3b/zpg==",
"license": "MIT"
},
"node_modules/html-escaper": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
@@ -2039,15 +1980,6 @@
"@pkgjs/parseargs": "^0.11.0"
}
},
"node_modules/joycon": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz",
"integrity": "sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==",
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/js-tokens": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-10.0.0.tgz",
@@ -2165,15 +2097,6 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/minipass": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz",
@@ -2218,24 +2141,6 @@
"whatwg-fetch": "^3.6.20"
}
},
"node_modules/on-exit-leak-free": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz",
"integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==",
"license": "MIT",
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"license": "ISC",
"dependencies": {
"wrappy": "1"
}
},
"node_modules/openai": {
"version": "6.39.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-6.39.0.tgz",
@@ -2328,76 +2233,6 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/pino": {
"version": "9.14.0",
"resolved": "https://registry.npmjs.org/pino/-/pino-9.14.0.tgz",
"integrity": "sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w==",
"license": "MIT",
"dependencies": {
"@pinojs/redact": "^0.4.0",
"atomic-sleep": "^1.0.0",
"on-exit-leak-free": "^2.1.0",
"pino-abstract-transport": "^2.0.0",
"pino-std-serializers": "^7.0.0",
"process-warning": "^5.0.0",
"quick-format-unescaped": "^4.0.3",
"real-require": "^0.2.0",
"safe-stable-stringify": "^2.3.1",
"sonic-boom": "^4.0.1",
"thread-stream": "^3.0.0"
},
"bin": {
"pino": "bin.js"
}
},
"node_modules/pino-abstract-transport": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-2.0.0.tgz",
"integrity": "sha512-F63x5tizV6WCh4R6RHyi2Ml+M70DNRXt/+HANowMflpgGFMAym/VKm6G7ZOQRjqN7XbGxK1Lg9t6ZrtzOaivMw==",
"license": "MIT",
"dependencies": {
"split2": "^4.0.0"
}
},
"node_modules/pino-pretty": {
"version": "13.1.3",
"resolved": "https://registry.npmjs.org/pino-pretty/-/pino-pretty-13.1.3.tgz",
"integrity": "sha512-ttXRkkOz6WWC95KeY9+xxWL6AtImwbyMHrL1mSwqwW9u+vLp/WIElvHvCSDg0xO/Dzrggz1zv3rN5ovTRVowKg==",
"license": "MIT",
"dependencies": {
"colorette": "^2.0.7",
"dateformat": "^4.6.3",
"fast-copy": "^4.0.0",
"fast-safe-stringify": "^2.1.1",
"help-me": "^5.0.0",
"joycon": "^3.1.1",
"minimist": "^1.2.6",
"on-exit-leak-free": "^2.1.0",
"pino-abstract-transport": "^3.0.0",
"pump": "^3.0.0",
"secure-json-parse": "^4.0.0",
"sonic-boom": "^4.0.1",
"strip-json-comments": "^5.0.2"
},
"bin": {
"pino-pretty": "bin.js"
}
},
"node_modules/pino-pretty/node_modules/pino-abstract-transport": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-3.0.0.tgz",
"integrity": "sha512-wlfUczU+n7Hy/Ha5j9a/gZNy7We5+cXp8YL+X+PG8S0KXxw7n/JXA3c46Y0zQznIJ83URJiwy7Lh56WLokNuxg==",
"license": "MIT",
"dependencies": {
"split2": "^4.0.0"
}
},
"node_modules/pino-std-serializers": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.1.0.tgz",
"integrity": "sha512-BndPH67/JxGExRgiX1dX0w1FvZck5Wa4aal9198SrRhZjH3GxKQUKIBnYJTdj2HDN3UQAS06HlfcSbQj2OHmaw==",
"license": "MIT"
},
"node_modules/postcss": {
"version": "8.5.15",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.15.tgz",
@@ -2427,38 +2262,6 @@
"node": "^10 || ^12 || >=14"
}
},
"node_modules/process-warning": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz",
"integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fastify"
},
{
"type": "opencollective",
"url": "https://opencollective.com/fastify"
}
],
"license": "MIT"
},
"node_modules/pump": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz",
"integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
"license": "MIT",
"dependencies": {
"end-of-stream": "^1.1.0",
"once": "^1.3.1"
}
},
"node_modules/quick-format-unescaped": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz",
"integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==",
"license": "MIT"
},
"node_modules/readline-sync": {
"version": "1.4.10",
"resolved": "https://registry.npmjs.org/readline-sync/-/readline-sync-1.4.10.tgz",
@@ -2469,15 +2272,6 @@
"node": ">= 0.8.0"
}
},
"node_modules/real-require": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz",
"integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==",
"license": "MIT",
"engines": {
"node": ">= 12.13.0"
}
},
"node_modules/redis-errors": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
@@ -2551,31 +2345,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/safe-stable-stringify": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz",
"integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==",
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/secure-json-parse": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-4.1.0.tgz",
"integrity": "sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fastify"
},
{
"type": "opencollective",
"url": "https://opencollective.com/fastify"
}
],
"license": "BSD-3-Clause"
},
"node_modules/semver": {
"version": "7.8.1",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.8.1.tgz",
@@ -2632,15 +2401,6 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/sonic-boom": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.1.tgz",
"integrity": "sha512-w6AxtubXa2wTXAUsZMMWERrsIRAdrK0Sc+FUytWvYAhBJLyuI4llrMIC1DtlNSdI99EI86KZum2MMq3EAZlF9Q==",
"license": "MIT",
"dependencies": {
"atomic-sleep": "^1.0.0"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -2651,15 +2411,6 @@
"node": ">=0.10.0"
}
},
"node_modules/split2": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz",
"integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==",
"license": "ISC",
"engines": {
"node": ">= 10.x"
}
},
"node_modules/sprintf-js": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
@@ -2791,18 +2542,6 @@
"node": ">=8"
}
},
"node_modules/strip-json-comments": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-5.0.3.tgz",
"integrity": "sha512-1tB5mhVo7U+ETBKNf92xT4hrQa3pm0MZ0PQvuDnWgAAGHDsfp4lPSpiS6psrSiet87wyGPh9ft6wmhOMQ0hDiw==",
"license": "MIT",
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/strip-literal": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz",
@@ -2851,15 +2590,6 @@
"node": ">=18"
}
},
"node_modules/thread-stream": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-3.1.0.tgz",
"integrity": "sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A==",
"license": "MIT",
"dependencies": {
"real-require": "^0.2.0"
}
},
"node_modules/tinybench": {
"version": "2.9.0",
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
@@ -3783,12 +3513,6 @@
"node": ">=8"
}
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"license": "ISC"
},
"node_modules/ws": {
"version": "8.21.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.21.0.tgz",

View File

@@ -25,8 +25,6 @@
"js-yaml": "^4.1.0",
"ollama": "^0.5.0",
"openai": "^6.39.0",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"zod": "^3.24.0"
},
"devDependencies": {

View File

@@ -1,27 +0,0 @@
// Run once per deploy (or whenever slash commands change):
// npm run deploy-commands
import { REST, Routes } from 'discord.js';
import 'dotenv/config';
import { config } from '../src/config.js';
import { data as dndnameData } from '../src/bot/commands/dndname.js';
import { data as encounterData } from '../src/bot/commands/encounter.js';
const commands = [dndnameData.toJSON(), encounterData.toJSON()];
const rest = new REST({ version: '10' }).setToken(config.DISCORD_TOKEN);
async function deploy(): Promise<void> {
console.log(`Registering ${commands.length} slash commands globally…`);
await rest.put(Routes.applicationCommands(config.DISCORD_CLIENT_ID), {
body: commands,
});
console.log('Done.');
}
deploy().catch((err) => {
console.error('deploy-commands failed:', err);
process.exit(1);
});

View File

@@ -2,10 +2,12 @@ import { SlashCommandBuilder } from '@discordjs/builders';
import { EmbedBuilder, AttachmentBuilder } from 'discord.js';
import type { ChatInputCommandInteraction, TextChannel } from 'discord.js';
import { buildEncounterListEmbed } from '../embeds/encounterDiscovery.js';
import { readdirSync, writeFileSync } from 'fs';
import { writeFileSync } from 'fs';
import { join } from 'path';
import { load, dump } from 'js-yaml';
import { loadSpec, EncounterSpecSchema } from '../../spec/loader.js';
import { loadSpec, EncounterSpecSchema, listSpecFiles } from '../../spec/loader.js';
import { getAllToolNames } from '../../harness/toolRegistry.js';
import type { EncounterSpec } from '../../types/index.js';
import { sessionManager } from '../../session/sessionManager.js';
import { playerRegistry } from '../../session/playerRegistry.js';
import { config } from '../../config.js';
@@ -123,9 +125,9 @@ function interpolate(text: string, ctx: Record<string, string>): string {
// Apply resolved context to NPC display names and setting location.
// Original spec is not mutated — returns a shallow copy with names replaced.
function applyResolved(
spec: import('../../types/index.js').EncounterSpec,
spec: EncounterSpec,
ctx: Record<string, string>,
): import('../../types/index.js').EncounterSpec {
): EncounterSpec {
const npcs = spec.npcs.map(npc => {
const resolved = npc.nameKey ? ctx[npc.nameKey] : undefined;
return resolved ? { ...npc, name: resolved } : npc;
@@ -162,6 +164,23 @@ async function handleStart(
return;
}
// Fail loud at encounter load if the spec opts into tools that aren't
// registered, instead of letting getActiveTools silently drop them mid-
// session (the LLM would be told the tool exists but the dispatcher would
// reject every call). The specs-tools consistency test guards the same
// invariant for committed specs; this catches generated/ad-hoc specs.
if (spec.tools && spec.tools.length > 0) {
const registered = getAllToolNames();
const unknown = spec.tools.filter(name => !registered.has(name));
if (unknown.length > 0) {
await interaction.editReply(
`Spec **${specName}** references unregistered tools: ${unknown.join(', ')}. ` +
`Registered: ${Array.from(registered).sort().join(', ')}.`,
);
return;
}
}
const channel = interaction.channel as TextChannel;
if (!channel?.isTextBased()) {
await interaction.editReply('Run this command in a text channel.');
@@ -240,9 +259,7 @@ async function handleRandom(
): Promise<void> {
let specs: string[];
try {
specs = readdirSync(config.SPECS_DIR)
.filter(f => f.endsWith('.yaml') || f.endsWith('.yml'))
.map(f => f.replace(/\.ya?ml$/, ''));
specs = listSpecFiles(config.SPECS_DIR);
} catch (err) {
await interaction.reply({ content: `Could not read specs directory: ${String(err)}`, ephemeral: true });
return;
@@ -480,11 +497,7 @@ async function handleGenerate(interaction: ChatInputCommandInteraction): Promise
// Existing slugs so the LLM avoids collisions
const existingSlugs: string[] = [];
try {
existingSlugs.push(
...readdirSync(config.SPECS_DIR)
.filter(f => f.endsWith('.yaml') || f.endsWith('.yml'))
.map(f => f.replace(/\.ya?ml$/, '')),
);
existingSlugs.push(...listSpecFiles(config.SPECS_DIR));
} catch { /* specs dir may not exist yet */ }
const recentSection = recentEncounters.length > 0

View File

@@ -75,9 +75,6 @@ const EnvSchema = z.object({
VTT_USERNAME: z.string().default(''),
VTT_PASSWORD: z.string().default(''),
VTT_WORLD: z.string().default(''),
// ── Logging ──────────────────────────────────────────────────────────────
LOG_LEVEL: z.enum(['trace', 'debug', 'info', 'warn', 'error']).default('info'),
});
export { EnvSchema };

View File

@@ -1,25 +1,7 @@
import { encode } from 'gpt-tokenizer';
import type { SessionState, ChatMessage } from '../types/index.js';
import { CONTEXT_BUDGET } from '../types/index.js';
import { trimHistory } from '../lib/historyTrim.js';
import { buildSystemPrompt } from './promptBuilder.js';
function estimateTokens(text: string): number {
return Math.ceil(encode(text).length * 1.15);
}
function estimateMessages(messages: ChatMessage[]): number {
return messages.reduce((sum, m) => sum + estimateTokens(m.content) + 4, 0);
}
function trimHistory(messages: ChatMessage[]): ChatMessage[] {
const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
const result = [...messages];
while (estimateMessages(result) > budget && result.length > 6) {
result.splice(0, 2);
}
return result;
}
export function assembleContext(session: SessionState): ChatMessage[] {
const systemPrompt = buildSystemPrompt(session.spec, session.npcMemories, session.resolvedContext, session.players);
const pinned = session.history.filter(m => m.pinned);

37
src/lib/historyTrim.ts Normal file
View File

@@ -0,0 +1,37 @@
import { encode } from 'gpt-tokenizer';
import type { ChatMessage } from '../types/index.js';
import { CONTEXT_BUDGET } from '../types/index.js';
// ---------------------------------------------------------------------------
// Shared chat-history trimmer.
//
// Both `sessionManager.addMessage` and `contextAssembler.assembleContext` need
// the same algorithm: estimate token usage of the sliding (non-pinned) history
// and drop the oldest pairs until the result fits the budget. Extracted here
// so the two call sites can't drift.
// ---------------------------------------------------------------------------
// 15% buffer on top of GPT tokenizer estimate to account for Gemma differences
export function estimateTokens(text: string): number {
return Math.ceil(encode(text).length * 1.15);
}
export function estimateMessages(messages: ChatMessage[]): number {
return messages.reduce((sum, m) => sum + estimateTokens(m.content) + 4, 0);
}
/**
* Trim a sliding (non-pinned) message array down to fit the history budget.
*
* Drops oldest non-pinned pairs (splice(0, 2)) until either the budget is met
* or only the 6-message floor remains. The floor preserves the most recent
* conversational exchange even when messages are unusually long.
*/
export function trimHistory(messages: ChatMessage[]): ChatMessage[] {
const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
const result = [...messages];
while (estimateMessages(result) > budget && result.length > 6) {
result.splice(0, 2);
}
return result;
}

View File

@@ -1,31 +1,12 @@
import { redis } from '../db/redis.js';
import { config } from '../config.js';
import type { SessionState, ChatMessage } from '../types/index.js';
import { CONTEXT_BUDGET } from '../types/index.js';
import { encode } from 'gpt-tokenizer';
import { trimHistory } from '../lib/historyTrim.js';
const SESSION_TTL = 60 * 60 * config.SESSION_TTL_HOURS;
const sessionKey = (threadId: string) => `session:${threadId}`;
const guildThreadsKey = (guildId: string) => `guild_threads:${guildId}`;
// 15% buffer on top of GPT tokenizer estimate to account for Gemma differences
function estimateTokens(text: string): number {
return Math.ceil(encode(text).length * 1.15);
}
function estimateMessages(messages: ChatMessage[]): number {
return messages.reduce((sum, m) => sum + estimateTokens(m.content) + 4, 0);
}
function trimHistory(messages: ChatMessage[]): ChatMessage[] {
const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
const result = [...messages];
while (estimateMessages(result) > budget && result.length > 6) {
result.splice(0, 2);
}
return result;
}
export const sessionManager = {
async create(threadId: string, state: SessionState): Promise<void> {
const pipe = redis.pipeline();

View File

@@ -1,11 +1,15 @@
import { readFileSync } from 'fs';
import { readFileSync, readdirSync } from 'fs';
import { join } from 'path';
import { load } from 'js-yaml';
import { z } from 'zod';
import { config } from '../config.js';
// ---------------------------------------------------------------------------
// Zod schema
// Zod schema — also the source of truth for the EncounterSpec type. The
// type is derived via `z.infer` (see the named `export type` block at the
// bottom of this file) and re-exported from src/types/index.ts.
//
// Editing the schema here is the only place to add/remove/rename fields.
// ---------------------------------------------------------------------------
const NpcSchema = z.object({
@@ -47,19 +51,38 @@ export const EncounterSpecSchema = z.object({
category: z.string().optional(),
})).optional(),
dmNotes: z.string().optional(),
// XP awarded to all participants when the encounter resolves.
xpReward: z.number().optional(),
tools: z.array(z.string()).optional(),
tone: z.string().optional(),
});
export type EncounterSpecLoaded = z.infer<typeof EncounterSpecSchema>;
// Named type exports — z.infer gives us structural types, but consumers
// historically import these names from src/types/index.js. Keep the names.
export type NpcPersona = z.infer<typeof NpcSchema>;
export type EncounterGoal = z.infer<typeof GoalSchema>;
export type EncounterSpec = z.infer<typeof EncounterSpecSchema>;
export type EncounterGoals = EncounterSpec['goals'];
export type EncounterSetting = EncounterSpec['setting'];
export type RandomizableItem = NonNullable<EncounterSpec['randomizable']>[number];
// ---------------------------------------------------------------------------
// Loader
// ---------------------------------------------------------------------------
export function loadSpec(specName: string): EncounterSpecLoaded {
export function loadSpec(specName: string): EncounterSpec {
const filePath = join(config.SPECS_DIR, `${specName}.yaml`);
const raw = readFileSync(filePath, 'utf-8');
const parsed = load(raw);
return EncounterSpecSchema.parse(parsed);
}
// List every encounter spec in `dir` (defaults to config.SPECS_DIR) as spec
// names with the file extension stripped. The single source for spec discovery
// — used by /encounter random, /encounter generate, and the specs-tools
// consistency test so the discovery rule can't drift between call sites.
export function listSpecFiles(dir: string = config.SPECS_DIR): string[] {
return readdirSync(dir)
.filter(f => f.endsWith('.yaml') || f.endsWith('.yml'))
.map(f => f.replace(/\.ya?ml$/, ''));
}

View File

@@ -1,4 +1,23 @@
// Shared types used across all layers of the Mardonar Encounter Engine.
//
// EncounterSpec (and its sub-shapes) is *derived* from the Zod schema in
// src/spec/loader.ts so the runtime validator and the static type can never
// drift. If you need to change the shape, edit the schema; the type follows.
// ---------------------------------------------------------------------------
// Re-export the encounter spec types from the schema's source of truth
// ---------------------------------------------------------------------------
import type { EncounterSpec } from '../spec/loader.js';
export type {
NpcPersona,
EncounterGoal,
EncounterGoals,
EncounterSpec,
EncounterSetting,
RandomizableItem,
} from '../spec/loader.js';
// ---------------------------------------------------------------------------
// Players
@@ -10,69 +29,6 @@ export interface Player {
pronouns?: string;
}
// ---------------------------------------------------------------------------
// Encounter Spec
// ---------------------------------------------------------------------------
export interface NpcPersona {
id: string;
name: string;
// If set, the display name for this session is resolved from resolvedContext[nameKey].
// The canonical `name` field remains the graph identity used for memory queries.
nameKey?: string;
role: string;
persona: string;
memoryKey?: string;
}
export interface EncounterGoal {
id: string;
label: string;
}
export interface EncounterGoals {
hidden: boolean;
primary: EncounterGoal[];
secondary: EncounterGoal[];
}
export interface EncounterSetting {
location: string;
mood: string;
ambientNpcs: string;
}
export interface RandomizableItem {
key: string;
query: string;
fallback: string;
// 'vocabulary' samples from lore/vocabulary.yaml using `category` (dot-path, e.g. 'names.dwarf.female').
// Default / absent means 'graphmcp' — semantic search against the knowledge graph.
source?: 'graphmcp' | 'vocabulary';
category?: string;
}
/**
 * Full shape of one encounter spec: identity, setting, opening narrative,
 * NPCs, goals, rules and the optional knobs documented on the fields below.
 */
export interface EncounterSpec {
encounterId: string;
title: string;
setting: EncounterSetting;
openingNarrative: string;
npcs: NpcPersona[];
goals: EncounterGoals;
sportsmanshipRules: string[];
// Values may be numbers or strings.
// NOTE(review): the meaning of each form is not visible here — confirm against the skill-check consumer.
skillChecks: Record<string, number | string>;
randomizable?: RandomizableItem[];
dmNotes?: string;
// XP awarded to all participants when the encounter resolves.
xpReward?: number;
// Optional allow-list of tool plugin names active for this encounter.
// Omit to enable all registered tools (default behaviour).
tools?: string[];
// Narration flavor for this encounter (e.g. "grim", "tense", "comedic").
// Drives the system prompt tone block and drop notice string selection.
tone?: string;
}
// ---------------------------------------------------------------------------
// Session State
// ---------------------------------------------------------------------------

View File

@@ -22,13 +22,18 @@ tests/
## Running
```bash
npm test # alias for `npm run test:unit` + runs once (not watch)
npm run test:unit # run all tests in tests/unit
npm run test:int # run all tests in tests/integration
npm test # vitest run — runs ALL tests, including integration (requires live infra)
npm run test:unit # run only tests/unit
npm run test:int # run only tests/integration (requires live infra)
npm run test:coverage # run unit tests with v8 coverage report
npm run test:watch # vitest in watch mode
```
> **CI default:** `.gitea/workflows/test.yml` runs `npm run test:unit` (and the
> coverage report), not `npm test` — so a missing Redis or GraphMCP endpoint
> will not fail CI. Run `npm run test:int` explicitly only when you intend to
> exercise the live-infrastructure path.
## Conventions
### 1. One module per file

View File

@@ -53,8 +53,11 @@ describe('EnvSchema', () => {
expect(c.GRAPHMCP_SCORE_THRESHOLD).toBe(0.9);
});
it('rejects invalid LOG_LEVEL', () => {
expect(() => EnvSchema.parse({ ...base, LOG_LEVEL: 'verbose' })).toThrow();
it('ignores LOG_LEVEL (no longer a config knob — logging is per-call via src/lib/logger.ts)', () => {
  // Zod strips unknown keys by default, so a stale LOG_LEVEL in .env is
  // dropped from the parsed config rather than crashing startup on an
  // invalid enum value.
  const c = EnvSchema.parse({ ...base, LOG_LEVEL: 'verbose' });
  // Assert that the key is absent rather than reading `c.LOG_LEVEL`:
  //  - the property is no longer part of the parsed config type, so a direct
  //    read does not type-check under `strict`;
  //  - `toBeUndefined()` cannot distinguish "key stripped" from "key present
  //    with value undefined", whereas `not.toHaveProperty` pins the former.
  expect(c).not.toHaveProperty('LOG_LEVEL');
});
it('rejects OLLAMA_TEMPERATURE above 2', () => {

View File

@@ -0,0 +1,127 @@
import { describe, it, expect } from 'vitest';
import { trimHistory, estimateTokens, estimateMessages } from '../../src/lib/historyTrim.js';
import { CONTEXT_BUDGET } from '../../src/types/index.js';
import type { ChatMessage } from '../../src/types/index.js';
/**
 * Test helper: builds a ChatMessage from a role and content.
 *
 * @param role    Message role, typed from `ChatMessage['role']`.
 * @param content Message text.
 * @param pinned  Pin flag stored on the message; defaults to `false`.
 * @returns A message stamped with the current `Date.now()` value.
 */
function makeMessage(role: ChatMessage['role'], content: string, pinned = false): ChatMessage {
  const message: ChatMessage = {
    role,
    content,
    pinned,
    timestamp: Date.now(),
  };
  return message;
}
// Unit tests for estimateTokens: the per-string token estimate (BPE count
// plus a 15% buffer, rounded up — as pinned by the expectations below).
describe('estimateTokens', () => {
  it('estimates zero for an empty string', () => {
    const estimate = estimateTokens('');
    expect(estimate).toBe(0);
  });

  it('applies the 15% Gemma buffer on top of the BPE count', () => {
    // One BPE token for "hello": 1 × 1.15 = 1.15, which rounds up to 2.
    const estimate = estimateTokens('hello');
    expect(estimate).toBe(2);
  });

  it('rounds up (never down) for fractional buffers', () => {
    // Roughly 3 BPE tokens: 3 × 1.15 = 3.45, which rounds up to 4. The exact
    // BPE split is not pinned, hence the lower-bound assertion.
    const estimate = estimateTokens('the quick brown');
    expect(estimate).toBeGreaterThanOrEqual(4);
  });
});
// Unit tests for estimateMessages: the summed estimate over a message list,
// including the fixed per-message overhead pinned by the second test.
describe('estimateMessages', () => {
  it('returns zero for an empty array', () => {
    const total = estimateMessages([]);
    expect(total).toBe(0);
  });

  it('sums the per-message token estimate plus the 4-token role/separator overhead', () => {
    // 'hello'    → BPE 1 × 1.15 = 1.15 → 2 tokens, + 4 overhead = 6
    const userTurn = makeMessage('user', 'hello');
    // 'hi there' → BPE 2 × 1.15 = 2.3  → 3 tokens, + 4 overhead = 7
    const assistantTurn = makeMessage('assistant', 'hi there');
    const conversation: ChatMessage[] = [userTurn, assistantTurn];

    // Expected total: 6 + 7 = 13.
    const total = estimateMessages(conversation);
    expect(total).toBe(13);
  });
});
// Unit tests for trimHistory: fitting a message list into the history budget
// (CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY) by dropping the oldest
// pairs, subject to a 6-message floor.
describe('trimHistory', () => {
  it('returns the input array unchanged when it fits the budget', () => {
    const messages = [
      makeMessage('user', 'short message'),
      makeMessage('assistant', 'short reply'),
    ];
    const result = trimHistory(messages);
    expect(result).toHaveLength(2);
    // Length alone would also pass if the wrong messages came back; pin the
    // contents and order too.
    expect(result).toEqual(messages);
  });

  it('never returns fewer than 6 messages even if all are huge', () => {
    // Build messages that each tokenize to >CONTEXT_BUDGET.HISTORY tokens —
    // i.e. individually over the whole history budget. The trim function must
    // still respect the 6-message floor.
    //
    // BPE compresses repetitive input; we use varied natural-language content
    // so each message lands well above the budget.
    const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
    // `budget` repetitions of a multi-token sentence: each message is several
    // times the whole budget on its own.
    const perMessage = 'the quick brown fox jumps over the lazy dog. '.repeat(budget);
    const messages: ChatMessage[] = Array.from({ length: 10 }, (_, i) =>
      makeMessage(i % 2 === 0 ? 'user' : 'assistant', perMessage),
    );
    const result = trimHistory(messages);
    // Floor: 6 messages even when every message alone overflows the budget.
    expect(result.length).toBe(6);
  });

  it('drops oldest pairs first (FIFO)', () => {
    // Force the trim loop to actually drop pairs: each message alone overflows
    // the history budget, so the 6-message floor is the binding constraint.
    // With 10 messages (5 pairs) the loop drops the oldest 2 pairs down to 6.
    // BPE compresses repetitive input, so use varied natural language (same
    // trick as the "never fewer than 6" test above).
    const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
    const big = 'the quick brown fox jumps over the lazy dog. '.repeat(budget);
    const pair = (user: string, assistant: string): ChatMessage[] => [
      makeMessage('user', `${user} ${big}`),
      makeMessage('assistant', `${assistant} ${big}`),
    ];
    const messages: ChatMessage[] = [
      ...pair('P0_USER', 'P0_ASSISTANT'), // oldest pair — must be dropped
      ...pair('P1_USER', 'P1_ASSISTANT'), // must be dropped
      ...pair('P2_USER', 'P2_ASSISTANT'),
      ...pair('P3_USER', 'P3_ASSISTANT'),
      ...pair('P4_USER', 'P4_ASSISTANT'), // newest pair — must survive
    ];
    expect(messages).toHaveLength(10);

    const result = trimHistory(messages);

    // Floor stops the loop at 6 messages, having dropped the oldest 2 pairs.
    expect(result).toHaveLength(6);
    // Oldest two pairs are gone — this is the actual FIFO assertion.
    expect(result.some(m => m.content.startsWith('P0_'))).toBe(false);
    expect(result.some(m => m.content.startsWith('P1_'))).toBe(false);
    // Newest pair survives.
    expect(result.some(m => m.content.startsWith('P4_USER'))).toBe(true);
    expect(result.some(m => m.content.startsWith('P4_ASSISTANT'))).toBe(true);
  });

  it('does not mutate the input array', () => {
    const messages = [
      makeMessage('user', 'first'),
      makeMessage('assistant', 'second'),
    ];
    // Shallow copies of every element, so reordering, replacement or field
    // edits are all detectable afterwards.
    const snapshot = messages.map(m => ({ ...m }));
    const before = messages.length;
    trimHistory(messages);
    expect(messages).toHaveLength(before);
    expect(messages[0].content).toBe('first');
    expect(messages).toEqual(snapshot);
  });

  it('lands just at or under the budget after trimming', () => {
    // Push a long sliding history that overflows the budget, then verify
    // the result fits. The "drops oldest non-pinned pairs" test in
    // contextAssembler.test.ts already exercises the overflow path end-to-end.
    //
    // Message size is derived from the budget instead of being hard-coded.
    // With a fixed size, six messages could exceed the budget on their own,
    // and the 6-message floor would then make the final assertion
    // unsatisfiable. Each message here is a small fraction of the budget, so
    // 40 of them overflow it while the 6-message floor still fits.
    const budget = CONTEXT_BUDGET.HISTORY - CONTEXT_BUDGET.SAFETY;
    const repeats = Math.max(1, Math.floor(budget / 150));
    const chunk = 'the quick brown fox jumps over the lazy dog. '.repeat(repeats);
    const messages: ChatMessage[] = Array.from({ length: 40 }, (_, i) =>
      makeMessage(i % 2 === 0 ? 'user' : 'assistant', chunk),
    );
    // Precondition: the untrimmed history really does overflow; otherwise
    // this test would pass without exercising the trim path.
    expect(estimateMessages(messages)).toBeGreaterThan(budget);

    const result = trimHistory(messages);

    expect(result.length).toBeLessThan(messages.length);
    expect(estimateMessages(result)).toBeLessThanOrEqual(budget);
  });
});

View File

@@ -0,0 +1,102 @@
import { describe, it, expect, beforeAll, vi } from 'vitest';
import { readdirSync, readFileSync } from 'fs';
import { join } from 'path';
import { load as parseYaml } from 'js-yaml';
// Mock config so any module that reads config.SPECS_DIR (directly or
// transitively) resolves to the real specs/ directory.
vi.mock('../../src/config.js', () => ({
config: { SPECS_DIR: './specs' },
}));
// Importing src/harness/tools/index.js runs the side-effect imports that
// populate the registry. Importing toolRegistry alone would NOT do that —
// the registry is module-local.
import { getAllToolNames } from '../../src/harness/toolRegistry.js';
import '../../src/harness/tools/index.js';
// Discover and parse every .yaml file in specs/ once at setup, so the three
// `it` blocks below iterate a cached snapshot instead of re-reading and
// re-parsing each file per test.
const SPECS_DIR = './specs';
/**
 * One spec file discovered in SPECS_DIR, paired with its parsed YAML content.
 */
interface SpecFile {
// Directory entry name (not a full path) as returned by readdirSync(SPECS_DIR).
file: string;
// Result of parsing the file with js-yaml, treated as a string-keyed record.
raw: Record<string, unknown>;
}
const specFiles: SpecFile[] = [];
// Discover and parse every YAML spec once, before any test in this file runs.
// Throws if the directory has no spec files or if a file does not parse to a
// YAML mapping, so a broken fixture fails loudly here rather than as an
// opaque TypeError inside an assertion.
beforeAll(() => {
  // Sorted so failure messages list files in a stable order regardless of the
  // order the filesystem returns entries in.
  const names = readdirSync(SPECS_DIR)
    .filter(entry => entry.endsWith('.yaml') || entry.endsWith('.yml'))
    .sort();

  // Sanity check — if the spec dir is missing or empty the test suite has no
  // signal and silently passes; surface that loudly instead.
  if (names.length === 0) {
    throw new Error(`No spec YAML files found in ${SPECS_DIR}`);
  }

  for (const name of names) {
    const parsed: unknown = parseYaml(readFileSync(join(SPECS_DIR, name), 'utf8'));
    // An empty document parses to undefined and a scalar document to a
    // primitive; neither can be queried for `tools`, so reject them up front.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error(`Spec file ${name} in ${SPECS_DIR} did not parse to a YAML mapping`);
    }
    specFiles.push({ file: name, raw: parsed as Record<string, unknown> });
  }
});
// Cross-checks the `tools:` allow-lists in specs/*.yaml against the tool
// plugin registry, in both directions.
describe('specs/*.yaml tool references', () => {
  it('every spec file declares a `tools:` list (so the active set is intentional, not implicit-all)', () => {
    const missing = specFiles
      .filter(s => !Array.isArray(s.raw.tools))
      .map(s => s.file);
    expect(missing, `specs without an explicit tools: list: ${missing.join(', ')}`).toEqual([]);
  });

  it('every name in every spec `tools:` list is registered in the plugin registry', () => {
    const registered = getAllToolNames();
    const offenders: Array<{ file: string; unknown: string[] }> = [];
    for (const { file, raw } of specFiles) {
      if (!Array.isArray(raw.tools)) continue; // covered by the previous test
      // A non-string entry (number, nested mapping, …) can never name a
      // registered tool, so it is reported alongside unregistered names
      // instead of being silently skipped. Non-strings are rendered as JSON
      // so the failure message shows what the YAML actually contained.
      const unknown = (raw.tools as unknown[])
        .filter(entry => typeof entry !== 'string' || !registered.has(entry))
        .map(entry => (typeof entry === 'string' ? entry : JSON.stringify(entry)));
      if (unknown.length > 0) offenders.push({ file, unknown });
    }
    expect(
      offenders,
      `unknown tool names referenced in specs:\n${offenders
        .map(o => `  ${o.file}: ${o.unknown.join(', ')}`)
        .join('\n')}\nRegistered tools: ${Array.from(registered).sort().join(', ')}`,
    ).toEqual([]);
  });

  it('every registered tool is referenced by at least one spec (sanity: the registry is reachable from the default active set)', () => {
    // There is no exemption mechanism here: every registered tool must appear
    // in at least one spec. This catches the case where a tool gets
    // registered but no spec opts into it, leaving it dead code from a spec's
    // perspective.
    const referenced = new Set<string>();
    for (const { raw } of specFiles) {
      if (!Array.isArray(raw.tools)) continue;
      for (const name of raw.tools as unknown[]) {
        if (typeof name === 'string') referenced.add(name);
      }
    }
    const registered = getAllToolNames();
    const unused = Array.from(registered).filter(name => !referenced.has(name));
    // If new tools are added that aren't yet referenced by any spec, the
    // maintainer can suppress this failure or add a spec — surfacing it
    // here is the point. As of 2026-06-19 all 6 registered tools are used.
    expect(
      unused,
      `registered tools never referenced by any spec: ${unused.join(', ')}`,
    ).toEqual([]);
  });
});