Files
zalbot/tests/unit/messageRouterRunLLMTurn.test.ts
Kaysser Kayyali e2c92e854f
Some checks failed
tests / Unit tests (Node 22) (push) Failing after 2m13s
Add unit tests for LLM clients, persona loader, and XP/Foundry rewards
Expands the unit test suite from 320 to 380 tests (+60) and adds a
Gitea Actions CI workflow. Closes all six follow-up recommendations
from the test-architecture validation report.

New tests (tests/unit/):
  - ollamaClient.test.ts          — Ollama SDK wrapper, options passthrough
  - litellmClient.test.ts         — OpenAI SDK wrapper, model fallback
  - personaLoader.test.ts         — Zod validation + cache invalidation
  - foundryReward.test.ts         — Tool plugin: lookup, errors, partial grants
  - xpAwarder.test.ts             — Bulk XP awards + per-player skip reasons
  - redisErrorPath.test.ts        — Singleton error handler does not crash
  - messageRouterRunLLMTurn.test.ts — 18 cases for the runtime heart:
    narrative-only path, tool dispatch, filter correction, retry loop
    guard, missed-skill-check heuristic, typing indicator interval,
    LLM error fallback, archive on resolve.

Coverage (line %):
  - harness/litellmClient.ts      0 → 100
  - harness/ollamaClient.ts       0 → 100
  - harness/tools/foundryReward.ts 0 → 100
  - session/xpAwarder.ts          0 → 100
  - persona/loader.ts             0 → 100
  - db/redis.ts                   0 → 100
  - bot/handlers/messageRouter.ts 0 → 39.86 (runLLMTurn now covered)

Tooling:
  - package.json: + test:coverage, test:watch scripts
  - devDep: @vitest/coverage-v8@^3.1.0
  - tests/README.md: conventions, anti-patterns, template map
  - .gitignore: exclude coverage/
  - .gitea/workflows/test.yml: Node 22, npm cache, tsc --noEmit gate

Documentation (from earlier /bmad-document-project run, now committed):
  - docs/index.md
  - docs/project-overview.md
  - docs/architecture.md
  - docs/deployment-guide.md
  - docs/api-contracts.md
  - docs/data-models.md
  - docs/source-tree-analysis.md
  - docs/component-inventory.md
  - docs/development-guide.md
  - _bmad-output/test-artifacts/automate-validation-report.md

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 05:59:13 +00:00

410 lines
17 KiB
TypeScript

import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
// ── assembled-context mock ───────────────────────────────────────────────────
const { mockAssembleContext } = vi.hoisted(() => ({
mockAssembleContext: vi.fn(),
}));
vi.mock('../../src/harness/contextAssembler.js', () => ({
assembleContext: mockAssembleContext,
}));
// ── LLM client mock ──────────────────────────────────────────────────────────
const { mockCallLLM } = vi.hoisted(() => ({
mockCallLLM: vi.fn(),
}));
vi.mock('../../src/harness/llmClient.js', () => ({
callLLM: mockCallLLM,
}));
// ── dispatchTool mock ────────────────────────────────────────────────────────
const { mockDispatchTool } = vi.hoisted(() => ({
mockDispatchTool: vi.fn(),
}));
vi.mock('../../src/harness/toolDispatcher.js', () => ({
dispatchTool: mockDispatchTool,
}));
// ── sessionManager mock ──────────────────────────────────────────────────────
const { mockAddMessage, mockUpdate, mockGet } = vi.hoisted(() => ({
mockAddMessage: vi.fn(),
mockUpdate: vi.fn(),
mockGet: vi.fn(),
}));
vi.mock('../../src/session/sessionManager.js', () => ({
sessionManager: {
addMessage: mockAddMessage,
update: mockUpdate,
get: mockGet,
},
}));
// ── responseFilter mock ──────────────────────────────────────────────────────
const { mockFilterLLMResponse, mockDetectMissedSkillCheck, mockLogFiltered } = vi.hoisted(() => ({
mockFilterLLMResponse: vi.fn(),
mockDetectMissedSkillCheck: vi.fn(),
mockLogFiltered: vi.fn(),
}));
vi.mock('../../src/bot/handlers/responseFilter.js', () => ({
filterLLMResponse: mockFilterLLMResponse,
detectMissedSkillCheck: mockDetectMissedSkillCheck,
logFiltered: mockLogFiltered,
}));
// ── reaction / burst mocks ───────────────────────────────────────────────────
vi.mock('../../src/bot/handlers/reactionManager.js', () => ({
registerScheduled: vi.fn(),
drainPending: vi.fn(() => []),
clearPending: vi.fn(),
upgradeToProcessing: vi.fn(),
upgradeToComplete: vi.fn(),
cleanupReactions: vi.fn(),
}));
vi.mock('../../src/bot/handlers/queueCap.js', () => ({
isBurstCapped: vi.fn(() => false),
incrementBurst: vi.fn(),
resetBurst: vi.fn(),
sendDropNotice: vi.fn(),
}));
vi.mock('../../src/lib/logger.js', () => ({
log: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
}));
// ── subject under test ───────────────────────────────────────────────────────
// Import AFTER all vi.mock calls. We re-import per-test where needed so we can
// attach vi.spyOn() to the module's scheduleEncounterLLMTurn export.
let runLLMTurn: typeof import('../../src/bot/handlers/messageRouter.js').runLLMTurn;
let scheduleSpy: ReturnType<typeof vi.spyOn>;
import { mockSession } from '../fixtures/spec.js';
import type { SessionState } from '../../src/types/index.js';
function makeThread(extra: Partial<{ setArchived: any }> = {}) {
const thread: any = {
send: vi.fn().mockResolvedValue({ id: 'sent-msg' }),
sendTyping: vi.fn().mockResolvedValue(undefined),
setArchived: extra.setArchived ?? vi.fn().mockResolvedValue(undefined),
messages: { fetch: vi.fn().mockResolvedValue(null) },
};
return thread;
}
function sessionWith(history: SessionState['history'], pending?: SessionState['pendingSkillCheck']): SessionState {
return { ...mockSession, history, pendingSkillCheck: pending };
}
beforeEach(async () => {
vi.clearAllMocks();
vi.useFakeTimers();
// Re-import the module under test each time so we can spy on its
// scheduleEncounterLLMTurn export. The mocks are reused across imports.
const mod = await import('../../src/bot/handlers/messageRouter.js');
runLLMTurn = mod.runLLMTurn;
scheduleSpy = vi.spyOn(mod, 'scheduleEncounterLLMTurn').mockImplementation(() => undefined);
// Always default: context assembles to something, filter accepts everything.
mockAssembleContext.mockReturnValue([{ role: 'system', content: 'sys', timestamp: 0 }]);
mockFilterLLMResponse.mockReturnValue({ ok: true });
mockDetectMissedSkillCheck.mockReturnValue(false);
mockAddMessage.mockResolvedValue(undefined);
mockUpdate.mockResolvedValue(undefined);
mockGet.mockImplementation(async (threadId: string) => ({ ...mockSession, threadId }));
mockDispatchTool.mockResolvedValue({ systemMessage: '[TOOL] done', resolved: undefined, error: undefined });
});
afterEach(() => {
vi.useRealTimers();
});
describe('runLLMTurn — narrative-only response (no tool call)', () => {
it('posts the narrative to the thread', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'The wind howls.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(thread.send).toHaveBeenCalledWith('The wind howls.');
});
it('stores the assistant narrative in session history', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'A leaf falls.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockAddMessage).toHaveBeenCalledWith(
mockSession.threadId,
expect.objectContaining({ role: 'assistant', content: 'A leaf falls.' }),
);
});
it('does not call dispatchTool when there is no tool call', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'quiet.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockDispatchTool).not.toHaveBeenCalled();
});
it('passes skipRollClaim:true when a [SKILL CHECK RESULT] message is in the recent 6 messages', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 15 and hit the goblin.', toolCall: undefined });
const thread = makeThread();
const history: SessionState['history'] = [
{ role: 'system', content: '[SKILL CHECK RESULT] Aelindra rolled 15 vs DC 12. Result: SUCCESS.', timestamp: 1 },
];
await runLLMTurn(sessionWith(history), thread, {} as any);
expect(mockFilterLLMResponse).toHaveBeenCalledWith(
'You rolled a 15 and hit the goblin.',
{ skipRollClaim: true },
);
});
it('passes skipRollClaim:false when no recent [SKILL CHECK RESULT] message exists', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: '...', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockFilterLLMResponse).toHaveBeenCalledWith('...', { skipRollClaim: false });
});
});
describe('runLLMTurn — filter correction', () => {
it('on filter rejection with no recent correction, sends a [FILTER CORRECTION] system message', async () => {
mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 17.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockLogFiltered).toHaveBeenCalledWith(
'fabricated_roll_result',
'You rolled a 17.',
expect.objectContaining({ threadId: mockSession.threadId, encounterId: mockSession.encounterId }),
);
expect(mockAddMessage).toHaveBeenCalledWith(
mockSession.threadId,
expect.objectContaining({
role: 'system',
content: expect.stringMatching(/^\[FILTER CORRECTION\]/),
}),
);
// The retry path also invokes scheduleEncounterLLMTurn with immediate=true.
// (We can't reliably observe the internal call via the export spy in ESM
// live-bindings, so we verify the side effects directly.)
const correction = mockAddMessage.mock.calls.find(([_, m]) =>
(m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
)?.[1] as { content: string };
expect(correction.content).toMatch(/Do NOT state or imply a specific dice result/);
});
it('on filter rejection when last message is already a correction, skips the retry to avoid loops', async () => {
mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'empty_response' });
mockCallLLM.mockResolvedValueOnce({ narrative: '', toolCall: undefined });
const thread = makeThread();
const history: SessionState['history'] = [
{ role: 'system', content: '[FILTER CORRECTION] previous turn suppressed (empty_response).', timestamp: 1 },
];
await runLLMTurn(sessionWith(history), thread, {} as any);
// No new correction message should be added when one was just sent.
const correctionAdds = mockAddMessage.mock.calls.filter(([_, m]) =>
(m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
);
expect(correctionAdds).toHaveLength(0);
});
it('uses the echoed_system_tag correction text when filter rejects for that reason', async () => {
mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'echoed_system_tag' });
mockCallLLM.mockResolvedValueOnce({ narrative: '[TOOL] something', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
const correction = mockAddMessage.mock.calls.find(([_, m]) =>
(m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
)?.[1] as { content: string };
expect(correction.content).toMatch(/Do NOT echo internal system tags/);
});
it('does NOT post the filtered narrative to the thread', async () => {
mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 17.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(thread.send).not.toHaveBeenCalledWith('You rolled a 17.');
});
});
describe('runLLMTurn — tool call dispatch', () => {
it('dispatches the toolCall with a freshly fetched session and writes the system message', async () => {
mockCallLLM.mockResolvedValueOnce({
narrative: '',
toolCall: { tool: 'goal_register', args: { goals: ['x'] } },
});
const freshSession = { ...mockSession, fetched: true };
mockGet.mockResolvedValueOnce(freshSession);
mockDispatchTool.mockResolvedValueOnce({ systemMessage: '[TOOL] ok', error: undefined, resolved: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockGet).toHaveBeenCalledWith(mockSession.threadId);
expect(mockDispatchTool).toHaveBeenCalledWith(
{ tool: 'goal_register', args: { goals: ['x'] } },
expect.objectContaining({ session: freshSession, thread }),
);
expect(mockAddMessage).toHaveBeenCalledWith(
mockSession.threadId,
expect.objectContaining({ role: 'system', content: '[TOOL] ok' }),
);
});
it('posts a friendly fallback message when dispatchTool returns an error', async () => {
mockCallLLM.mockResolvedValueOnce({
narrative: '',
toolCall: { tool: 'goal_register', args: {} },
});
mockDispatchTool.mockResolvedValueOnce({ systemMessage: '[TOOL] failed', error: new Error('boom'), resolved: undefined });
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(thread.send).toHaveBeenCalledWith(expect.stringMatching(/narrator stumbles/));
});
it('marks the session resolved and schedules archive when tool reports resolved', async () => {
mockCallLLM.mockResolvedValueOnce({
narrative: '',
toolCall: { tool: 'encounter_resolve', args: { outcomeId: 'catch', summary: 'got him' } },
});
mockDispatchTool.mockResolvedValueOnce({
systemMessage: '[TOOL] resolved',
resolved: { outcomeId: 'catch', summary: 'got him' },
error: undefined,
});
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockUpdate).toHaveBeenCalledWith(mockSession.threadId, {
phase: 'resolved',
outcome: 'catch',
outcomeSummary: 'got him',
});
// The archive setTimeout fires after 5 seconds.
expect(thread.setArchived).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(5_000);
expect(thread.setArchived).toHaveBeenCalledWith(true);
});
it('does not throw and returns early when the session was deleted before dispatch', async () => {
mockCallLLM.mockResolvedValueOnce({
narrative: '',
toolCall: { tool: 'goal_register', args: {} },
});
mockGet.mockResolvedValueOnce(null); // session disappeared
const thread = makeThread();
await expect(runLLMTurn(sessionWith([]), thread, {} as any)).resolves.toBeUndefined();
expect(mockDispatchTool).not.toHaveBeenCalled();
});
it('still dispatches the tool even when the narrative was filtered', async () => {
mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
mockCallLLM.mockResolvedValueOnce({
narrative: 'You rolled a 12. ',
toolCall: { tool: 'goal_register', args: { foo: 'bar' } },
});
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockDispatchTool).toHaveBeenCalled();
// But the narrative was suppressed.
expect(thread.send).not.toHaveBeenCalledWith('You rolled a 12.');
});
});
describe('runLLMTurn — LLM error', () => {
it('posts a friendly error message when the LLM throws and clears the typing interval', async () => {
mockCallLLM.mockRejectedValueOnce(new Error('503 from upstream'));
const thread = makeThread();
const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(consoleSpy).toHaveBeenCalledWith('[messageRouter] LLM call failed:', expect.any(Error));
expect(thread.send).toHaveBeenCalledWith(expect.stringMatching(/narrator pauses/));
// The interval would normally fire every 8s — advance to confirm it's gone.
await vi.advanceTimersByTimeAsync(20_000);
expect(thread.sendTyping).toHaveBeenCalled();
// No filter or dispatch should have happened.
expect(mockFilterLLMResponse).not.toHaveBeenCalled();
expect(mockDispatchTool).not.toHaveBeenCalled();
consoleSpy.mockRestore();
});
});
describe('runLLMTurn — missed skill check heuristic', () => {
it('logs a warning when the narrative asks for a roll but no tool call was emitted and no roll is pending', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'Make a Strength check.', toolCall: undefined });
mockDetectMissedSkillCheck.mockReturnValueOnce(true);
const thread = makeThread();
await runLLMTurn(sessionWith([]), thread, {} as any);
expect(mockDetectMissedSkillCheck).toHaveBeenCalledWith('Make a Strength check.');
});
it('skips the heuristic when a roll result is already pending', async () => {
mockCallLLM.mockResolvedValueOnce({ narrative: 'Make a check.', toolCall: undefined });
const thread = makeThread();
await runLLMTurn(
sessionWith([], { player: 'Aelindra', dc: 12, messageId: 'm1' }),
thread,
{} as any,
);
expect(mockDetectMissedSkillCheck).not.toHaveBeenCalled();
});
});
describe('runLLMTurn — typing indicator', () => {
it('starts a typing indicator that fires every 8s while the LLM is being awaited', async () => {
// Make callLLM slow so we can observe the interval
let resolveCall!: (v: unknown) => void;
mockCallLLM.mockReturnValueOnce(new Promise(r => { resolveCall = r; }));
const thread = makeThread();
const pending = runLLMTurn(sessionWith([]), thread, {} as any);
expect(thread.sendTyping).toHaveBeenCalledTimes(1);
await vi.advanceTimersByTimeAsync(8_000);
expect(thread.sendTyping).toHaveBeenCalledTimes(2);
resolveCall({ narrative: 'ok', toolCall: undefined });
await pending;
// After resolution the interval is cleared; advancing further should not send typing again.
const callsBefore = thread.sendTyping.mock.calls.length;
await vi.advanceTimersByTimeAsync(20_000);
expect(thread.sendTyping.mock.calls.length).toBe(callsBefore);
});
});