zalbot/tests/unit/messageRouterRunLLMTurn.test.ts

import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';

// ── assembled-context mock ───────────────────────────────────────────────────
const { mockAssembleContext } = vi.hoisted(() => ({
  mockAssembleContext: vi.fn(),
}));

vi.mock('../../src/harness/contextAssembler.js', () => ({
  assembleContext: mockAssembleContext,
}));

// ── LLM client mock ──────────────────────────────────────────────────────────
const { mockCallLLM } = vi.hoisted(() => ({
  mockCallLLM: vi.fn(),
}));

vi.mock('../../src/harness/llmClient.js', () => ({
  callLLM: mockCallLLM,
}));

// ── dispatchTool mock ────────────────────────────────────────────────────────
const { mockDispatchTool } = vi.hoisted(() => ({
  mockDispatchTool: vi.fn(),
}));

vi.mock('../../src/harness/toolDispatcher.js', () => ({
  dispatchTool: mockDispatchTool,
}));

// ── sessionManager mock ──────────────────────────────────────────────────────
const { mockAddMessage, mockUpdate, mockGet } = vi.hoisted(() => ({
  mockAddMessage: vi.fn(),
  mockUpdate: vi.fn(),
  mockGet: vi.fn(),
}));

vi.mock('../../src/session/sessionManager.js', () => ({
  sessionManager: {
    addMessage: mockAddMessage,
    update: mockUpdate,
    get: mockGet,
  },
}));

// ── responseFilter mock ──────────────────────────────────────────────────────
const { mockFilterLLMResponse, mockDetectMissedSkillCheck, mockLogFiltered } = vi.hoisted(() => ({
  mockFilterLLMResponse: vi.fn(),
  mockDetectMissedSkillCheck: vi.fn(),
  mockLogFiltered: vi.fn(),
}));

vi.mock('../../src/bot/handlers/responseFilter.js', () => ({
  filterLLMResponse: mockFilterLLMResponse,
  detectMissedSkillCheck: mockDetectMissedSkillCheck,
  logFiltered: mockLogFiltered,
}));

// ── reaction / burst mocks ───────────────────────────────────────────────────
vi.mock('../../src/bot/handlers/reactionManager.js', () => ({
  registerScheduled: vi.fn(),
  drainPending: vi.fn(() => []),
  clearPending: vi.fn(),
  upgradeToProcessing: vi.fn(),
  upgradeToComplete: vi.fn(),
  cleanupReactions: vi.fn(),
}));

vi.mock('../../src/bot/handlers/queueCap.js', () => ({
  isBurstCapped: vi.fn(() => false),
  incrementBurst: vi.fn(),
  resetBurst: vi.fn(),
  sendDropNotice: vi.fn(),
}));

vi.mock('../../src/lib/logger.js', () => ({
  log: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
}));

// ── subject under test ───────────────────────────────────────────────────────
// Import AFTER all vi.mock calls. We re-import per-test where needed so we can
// attach vi.spyOn() to the module's scheduleEncounterLLMTurn export.
let runLLMTurn: typeof import('../../src/bot/handlers/messageRouter.js').runLLMTurn;
let scheduleSpy: ReturnType<typeof vi.spyOn>;
import { mockSession } from '../fixtures/spec.js';
import type { SessionState } from '../../src/types/index.js';

function makeThread(extra: Partial<{ setArchived: any }> = {}) {
  const thread: any = {
    send: vi.fn().mockResolvedValue({ id: 'sent-msg' }),
    sendTyping: vi.fn().mockResolvedValue(undefined),
    setArchived: extra.setArchived ?? vi.fn().mockResolvedValue(undefined),
    messages: { fetch: vi.fn().mockResolvedValue(null) },
  };
  return thread;
}

function sessionWith(history: SessionState['history'], pending?: SessionState['pendingSkillCheck']): SessionState {
  return { ...mockSession, history, pendingSkillCheck: pending };
}

beforeEach(async () => {
  vi.clearAllMocks();
  vi.useFakeTimers();
  // Re-import the module under test each time so we can spy on its
  // scheduleEncounterLLMTurn export. The mocks are reused across imports.
  const mod = await import('../../src/bot/handlers/messageRouter.js');
  runLLMTurn = mod.runLLMTurn;
  scheduleSpy = vi.spyOn(mod, 'scheduleEncounterLLMTurn').mockImplementation(() => undefined);

  // Always default: context assembles to something, filter accepts everything.
  mockAssembleContext.mockReturnValue([{ role: 'system', content: 'sys', timestamp: 0 }]);
  mockFilterLLMResponse.mockReturnValue({ ok: true });
  mockDetectMissedSkillCheck.mockReturnValue(false);
  mockAddMessage.mockResolvedValue(undefined);
  mockUpdate.mockResolvedValue(undefined);
  mockGet.mockImplementation(async (threadId: string) => ({ ...mockSession, threadId }));
  mockDispatchTool.mockResolvedValue({ systemMessage: '[TOOL] done', resolved: undefined, error: undefined });
});

afterEach(() => {
  vi.useRealTimers();
});

describe('runLLMTurn — narrative-only response (no tool call)', () => {
  it('posts the narrative to the thread', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'The wind howls.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(thread.send).toHaveBeenCalledWith('The wind howls.');
  });

  it('stores the assistant narrative in session history', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'A leaf falls.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockAddMessage).toHaveBeenCalledWith(
      mockSession.threadId,
      expect.objectContaining({ role: 'assistant', content: 'A leaf falls.' }),
    );
  });

  it('does not call dispatchTool when there is no tool call', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'quiet.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockDispatchTool).not.toHaveBeenCalled();
  });

  it('passes skipRollClaim:true when a [SKILL CHECK RESULT] message is in the recent 6 messages', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 15 and hit the goblin.', toolCall: undefined });
    const thread = makeThread();

    const history: SessionState['history'] = [
      { role: 'system', content: '[SKILL CHECK RESULT] Aelindra rolled 15 vs DC 12. Result: SUCCESS.', timestamp: 1 },
    ];
    await runLLMTurn(sessionWith(history), thread, {} as any);

    expect(mockFilterLLMResponse).toHaveBeenCalledWith(
      'You rolled a 15 and hit the goblin.',
      { skipRollClaim: true },
    );
  });

  it('passes skipRollClaim:false when no recent [SKILL CHECK RESULT] message exists', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: '...', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockFilterLLMResponse).toHaveBeenCalledWith('...', { skipRollClaim: false });
  });
});

describe('runLLMTurn — filter correction', () => {
  it('on filter rejection with no recent correction, sends a [FILTER CORRECTION] system message', async () => {
    mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
    mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 17.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockLogFiltered).toHaveBeenCalledWith(
      'fabricated_roll_result',
      'You rolled a 17.',
      expect.objectContaining({ threadId: mockSession.threadId, encounterId: mockSession.encounterId }),
    );
    expect(mockAddMessage).toHaveBeenCalledWith(
      mockSession.threadId,
      expect.objectContaining({
        role: 'system',
        content: expect.stringMatching(/^\[FILTER CORRECTION\]/),
      }),
    );
    // The retry path also invokes scheduleEncounterLLMTurn with immediate=true.
    // (We can't reliably observe the internal call via the export spy in ESM
    // live-bindings, so we verify the side effects directly.)
    const correction = mockAddMessage.mock.calls.find(([_, m]) =>
      (m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
    )?.[1] as { content: string };
    expect(correction.content).toMatch(/Do NOT state or imply a specific dice result/);
  });

  it('on filter rejection when last message is already a correction, skips the retry to avoid loops', async () => {
    mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'empty_response' });
    mockCallLLM.mockResolvedValueOnce({ narrative: '', toolCall: undefined });
    const thread = makeThread();

    const history: SessionState['history'] = [
      { role: 'system', content: '[FILTER CORRECTION] previous turn suppressed (empty_response).', timestamp: 1 },
    ];

    await runLLMTurn(sessionWith(history), thread, {} as any);

    // No new correction message should be added when one was just sent.
    const correctionAdds = mockAddMessage.mock.calls.filter(([_, m]) =>
      (m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
    );
    expect(correctionAdds).toHaveLength(0);
  });

  it('uses the echoed_system_tag correction text when filter rejects for that reason', async () => {
    mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'echoed_system_tag' });
    mockCallLLM.mockResolvedValueOnce({ narrative: '[TOOL] something', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    const correction = mockAddMessage.mock.calls.find(([_, m]) =>
      (m as { content: string }).content.startsWith('[FILTER CORRECTION]'),
    )?.[1] as { content: string };
    expect(correction.content).toMatch(/Do NOT echo internal system tags/);
  });

  it('does NOT post the filtered narrative to the thread', async () => {
    mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
    mockCallLLM.mockResolvedValueOnce({ narrative: 'You rolled a 17.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(thread.send).not.toHaveBeenCalledWith('You rolled a 17.');
  });
});

describe('runLLMTurn — tool call dispatch', () => {
  it('dispatches the toolCall with a freshly fetched session and writes the system message', async () => {
    mockCallLLM.mockResolvedValueOnce({
      narrative: '',
      toolCall: { tool: 'goal_register', args: { goals: ['x'] } },
    });
    const freshSession = { ...mockSession, fetched: true };
    mockGet.mockResolvedValueOnce(freshSession);
    mockDispatchTool.mockResolvedValueOnce({ systemMessage: '[TOOL] ok', error: undefined, resolved: undefined });

    const thread = makeThread();
    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockGet).toHaveBeenCalledWith(mockSession.threadId);
    expect(mockDispatchTool).toHaveBeenCalledWith(
      { tool: 'goal_register', args: { goals: ['x'] } },
      expect.objectContaining({ session: freshSession, thread }),
    );
    expect(mockAddMessage).toHaveBeenCalledWith(
      mockSession.threadId,
      expect.objectContaining({ role: 'system', content: '[TOOL] ok' }),
    );
  });

  it('posts a friendly fallback message when dispatchTool returns an error', async () => {
    mockCallLLM.mockResolvedValueOnce({
      narrative: '',
      toolCall: { tool: 'goal_register', args: {} },
    });
    mockDispatchTool.mockResolvedValueOnce({ systemMessage: '[TOOL] failed', error: new Error('boom'), resolved: undefined });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(thread.send).toHaveBeenCalledWith(expect.stringMatching(/narrator stumbles/));
  });

  it('marks the session resolved and schedules archive when tool reports resolved', async () => {
    mockCallLLM.mockResolvedValueOnce({
      narrative: '',
      toolCall: { tool: 'encounter_resolve', args: { outcomeId: 'catch', summary: 'got him' } },
    });
    mockDispatchTool.mockResolvedValueOnce({
      systemMessage: '[TOOL] resolved',
      resolved: { outcomeId: 'catch', summary: 'got him' },
      error: undefined,
    });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockUpdate).toHaveBeenCalledWith(mockSession.threadId, {
      phase: 'resolved',
      outcome: 'catch',
      outcomeSummary: 'got him',
    });

    // The archive setTimeout fires after 5 seconds.
    expect(thread.setArchived).not.toHaveBeenCalled();
    await vi.advanceTimersByTimeAsync(5_000);
    expect(thread.setArchived).toHaveBeenCalledWith(true);
  });

  it('does not throw and returns early when the session was deleted before dispatch', async () => {
    mockCallLLM.mockResolvedValueOnce({
      narrative: '',
      toolCall: { tool: 'goal_register', args: {} },
    });
    mockGet.mockResolvedValueOnce(null); // session disappeared
    const thread = makeThread();

    await expect(runLLMTurn(sessionWith([]), thread, {} as any)).resolves.toBeUndefined();
    expect(mockDispatchTool).not.toHaveBeenCalled();
  });

  it('still dispatches the tool even when the narrative was filtered', async () => {
    mockFilterLLMResponse.mockReturnValue({ ok: false, reason: 'fabricated_roll_result' });
    mockCallLLM.mockResolvedValueOnce({
      narrative: 'You rolled a 12. ',
      toolCall: { tool: 'goal_register', args: { foo: 'bar' } },
    });
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockDispatchTool).toHaveBeenCalled();
    // But the narrative was suppressed.
    expect(thread.send).not.toHaveBeenCalledWith('You rolled a 12.');
  });
});

describe('runLLMTurn — LLM error', () => {
  it('posts a friendly error message when the LLM throws and clears the typing interval', async () => {
    mockCallLLM.mockRejectedValueOnce(new Error('503 from upstream'));
    const thread = makeThread();
    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(consoleSpy).toHaveBeenCalledWith('[messageRouter] LLM call failed:', expect.any(Error));
    expect(thread.send).toHaveBeenCalledWith(expect.stringMatching(/narrator pauses/));
    // The interval would normally fire every 8s — advance to confirm it's gone.
    await vi.advanceTimersByTimeAsync(20_000);
    expect(thread.sendTyping).toHaveBeenCalled();
    // No filter or dispatch should have happened.
    expect(mockFilterLLMResponse).not.toHaveBeenCalled();
    expect(mockDispatchTool).not.toHaveBeenCalled();

    consoleSpy.mockRestore();
  });
});

describe('runLLMTurn — missed skill check heuristic', () => {
  it('logs a warning when the narrative asks for a roll but no tool call was emitted and no roll is pending', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'Make a Strength check.', toolCall: undefined });
    mockDetectMissedSkillCheck.mockReturnValueOnce(true);
    const thread = makeThread();

    await runLLMTurn(sessionWith([]), thread, {} as any);

    expect(mockDetectMissedSkillCheck).toHaveBeenCalledWith('Make a Strength check.');
  });

  it('skips the heuristic when a roll result is already pending', async () => {
    mockCallLLM.mockResolvedValueOnce({ narrative: 'Make a check.', toolCall: undefined });
    const thread = makeThread();

    await runLLMTurn(
      sessionWith([], { player: 'Aelindra', dc: 12, messageId: 'm1' }),
      thread,
      {} as any,
    );

    expect(mockDetectMissedSkillCheck).not.toHaveBeenCalled();
  });
});

describe('runLLMTurn — typing indicator', () => {
  it('starts a typing indicator that fires every 8s while the LLM is being awaited', async () => {
    // Make callLLM slow so we can observe the interval
    let resolveCall!: (v: unknown) => void;
    mockCallLLM.mockReturnValueOnce(new Promise(r => { resolveCall = r; }));
    const thread = makeThread();

    const pending = runLLMTurn(sessionWith([]), thread, {} as any);
    expect(thread.sendTyping).toHaveBeenCalledTimes(1);

    await vi.advanceTimersByTimeAsync(8_000);
    expect(thread.sendTyping).toHaveBeenCalledTimes(2);

    resolveCall({ narrative: 'ok', toolCall: undefined });
    await pending;

    // After resolution the interval is cleared; advancing further should not send typing again.
    const callsBefore = thread.sendTyping.mock.calls.length;
    await vi.advanceTimersByTimeAsync(20_000);
    expect(thread.sendTyping.mock.calls.length).toBe(callsBefore);
  });
});