// zalbot/tests/unit/litellmClient.test.ts

import { vi, describe, it, expect, beforeEach } from 'vitest';

// ── config mock ──────────────────────────────────────────────────────────────
// Replace the config module with a fixed object for this file. The values
// below are the ones the assertions further down expect to see; tests that
// need a different value overwrite a key on this object in place.
vi.mock('../../src/config.js', () => ({
  config: {
    // Asserted below as the client's baseURL with '/v1' appended.
    LITELLM_BASE_URL: 'http://100.83.8.74:4000',
    // Asserted below as the client's apiKey.
    LITELLM_API_KEY: 'test-key',
    // Asserted below as the `model` of the completion request.
    LITELLM_MODEL: 'ollama-cloud',
    // Asserted below as the `temperature` of the completion request.
    OLLAMA_TEMPERATURE: 0.75,
    // Asserted below as the client's `timeout`.
    OLLAMA_TIMEOUT_MS: 120_000,
    // Asserted below as the model used when LITELLM_MODEL is undefined.
    OLLAMA_MODEL: 'gemma4-it:e2b',
  },
}));

// Replace the logger module with spies for info, warn, error and debug; no
// test in this file asserts on them.
vi.mock('../../src/lib/logger.js', () => ({
  log: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
}));

// ── openai client mock ────────────────────────────────────────────────────────
// Spy standing in for `chat.completions.create`. It is created through
// vi.hoisted so that it can be referenced from the vi.mock('openai') factory,
// which Vitest hoists above ordinary top-level declarations.
const { mockCreate } = vi.hoisted(() => ({
  mockCreate: vi.fn(),
}));

// Replace the openai package's default export with a constructor spy. Each
// constructed instance exposes only `chat.completions.create`, backed by the
// shared mockCreate spy, and the constructor spy records the options it was
// called with.
vi.mock('openai', () => ({
  default: vi.fn().mockImplementation(() => ({
    chat: { completions: { create: mockCreate } },
  })),
}));

import { callLLM } from '../../src/harness/litellmClient.js';

/**
 * Restores shared state before every test.
 *
 * Clears the recorded calls on all mocks, then puts back the config keys that
 * individual tests overwrite in place (`LITELLM_MODEL` and `LITELLM_API_KEY`)
 * to the values declared in the config mock factory, so that no test observes
 * a value left behind by an earlier one.
 */
beforeEach(async () => {
  vi.clearAllMocks();

  const { config } = await import('../../src/config.js');
  Object.assign(config as Record<string, unknown>, {
    LITELLM_MODEL: 'ollama-cloud',
    LITELLM_API_KEY: 'test-key',
  });
});

describe('litellmClient.callLLM', () => {
  // Happy path: narrative text followed by a fenced tool_call block, plus usage.
  it('returns parsed narrative and tool call from the OpenAI-compatible response', async () => {
    const completion = {
      choices: [
        {
          message: {
            content: 'Roll for initiative. ```tool_call\n{"tool":"encounter_resolve","args":{"sessionId":"s1","outcomeId":"catch","summary":"Caught him"}}\n```',
          },
        },
      ],
      usage: { completion_tokens: 88, prompt_tokens: 4000 },
    };
    mockCreate.mockResolvedValueOnce(completion);

    const { narrative, toolCall, rawTokensUsed } = await callLLM([
      { role: 'user', content: 'I tackle him.', timestamp: 1 },
    ]);

    // The fenced block is stripped from the narrative and surfaced as a tool call.
    expect(narrative).toBe('Roll for initiative.');
    expect(toolCall?.tool).toBe('encounter_resolve');
    expect(toolCall?.args).toEqual({
      sessionId: 's1',
      outcomeId: 'catch',
      summary: 'Caught him',
    });
    // Only the completion token count is reported.
    expect(rawTokensUsed).toBe(88);
  });

  it('configures the OpenAI client with the LiteLLM base URL + API key + timeout', async () => {
    // Drop the module cache so the client module is evaluated again and has to
    // build its OpenAI instance during this test rather than reuse a cached one.
    vi.resetModules();
    const openaiModule = await import('openai');
    const clientModule = await import('../../src/harness/litellmClient.js');
    mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: 'ok' } }] });

    await clientModule.callLLM([{ role: 'user', content: 'hi', timestamp: 1 }]);

    const expectedOptions = {
      baseURL: 'http://100.83.8.74:4000/v1',
      apiKey: 'test-key',
      timeout: 120_000,
    };
    expect(openaiModule.default).toHaveBeenCalledWith(expectedOptions);
  });

  it('falls back to the literal string "no-key" when LITELLM_API_KEY is empty', async () => {
    const { config } = await import('../../src/config.js');
    const mutableConfig = config as Record<string, unknown>;
    // The config mock object outlives this test, so remember the current key
    // and restore it afterwards, whether or not the assertions pass.
    const previousApiKey = mutableConfig.LITELLM_API_KEY;
    mutableConfig.LITELLM_API_KEY = '';

    try {
      // Re-evaluate the client module so it constructs a new OpenAI instance
      // while the key is empty.
      vi.resetModules();
      const OpenAI = (await import('openai')).default;
      const { callLLM: freshCallLLM } = await import('../../src/harness/litellmClient.js');
      mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: 'ok' } }] });

      await freshCallLLM([{ role: 'user', content: 'hi', timestamp: 1 }]);

      expect(OpenAI).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'no-key' }),
      );
    } finally {
      mutableConfig.LITELLM_API_KEY = previousApiKey;
    }
  });

  it('uses LITELLM_MODEL when set, otherwise falls back to OLLAMA_MODEL', async () => {
    const { config } = await import('../../src/config.js');
    const settings = config as Record<string, unknown>;
    const okCompletion = () => ({ choices: [{ message: { content: 'ok' } }] });

    // Case 1: an explicit LiteLLM model is forwarded as-is.
    settings.LITELLM_MODEL = 'big-model';
    mockCreate.mockResolvedValueOnce(okCompletion());
    await callLLM([{ role: 'user', content: 'a', timestamp: 1 }]);
    const explicitModelRequest = expect.objectContaining({ model: 'big-model' });
    expect(mockCreate).toHaveBeenLastCalledWith(explicitModelRequest);

    // Case 2: with no LiteLLM model, the request carries the OLLAMA_MODEL value.
    settings.LITELLM_MODEL = undefined;
    mockCreate.mockResolvedValueOnce(okCompletion());
    await callLLM([{ role: 'user', content: 'b', timestamp: 2 }]);
    const fallbackModelRequest = expect.objectContaining({ model: 'gemma4-it:e2b' });
    expect(mockCreate).toHaveBeenLastCalledWith(fallbackModelRequest);
  });

  it('passes messages and temperature through to the OpenAI client', async () => {
    mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: 'ok' } }] });

    await callLLM([
      { role: 'system', content: 'sys', timestamp: 0 },
      { role: 'user', content: 'hi', timestamp: 1 },
    ]);

    // Exact match: the request carries role/content only (no timestamp), the
    // configured model, and the configured temperature.
    const expectedRequest = {
      model: 'ollama-cloud',
      messages: [
        { role: 'system', content: 'sys' },
        { role: 'user', content: 'hi' },
      ],
      temperature: 0.75,
    };
    expect(mockCreate).toHaveBeenCalledWith(expectedRequest);
  });

  it('returns an empty narrative when the model response is empty', async () => {
    const emptyCompletion = { choices: [{ message: { content: '' } }] };
    mockCreate.mockResolvedValueOnce(emptyCompletion);

    const { narrative, toolCall } = await callLLM([
      { role: 'user', content: '...', timestamp: 1 },
    ]);

    // Empty content yields an empty narrative and no tool call.
    expect(narrative).toBe('');
    expect(toolCall).toBeUndefined();
  });

  it('falls back to an empty string when the response has no choices at all', async () => {
    const noChoices = { choices: [] };
    mockCreate.mockResolvedValueOnce(noChoices);

    const { narrative, toolCall } = await callLLM([
      { role: 'user', content: '...', timestamp: 1 },
    ]);

    // An empty choices array is treated like empty content.
    expect(narrative).toBe('');
    expect(toolCall).toBeUndefined();
  });

  it('handles a missing usage field without crashing', async () => {
    // The completion deliberately carries no `usage` object.
    const completionWithoutUsage = { choices: [{ message: { content: 'ok' } }] };
    mockCreate.mockResolvedValueOnce(completionWithoutUsage);

    const { rawTokensUsed } = await callLLM([
      { role: 'user', content: '...', timestamp: 1 },
    ]);

    expect(rawTokensUsed).toBeUndefined();
  });

  it('propagates errors from the OpenAI client', async () => {
    const upstreamFailure = new Error('rate limit exceeded');
    mockCreate.mockRejectedValueOnce(upstreamFailure);

    const pendingCall = callLLM([{ role: 'user', content: 'hi', timestamp: 1 }]);

    // The rejection from the client surfaces to the caller with its message.
    await expect(pendingCall).rejects.toThrow('rate limit exceeded');
  });
});