test(e2e): multiplayer live test drives the real embed UI, not just Redis

Uplift the multiplayer live test to verify the actual UI (embeds), per review
feedback that Redis-only assertions aren't thorough enough.

- fakeButton: optional messageId param — when set, update() EDITS THE REAL
  message so the embed reflects the click (drives the UI). Backward-compatible:
  existing 2-arg callers omit it and update() is only captured, as before.
- Correct lobby semantics: player1 is the STARTER (already joined by /encounter
  start), player2 joins → roster = both bots. (My earlier draft also had
  player1 click Join, which got the join counts wrong.)
- AC-8 asserts on the REAL lobby embed: Seats field ('1 / 2 minimum' →
  'min 2 met') + Begin button disabled/enabled read from the fetched components.
- AC-9 asserts the 👀 reaction on the player's real message (proves handleMessage
  routed it, not skipped) + history growth.
- AC-10 asserts on the REAL scoreboard embed: Rolled field shows both players'
  rolls, final footer (prevails/falters), buttons removed on finalize; outcome
  consistent with the [GROUP CHECK RESULT] system message.

Verified: tsc --noEmit clean; 535 unit tests pass (fakeButton back-compat);
CI-safe skip confirmed (gate off → 3 skipped, clean).

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kaysser Kayyali
2026-06-22 20:39:33 +00:00
parent fcea0a30bc
commit 2dda9b4847
2 changed files with 153 additions and 75 deletions

View File

@@ -1,52 +1,74 @@
// Live multiplayer E2E — two player-bots run a real group encounter.
// Live multiplayer E2E — two player-bots run a real group encounter, driving
// the REAL embed UI (lobby embed + scoreboard embed), not just Redis state.
//
// Gate: RUN_FULL_E2E=1 AND E2E_PLAYER2_TOKEN set. Requires the full live stack:
// DISCORD_TOKEN, E2E_DRIVER_TOKEN, E2E_PLAYER2_TOKEN, E2E_TEST_GUILD_ID,
// E2E_TEST_CHANNEL_ID, E2E_ALLOW_PLAYER_BOTS=1, plus Redis + GraphMCP + LLM up.
// E2E_TEST_CHANNEL_ID, E2E_ALLOW_PLAYER_BOTS=1, plus Redis + LLM up.
// (This fixture has no randomizable / no NPC memoryKey, so GraphMCP is NOT
// called on the start path — GRAPHMCP_URL need not be host-reachable here.)
// Skipped by default → CI-safe.
//
// MVP scope: lobby gating (minPlayers:2), 2 real chat turns routed through
// messageRouter via the e2ePlayerAllowlist, and a group skill check N=2
// finalizing with the real successRule. The 4 gap-case ACs (FR-1114:
// simultaneous fan-out, successRule N>1 matrix, per-user ephemeral,
// second-claimant rejection) are a follow-up story — NOT covered here.
// Topology: player1 (E2E_DRIVER_TOKEN) is the STARTER (pre-joined in the lobby
// per /encounter start); player2 (E2E_PLAYER2_TOKEN) joins. Both end up in the
// roster and act as the two players. The test asserts on the REAL lobby embed
// (Seats field + Begin button disabled/enabled) and the REAL scoreboard embed
// (Rolled field + final footer + buttons removed), plus the 👀 reaction on a
// player's message (proves it routed through handleMessage, not skipped).
//
// NOTE: this test cannot run in CI, or anywhere else the real env above is not
// available. It is written to be correct against the live handlers and is
// CI-safe (skipIf). Verify changes by keeping `npx vitest run tests/unit` green
// and `npm run build` clean; run it live only in the dedicated test guild with
// the env set.
// The 4 gap-case ACs (FR-1114) remain a follow-up story.
import './support/env.js';
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import type { ThreadChannel } from 'discord.js';
import type { Message, ThreadChannel } from 'discord.js';
import { execute } from '../../../src/bot/commands/encounter.js';
import { handleLobbyInteraction } from '../../../src/bot/handlers/lobbyHandler.js';
import { handleRollInteraction } from '../../../src/bot/handlers/rollHandler.js';
import { sessionManager } from '../../../src/session/sessionManager.js';
import { getLobby } from '../../../src/session/lobbyManager.js';
import { buildGroupScoreboardEmbed } from '../../../src/bot/embeds/groupScoreboard.js';
import { buildRollButtons } from '../../../src/bot/embeds/skillCheck.js';
import { connectLiveBots, disconnectLiveBots, type LiveBots } from './support/liveBots.js';
import { fakeInteraction, fakeButton, parseThreadIdFromReply } from './support/fakes.js';
import { flushRedisForGuild, disconnectRedis, deleteThread, deleteSession } from './support/cleanup.js';
import { waitFor } from './support/poll.js';
import type { PendingGroupCheck } from '../../../src/types/index.js';
import type { PendingGroupCheck, PendingGroupCheckRoll } from '../../../src/types/index.js';
const runE2E = process.env.RUN_FULL_E2E === '1' && !!process.env.E2E_PLAYER2_TOKEN;
const specName = 'e2e-group-multiplayer';
describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots)', () => {
// ── Real-embed assertion helpers ────────────────────────────────────────────
function field(msg: Message, name: string): string {
return (msg.embeds[0]?.fields ?? []).find(f => f.name === name)?.value ?? '';
}
function seatsField(msg: Message): string {
return (msg.embeds[0]?.fields ?? []).find(f => f.name.startsWith('Seats'))?.value ?? '';
}
// The Begin button lives in the second action row (lobby_start). Read its real
// disabled state from the fetched message components.
function beginDisabled(msg: Message): boolean {
for (const row of msg.components ?? []) {
for (const c of (row as { components?: { customId?: string; disabled?: boolean }[] }).components ?? []) {
if (c.customId === 'lobby_start') return c.disabled ?? false;
}
}
return true; // button absent → treat as disabled
}
function scoreboardFooter(msg: Message): string {
return msg.embeds[0]?.footer?.text ?? '';
}
describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots, driving the real embeds)', () => {
let bots: LiveBots;
let player1Id: string;
let player2Id: string;
let starterId: string;
let threadId: string | null = null;
let thread: ThreadChannel | null = null;
beforeAll(async () => {
bots = await connectLiveBots();
expect(bots.players.length, 'multiplayer E2E needs 2 player-bots').toBeGreaterThanOrEqual(2);
player1Id = bots.players[0].user!.id;
player2Id = bots.players[1].user!.id;
starterId = process.env.E2E_DRIVER_USER_ID ?? player1Id;
player1Id = bots.players[0].user!.id; // the starter (pre-joined)
player2Id = bots.players[1].user!.id; // the joiner
await flushRedisForGuild(bots.guild.id);
}, 120_000);
@@ -62,15 +84,16 @@ describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots)', () => {
}
}, 120_000);
// AC-8 — lobby gating + start ------------------------------------------------
it('lobby gates at minPlayers:2, then starts on a full roster', async () => {
// AC-8 — lobby gating + start, asserting on the REAL lobby embed ───────────
it('lobby embed shows the gate: below-min Begin disabled → join → Begin enabled → start', async () => {
// player1 runs /encounter start → a lobby opens with the starter pre-joined.
const { interaction, lastText } = fakeInteraction({
subcommand: 'start',
stringOptions: { spec: specName },
channel: bots.channel,
guildId: bots.guild.id,
userId: starterId,
username: 'E2E Starter',
userId: player1Id,
username: 'Player1',
});
await execute(interaction);
@@ -79,58 +102,66 @@ describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots)', () => {
thread = await bots.channel.threads.fetch(threadId!);
expect(thread, 'lobby thread must exist on the real gateway').toBeTruthy();
// player1 joins — below minPlayers, Start is not ready.
await handleLobbyInteraction(
fakeButton(thread!, 'lobby_join', player1Id, 'Player1').interaction,
bots.botClient,
);
let lobby = await waitFor(() => getLobby(threadId!).then(l => l ?? null), {
// Lobby state: starter pre-joined (1/2).
const lobby0 = await waitFor(() => getLobby(threadId!).then(l => l ?? null), {
timeoutMs: 15_000, intervalMs: 500,
});
expect(lobby!.joined.length, 'one join → below minPlayers:2').toBe(1);
expect(lobby!.joined.length >= lobby!.minPlayers).toBe(false);
expect(lobby0!.joined, 'starter is pre-joined').toEqual([player1Id]);
expect(lobby0!.joined.length < lobby0!.minPlayers, 'below minPlayers before anyone joins').toBe(true);
// player2 joins — meets minPlayers, Start is ready.
// REAL lobby embed: Seats shows "1 / 2 minimum" and Begin is DISABLED.
const lobbyMsg0 = await thread!.messages.fetch(lobby0!.messageId!);
expect(seatsField(lobbyMsg0), 'seats below min').toContain('1 / 2 minimum');
expect(beginDisabled(lobbyMsg0), 'Begin disabled below min').toBe(true);
// player2 joins. fakeButton.update EDITS THE REAL lobby embed (messageId
// passed), so the embed reflects the click — the UI is driven, not faked.
await handleLobbyInteraction(
fakeButton(thread!, 'lobby_join', player2Id, 'Player2').interaction,
fakeButton(thread!, 'lobby_join', player2Id, 'Player2', lobby0!.messageId).interaction,
bots.botClient,
);
lobby = await waitFor(() => getLobby(threadId!).then(l => l ?? null), {
timeoutMs: 15_000, intervalMs: 500,
});
expect(lobby!.joined.length, 'two joins → meets minPlayers:2').toBe(2);
expect(lobby!.joined.length >= lobby!.minPlayers).toBe(true);
const lobby1 = await waitFor(
() => getLobby(threadId!).then(l => (l && l.joined.length >= 2) ? l : null),
{ timeoutMs: 15_000, intervalMs: 500 },
);
expect(lobby1!.joined, 'player2 joined').toEqual(expect.arrayContaining([player1Id, player2Id]));
expect(lobby1!.joined.length >= lobby1!.minPlayers, 'meets minPlayers after one join').toBe(true);
// Start the encounter from the lobby (re-resolves the spec, builds the roster
// from joined players, posts the opening, opens the session).
// REAL lobby embed now: Seats shows "min 2 met" and Begin is ENABLED.
const lobbyMsg1 = await thread!.messages.fetch(lobby0!.messageId!);
expect(seatsField(lobbyMsg1), 'seats met').toContain('min 2 met');
expect(beginDisabled(lobbyMsg1), 'Begin enabled at min').toBe(false);
// Begin the encounter from the lobby.
await handleLobbyInteraction(
fakeButton(thread!, 'lobby_start', starterId, 'E2E Starter').interaction,
fakeButton(thread!, 'lobby_start', player1Id, 'Player1').interaction,
bots.botClient,
);
const session = await waitFor(
async () => (await sessionManager.get(threadId!)) ?? null,
async () => {
const s = await sessionManager.get(threadId!);
return s && s.phase === 'open' ? s : null;
},
{ timeoutMs: 30_000, intervalMs: 1_000 },
);
expect(session, 'encounter session must be persisted after lobby start').toBeTruthy();
expect(session!.phase).toBe('open');
expect(Object.keys(session!.players).length, 'both joined players are in the roster').toBe(2);
}, 150_000);
expect(session, 'session must be open after Begin').toBeTruthy();
expect(Object.keys(session!.players), 'both player-bots are in the roster').toEqual(
expect.arrayContaining([player1Id, player2Id]),
);
}, 180_000);
// AC-9 — 2 real chat turns routed as player turns --------------------------
it('two players post real gateway messages that route as player turns', async () => {
// AC-9 — 2 real chat turns routed as player turns (👀 reaction = routed) ────
it('two players post real gateway messages that route through handleMessage', async () => {
expect(threadId, 'depends on lobby start').toBeTruthy();
thread = thread ?? (await bots.channel.threads.fetch(threadId!));
// The player bots post REAL gateway messages into the thread. These route
// through handleMessage → processEncounterMessage because
// E2E_ALLOW_PLAYER_BOTS=1 and their ids are in the e2ePlayerAllowlist
// (populated by connectLiveBots after login).
const p1Thread = (await bots.players[0].channels.fetch(threadId!)) as ThreadChannel;
const p2Thread = (await bots.players[1].channels.fetch(threadId!)) as ThreadChannel;
// player1 posts a REAL gateway message. It routes through the bot-under-
// test's messageCreate → handleMessage (E2E_ALLOW_PLAYER_BOTS=1 + allowlist),
// which fires the 👀 reaction and appends a user turn to history.
const before1 = (await sessionManager.get(threadId!))!.history.length;
await p1Thread.send('Player1: I ready my tools and watch the patrol rhythm.');
const p1Msg = await p1Thread.send('Player1: I ready my tools and watch the patrol rhythm.');
await waitFor(
async () => {
const s = await sessionManager.get(threadId!);
@@ -138,7 +169,16 @@ describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots)', () => {
},
{ timeoutMs: 30_000, intervalMs: 1_000 },
);
// 👀 reaction on the player's real message = handleMessage ran (not skipped).
await waitFor(
async () => {
const m = await thread!.messages.fetch(p1Msg.id).catch(() => null);
return m && m.reactions.cache.some(r => r.emoji.name === '👀') ? m : null;
},
{ timeoutMs: 15_000, intervalMs: 500 },
);
// player2 posts → history grows again.
const before2 = (await sessionManager.get(threadId!))!.history.length;
await p2Thread.send('Player2: I flank left while they are distracted.');
const grown = await waitFor(
@@ -148,58 +188,88 @@ describe.skipIf(!runE2E)('Multiplayer live E2E — MVP (2 player-bots)', () => {
},
{ timeoutMs: 30_000, intervalMs: 1_000 },
);
expect(grown!.history.length, 'player2 turn must append to history').toBeGreaterThan(before2);
}, 150_000);
expect(grown!.history.length, 'player2 turn appended to history').toBeGreaterThan(before2);
}, 180_000);
// AC-10 — group check N=2 finalizes with the real successRule --------------
it('a group skill check with 2 rollers finalizes with the real successRule', async () => {
// AC-10 — group check N=2 finalizes; the REAL scoreboard embed reflects it ─
it('group check scoreboard reflects each roll and finalizes with the real successRule', async () => {
expect(threadId, 'depends on lobby start').toBeTruthy();
thread = thread ?? (await bots.channel.threads.fetch(threadId!));
// Post a real scoreboard message, then set up a pending group check
// targeting both players (deterministic — does not rely on the LLM emitting
// skill_check_group_emit; see PRD OQ-2). successRule: majority.
const scoreboard = await thread!.send({ content: 'Group Stealth check — DC 13 (scoreboard)' });
// Post a REAL scoreboard embed + Roll button, then set up a pending group
// check targeting both players (deterministic setup; successRule: majority).
const rolls: PendingGroupCheckRoll[] = [
{ discordId: player1Id, dndName: 'Player1', rolled: false, modifier: 0 },
{ discordId: player2Id, dndName: 'Player2', rolled: false, modifier: 0 },
];
const scoreboard = await thread!.send({
embeds: [buildGroupScoreboardEmbed('Stealth', 'Slip the party past the patrol', 13, rolls)],
components: [buildRollButtons()],
});
const gc: PendingGroupCheck = {
skill: 'Stealth',
prompt: 'Slip the party past the patrol',
dc: 13,
messageId: scoreboard.id,
successRule: { kind: 'majority' },
rolls: [
{ discordId: player1Id, dndName: 'Player1', rolled: false, modifier: 0 },
{ discordId: player2Id, dndName: 'Player2', rolled: false, modifier: 0 },
],
rolls,
};
await sessionManager.atomicMutate(threadId!, () => ({ pendingGroupCheck: gc }));
// Each player clicks Roll. submitGroupRoll records the roll atomically; on
// the second roll allRolled → finalizeGroupCheck appends [GROUP CHECK RESULT]
// and clears pendingGroupCheck.
// player1 rolls → the REAL scoreboard is edited in place to show player1's roll.
await handleRollInteraction(
fakeButton(thread!, 'sc_roll', player1Id, 'Player1').interaction,
bots.botClient,
);
await waitFor(
async () => {
const m = await thread!.messages.fetch(scoreboard.id).catch(() => null);
const rolled = m ? field(m, 'Rolled') : '';
return rolled.includes('Player1') && !rolled.includes('Player1 — …awaiting') ? m : null;
},
{ timeoutMs: 15_000, intervalMs: 500 },
);
// player2 rolls → allRolled → finalizeGroupCheck edits the scoreboard to its
// final state and appends [GROUP CHECK RESULT].
await handleRollInteraction(
fakeButton(thread!, 'sc_roll', player2Id, 'Player2').interaction,
bots.botClient,
);
const finalized = await waitFor(
async () => {
const s = await sessionManager.get(threadId!);
if (!s) return null;
if (!s || s.pendingGroupCheck) return null;
const hasResult = s.history.some(
m => typeof m.content === 'string' && m.content.startsWith('[GROUP CHECK RESULT]'),
);
return hasResult && !s.pendingGroupCheck ? s : null;
return hasResult ? s : null;
},
{ timeoutMs: 30_000, intervalMs: 1_000 },
);
expect(finalized, 'group check must finalize: [GROUP CHECK RESULT] appended + pendingGroupCheck cleared').toBeTruthy();
expect(finalized, 'finalized: [GROUP CHECK RESULT] appended + pendingGroupCheck cleared').toBeTruthy();
// REAL scoreboard embed: both rolls shown, footer = prevails/falters, buttons removed.
const finalBoard = await thread!.messages.fetch(scoreboard.id);
const rolled = field(finalBoard, 'Rolled');
expect(rolled, 'scoreboard shows player1 roll').toContain('Player1');
expect(rolled, 'scoreboard shows player2 roll').toContain('Player2');
const footer = scoreboardFooter(finalBoard);
expect(
footer.includes('prevails') || footer.includes('falters'),
'final footer reflects the group outcome',
).toBe(true);
expect((finalBoard.components ?? []).length, 'Roll button removed on finalize').toBe(0);
// The [GROUP CHECK RESULT] outcome is consistent with the scoreboard footer.
const resultMsg = finalized!.history.find(
m => typeof m.content === 'string' && m.content.startsWith('[GROUP CHECK RESULT]'),
)!;
expect(resultMsg.content, 'successRule kind recorded in the result').toContain('Rule: majority');
}, 150_000);
expect(resultMsg.content, 'successRule kind recorded').toContain('Rule: majority');
const success = resultMsg.content.includes('SUCCESS');
expect(
success ? footer.includes('prevails') : footer.includes('falters'),
'scoreboard footer matches the recorded outcome',
).toBe(true);
}, 180_000);
});

View File

@@ -112,11 +112,15 @@ export interface FakeButton {
// `userId`/`username` are optional so existing 2-arg callers (skill-check.test,
// long-encounter.test) keep working. Multiplayer E2E passes each player-bot's
// real userId so submitGroupRoll / handleJoin can identify the clicker.
// `messageId` (optional): when set, `update()` edits the REAL message with that
// id — so the embed the button lives on actually reflects the click (drives the
// UI). Without it, `update()` is captured (back-compat for non-live tests).
export function fakeButton(
channel: ThreadChannel,
customId: string,
userId?: string,
username?: string,
messageId?: string,
): FakeButton {
const updates: unknown[] = [];
const replies: CapturedReply[] = [];
@@ -128,6 +132,10 @@ export function fakeButton(
channel,
user: { id: userId ?? 'e2e-driver-user', username: username ?? 'E2E Driver', bot: false },
async update(payload: unknown) {
if (messageId) {
const msg = await channel.messages.fetch(messageId).catch(() => null);
if (msg) await msg.edit(payload as Parameters<typeof msg.edit>[0]).catch(() => null);
}
updates.push(payload);
return {};
},