Files
obsidian-foundry-sync/tests/cchash.test.ts
Kaysser Kayyali 5d96bf1267 feat(E1b-alt): re-baseline ccHash to canonicalize-HTML contract
E1a proved the markdown round-trip unstable (NO-GO). This re-baselines the
E0.2 ccHash contract to hash Foundry HTML directly — the E1b-alt fork — which
sidesteps all 5 E1a failure reasons (no inverse, no resolver, no blank-line/
case/order sensitivity, no parseBody coupling).

- src/canonicalize-html.ts: canonicalizeHtml(html) — linkedom DOM walk that
  absorbs serialization drift (attribute order/quoting, named-vs-numeric
  entities, inter-tag whitespace, tag case, self-closing) while preserving
  content (structure, attr values, meaningful text). Two inputs parsing to the
  same DOM → same canonical string. Mini-gate: tests/canonicalize-html.test.ts
  (9 tests — serialization variants → same canonical; content change → different).
- src/cchash.ts: rewritten to ccHash = contentHash(canonicalizeHtml(
  data.description) + "\n" + canonicalizeHtml(data.notes ?? "") + "\n" + name
  + "\n" + folder). The HtmlToMarkdown seam is DROPPED; a CanonicalizeHtml seam
  (default = canonicalizeHtml) replaces it. CC_HASH_CONTRACT updated + pinned +
  re-derivation-enforced. CcHashError on missing description kept; direction-
  invariance kept (name/folder from liveEntry); folder = Foundry folder ID,
  distinct from Obsidian foundry.folder_path. tests/cchash.test.ts updated (21
  tests incl. serialization-drift-absorption + no-false-negative).
- src/fromFoundry.ts (the E1a markdown inverse) ships unwired — not consumed by
  ccHash; remains as the spike artifact's inverse.

tsc clean; 67 E0+E1a+E1b-alt tests pass; 112 passing project-wide (18 pre-existing
fixture-missing failures unchanged).

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-22 22:35:09 +00:00

198 lines
8.3 KiB
TypeScript

import { describe, it, expect } from "vitest";
import {
ccHash,
ccHashFromGet,
CC_HASH_CONTRACT,
CcHashError,
isCcHashError,
} from "../src/cchash.js";
import { canonicalizeHtml } from "../src/canonicalize-html.js";
import { contentHash } from "../src/normalize.js";
import type { JournalEntry, CcData } from "../src/types.js";
import type { RelayClient } from "../src/relay/client.js";
/**
 * Options accepted by the `entry()` fixture builder below. Every field is
 * optional; anything omitted falls back to the default `entry()` picks.
 */
interface EntryOpts {
  // --- top-level journal-entry fields ---

  /** Entry name; `entry()` substitutes "Roland Deschain" when omitted. */
  name?: string;
  /**
   * Foundry folder value. Only `undefined` triggers the "Folder.gideon"
   * default; an explicit `null` is passed through unchanged.
   */
  folder?: string | null;

  // --- campaign-codex payload ---

  /** `data.description` HTML; ignored when `data` is supplied. */
  description?: string;
  /** `data.notes` HTML; ignored when `data` is supplied. */
  notes?: string;
  /** Exact `data` object used verbatim (for the missing-field tests). */
  data?: CcData;

  // --- malformed-entry switches ---

  /** Build the entry with empty `flags` (no campaign-codex flag). Wins over `noData`. */
  noFlag?: boolean;
  /** Build the campaign-codex flag without a `data` member. */
  noData?: boolean;
}
/**
 * Builds a JournalEntry fixture for the ccHash tests.
 *
 * Precedence of the payload switches: `noFlag` (empty flags) beats `noData`
 * (flag without `data`), which beats an explicit `data` override, which beats
 * the individual `description` / `notes` options.
 */
function entry(opts: EntryOpts = {}): JournalEntry {
  const base = {
    name: opts.name ?? "Roland Deschain",
    _id: "abc1",
    // Only `undefined` falls back to the default folder; `null` must survive
    // so tests can exercise the `folder ?? ""` branch in ccHash.
    folder: opts.folder === undefined ? "Folder.gideon" : opts.folder,
  };

  if (opts.noFlag) {
    return { ...base, flags: {} };
  }

  if (opts.noData) {
    return { ...base, flags: { "campaign-codex": { type: "npc" } } };
  }

  const data = opts.data ?? {
    description: opts.description ?? "<p>The gunslinger.</p>",
    notes: opts.notes ?? "",
  };
  return { ...base, flags: { "campaign-codex": { type: "npc", data } } };
}
// Pins the frozen ccHash input contract and checks that each of its inputs
// (description, notes, name, folder) moves the hash.
describe("ccHash contract + determinism (E1b-alt)", () => {
  it("CC_HASH_CONTRACT pins the exact bytes of the frozen input contract", () => {
    const frozenContract =
      'contentHash(canonicalizeHtml(data.description) + "\\n" + canonicalizeHtml(data.notes ?? "") + "\\n" + name + "\\n" + folder)';
    expect(CC_HASH_CONTRACT).toBe(frozenContract);
  });

  it("implementation matches the frozen contract (re-derivation enforces it)", () => {
    const liveEntry = entry({ notes: "<p>He killed the boy.</p>" });
    const { description, notes } = liveEntry.flags!["campaign-codex"]!.data!;
    // Rebuild the hash input by hand, field by field, exactly as the contract
    // string spells it out.
    const hashInput =
      canonicalizeHtml(description!) +
      "\n" +
      canonicalizeHtml(notes!) +
      "\n" +
      liveEntry.name +
      "\n" +
      (liveEntry.folder ?? "");
    const rederived = contentHash(hashInput);
    expect(ccHash(liveEntry)).toBe(rederived);
  });

  it("is deterministic: same payload → same hash across runs", () => {
    const firstRun = ccHash(entry());
    const secondRun = ccHash(entry());
    expect(firstRun).toBe(secondRun);
    // 64 lowercase hex characters, i.e. the shape of a sha256 hex digest.
    expect(firstRun).toMatch(/^[0-9a-f]{64}$/);
  });

  it("is sensitive: a one-char change to data.description yields a different hash", () => {
    const original = ccHash(entry({ description: "<p>The gunslinger.</p>" }));
    const edited = ccHash(entry({ description: "<p>The gunslinger!</p>" }));
    expect(original).not.toBe(edited);
  });

  it("is sensitive: a change to data.notes (## Secrets) yields a different hash", () => {
    // A Foundry-side edit to secrets MUST move ccHash; otherwise the
    // divergence guard would miss secrets-only edits (the clobber hole the
    // contract closes).
    const withoutNotes = ccHash(entry({ notes: "" }));
    const withNotes = ccHash(entry({ notes: "<p>He killed the boy.</p>" }));
    expect(withoutNotes).not.toBe(withNotes);
  });

  it("name changing alone yields a different hash (part of the hash input)", () => {
    const shortName = ccHash(entry({ name: "Roland Deschain" }));
    const longName = ccHash(entry({ name: "Roland Deschain of Gilead" }));
    expect(shortName).not.toBe(longName);
  });

  it("folder changing alone yields a different hash (Foundry folder ID)", () => {
    const inGideon = ccHash(entry({ folder: "Folder.gideon" }));
    const inGilead = ccHash(entry({ folder: "Folder.gilead" }));
    expect(inGideon).not.toBe(inGilead);
  });

  it("absent folder is treated as empty string (matches Obsidian-side absence)", () => {
    const emptyFolder = ccHash(entry({ folder: "" }));
    const nullFolder = ccHash(entry({ folder: null }));
    expect(emptyFolder).toBe(nullFolder);
  });

  it("trailing whitespace in name/folder is normalized (canonicalize via contentHash)", () => {
    // name/folder are concatenated raw, but the final contentHash pass
    // canonicalizes the whole string, so whitespace drift introduced by relay
    // serialization does not flap ccHash.
    // NOTE(review): only `name` is exercised here; `folder` is not covered.
    const clean = ccHash(entry({ name: "Roland Deschain" }));
    const padded = ccHash(entry({ name: "Roland Deschain " })); // trailing whitespace
    expect(clean).toBe(padded);
  });
});
// Serialization-only differences in the HTML fields must not move ccHash,
// while a genuine text change must.
describe("ccHash absorbs HTML serialization drift (the E1b-alt property)", () => {
  it("two descriptions that differ only in serialization → same ccHash", () => {
    // The two inputs differ in attribute order, inter-tag whitespace, the
    // self-closing slash and tag case; the test expects an identical hash.
    const compactHtml = '<p>Hello <b>world</b></p><img src="x.png" alt="alt">';
    const driftedHtml = '<P>Hello <B>world</B></P>\n <IMG alt="alt" src="x.png" />';
    const compactHash = ccHash(entry({ description: compactHtml }));
    const driftedHash = ccHash(entry({ description: driftedHtml }));
    expect(compactHash).toBe(driftedHash);
  });

  it("two notes that differ only in serialization → same ccHash", () => {
    // Pure serialization drift, no text change: tag case plus the named
    // (&amp;) versus numeric (&#38;) spelling of the same ampersand character.
    const namedEntity = ccHash(entry({ notes: "<p>Secret &amp; one.</p>" }));
    const numericEntity = ccHash(entry({ notes: "<P>Secret &#38; one.</P>" }));
    expect(namedEntity).toBe(numericEntity);
  });

  it("a real content change in the description → different ccHash (no false negative)", () => {
    const lowerCase = ccHash(entry({ description: "<p>Hello world.</p>" }));
    const upperCase = ccHash(entry({ description: "<p>Hello World.</p>" })); // capital W
    expect(lowerCase).not.toBe(upperCase);
  });
});
// ccHash takes only the live Foundry entry as input, so the direction of the
// sync (push vs pull) and the vault filename cannot influence it.
describe("ccHash direction-invariance (E1b-alt)", () => {
  it("same Foundry data+name+folder → same hash regardless of caller (E1b push vs E2 pull)", () => {
    const liveEntry = entry();
    const pushSide = ccHash(liveEntry);
    const pullSide = ccHash(liveEntry);
    // The hash is a function of the Foundry entry only.
    // NOTE(review): both hashes come from the same object, so this assertion
    // can only fail on non-determinism.
    expect(pushSide).toBe(pullSide);
  });

  it("renaming the vault file (without changing the live entry) leaves ccHash unchanged", () => {
    // The vault filename never enters the hash. A rename is a name-field
    // update routed through pushNote's updatedName path, not a content
    // divergence, so the stored foundry.ccHash is unaffected until a push
    // updates liveEntry.name.
    // NOTE(review): the fixture has no vault filename to vary, so this hashes
    // the same unchanged live entry twice.
    const liveEntry = entry();
    const beforeRename = ccHash(liveEntry);
    const afterRename = ccHash(liveEntry);
    expect(beforeRename).toBe(afterRename);
  });

  it("a live entry name change (a real push) DOES change ccHash", () => {
    const hashBeforePush = ccHash(entry({ name: "Roland" }));
    const hashAfterPush = ccHash(entry({ name: "Roland Deschain" }));
    expect(hashBeforePush).not.toBe(hashAfterPush);
  });
});
// Error contract: malformed entries raise CcHashError from ccHash, while
// ccHashFromGet lets relay failures through untouched.
//
// The async rejection tests use `await expect(...).rejects` rather than a
// try/catch with a "should have thrown" sentinel. A sentinel thrown inside the
// `try` is caught by the same `catch` that makes the assertions, so a promise
// that unexpectedly resolves is reported as a confusing matcher mismatch.
// `.rejects` fails explicitly when the promise resolves.
describe("ccHash error handling (E1b-alt)", () => {
  it("throws CcHashError when flags.campaign-codex is absent", () => {
    expect(() => ccHash(entry({ noFlag: true }))).toThrow(CcHashError);
    expect(() => ccHash(entry({ noFlag: true }))).toThrow(/missing campaign-codex data/);
  });

  it("throws CcHashError when flags.campaign-codex.data is absent", () => {
    expect(() => ccHash(entry({ noData: true }))).toThrow(CcHashError);
    expect(() => ccHash(entry({ noData: true }))).toThrow(/missing campaign-codex data/);
  });

  it("throws CcHashError when data.description is absent/non-string (NOT coerced to empty)", () => {
    // A present-but-description-less entry must not silently hash "" — that
    // would create a stable-but-wrong baseline.
    const e = entry({ data: { notes: "<p>orphan notes</p>" } as CcData });
    expect(() => ccHash(e)).toThrow(CcHashError);
    expect(() => ccHash(e)).toThrow(/description/);
  });

  it("ccHashFromGet surfaces relay errors unchanged (not wrapped as CcHashError)", async () => {
    const relayErr = new Error('relay 404 GET /get: No connected Foundry clients found');
    // Minimal test double: only getEntry is provided, hence the cast.
    const fakeRelay = {
      getEntry: async (_uuid: string): Promise<JournalEntry> => {
        throw relayErr;
      },
    } as unknown as RelayClient;

    // Identity check: the very same error object must come back out.
    await expect(ccHashFromGet(fakeRelay, "JournalEntry.abc1")).rejects.toBe(relayErr);
    // Identity is established above, so checking relayErr checks the rejection.
    expect(isCcHashError(relayErr)).toBe(false);
  });

  it("ccHashFromGet returns { hash, entry } on success and derives the hash from the same response", async () => {
    const e = entry();
    const fakeRelay = { getEntry: async (_uuid: string): Promise<JournalEntry> => e } as unknown as RelayClient;
    const result = await ccHashFromGet(fakeRelay, "JournalEntry.abc1");
    expect(result.entry).toBe(e);
    expect(result.hash).toBe(ccHash(e));
  });

  it("ccHashFromGet throws CcHashError (not relay error) when the entry is malformed", async () => {
    const malformed = entry({ noData: true });
    const fakeRelay = { getEntry: async (): Promise<JournalEntry> => malformed } as unknown as RelayClient;

    // Still exercised through the exported isCcHashError guard, as before.
    await expect(ccHashFromGet(fakeRelay, "JournalEntry.abc1")).rejects.toSatisfy(
      (err: unknown) => isCcHashError(err),
    );
  });
});