- React + MUI DataGrid app with faction filter, search, change filter - Biggest movers cards (drops/rises) scoped to current filter view - Historical points graph modal (5 MFM versions: 1.14 → current) - URL state sync (faction, dir, q params — shareable URLs) - Grimdark favicon + OG embed image (Google Imagen) - Multi-stage Dockerfile (node build → nginx serve) - docker-compose.yml with Traefik + Cloudflare TLS - Data pipeline: build_deduped_data.py merges PDF + live scrape - Ynnari merged into Aeldari (shared codex) - Mobile responsive: flex columns, no fixed pixel widths - Color semantics: green=cheaper, red=costlier (consistent everywhere) - 1,449 units across 31 factions
298 lines
11 KiB
Python
298 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Rebuild react-app/public/data.json with:
|
|
1. Filter out weapon-upgrade rows (size starts with "per " or "+ ").
|
|
2. Collapse each (faction, name) into ONE row, with a `sizes` array of
|
|
{size, original, new, change_pct, change_pts, tier, history} variants.
|
|
3. Only keep sizes that the MFM (new/live) actually listed.
|
|
4. Fill missing originals by scaling proportionally to model count.
|
|
5. Build a `history` array per size with {date, version, pts} from every
   version listed in VERSIONS that contains that size:
     - MFM 1.14 (Dec 1, 2024)
     - MFM 2.3 (Mar 1, 2025)
     - v3.2 PDF (Aug 20, 2025)
     - v4.3 PDF (Jun 5, 2026)
     - Live MFM (Jun 17, 2026)
|
|
|
|
The history data does NOT appear in the table — it's only used when the user
|
|
clicks a unit name to open the graph modal.
|
|
"""
|
|
import json
|
|
import re
|
|
import time
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
# Base directory that holds every input directory and the React app.
ROOT = Path("/root/wh40k-factions")

# Per-version input directories (one JSON file per faction is globbed from each).
PDF32_DIR, PDF_DIR, LIVE_DIR = (
    ROOT / sub
    for sub in (
        "pdf32",  # v3.2
        "pdf",    # v4.3
        "live",   # current MFM
    )
)

# Destination of the generated payload; its directory is created at import
# time so the final write cannot fail on a missing parent.
OUT = ROOT.joinpath("react-app", "public", "data.json")
OUT.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Matches a standalone "<digits>DP" token (case-insensitive); unit names
# containing one are treated as detachment rows and skipped.
DP_RE = re.compile(r"\b\d+DP\b", flags=re.IGNORECASE)

# Tier labels (compared upper-cased) that mark a row as detachment content.
DETACHMENT_TIERS = set(("ENHANCEMENTS", "DETACHMENT"))

# Size strings starting with one of these are weapon-upgrade rows.
# Must stay a tuple: it is passed straight to str.startswith().
UPGRADE_PREFIXES = "per ", "+ "
|
|
|
|
# Version metadata (oldest → newest)
|
|
# Version metadata, ordered oldest -> newest. Each entry carries the short
# version id, its date, the label shown in history rows, and the directory
# its per-faction JSON files are read from.
VERSIONS = [
    dict(version="1.14", date="2024-12-01", label="MFM 1.14", dir=ROOT / "pdf114"),
    dict(version="2.3", date="2025-03-01", label="MFM 2.3", dir=ROOT / "pdf23"),
    dict(version="3.2", date="2025-08-20", label="MFM 3.2", dir=PDF32_DIR),
    dict(version="4.3", date="2026-06-05", label="MFM 4.3", dir=PDF_DIR),
    dict(version="current", date="2026-06-17", label="MFM (current)", dir=LIVE_DIR),
]
|
|
|
|
|
|
def norm_name(s: str) -> str:
    """Return a comparison key for a unit name.

    The name is lower-cased, every character other than ``a-z``, ``0-9`` and
    whitespace is dropped, and whitespace runs are then collapsed to a single
    space with the ends trimmed. Falsy input (``None`` or ``""``) yields ``""``.

    Punctuation is removed *before* whitespace is collapsed so that a
    separator surrounded by spaces ("Foo - Bar") cannot leave a double space
    behind and produce a key different from "Foo Bar".
    """
    if not s:
        return ""
    # Keep whitespace here (\s) so the collapse below sees the gaps that
    # deleted punctuation leaves behind.
    stripped = re.sub(r"[^a-z0-9\s]", "", s.lower())
    return re.sub(r"\s+", " ", stripped).strip()
|
|
|
|
|
|
def norm_size(s: str) -> str:
    """Return a canonical form of a unit-size label.

    Falsy input gives ``""``. Otherwise the label is lower-cased, trimmed and
    has whitespace runs collapsed. If it contains "<digits> model" it becomes
    "<n> model" for n == 1 and "<n> models" for any other n; any other label
    is returned in its cleaned form.
    """
    if not s:
        return ""

    cleaned = re.sub(r"\s+", " ", s.lower().strip())
    found = re.search(r"(\d+)\s*model", cleaned)
    if found is None:
        return cleaned

    count = int(found.group(1))
    plural = "" if count == 1 else "s"
    return f"{count} model{plural}"
|
|
|
|
|
|
def is_upgrade_size(size: str) -> bool:
    """Tell whether a size label denotes a weapon-upgrade row.

    True when the lower-cased, trimmed label begins with one of
    ``UPGRADE_PREFIXES``; falsy labels are never upgrades.
    """
    if not size:
        return False
    label = size.lower().strip()
    return label.startswith(UPGRADE_PREFIXES)
|
|
|
|
|
|
def is_detachment_name(name: str) -> bool:
    """Tell whether a unit name contains a token matched by ``DP_RE``."""
    text = name if name else ""
    return DP_RE.search(text) is not None
|
|
|
|
|
|
def is_detachment_tier(tier) -> bool:
    """Tell whether a tier value names one of ``DETACHMENT_TIERS``.

    The value is stringified and upper-cased before the lookup; falsy values
    are never detachment tiers.
    """
    return bool(tier) and str(tier).upper() in DETACHMENT_TIERS
|
|
|
|
|
|
def model_count(s):
    """Return the first integer found in a size label, or 1 if there is none.

    ``None`` and ``""`` are accepted and also give 1, matching the way
    ``norm_name`` / ``norm_size`` tolerate missing input instead of raising.
    """
    found = re.search(r"\d+", s or "")
    return int(found.group()) if found else 1
|
|
|
|
|
|
def load_version(ver_info):
    """Load every points row from one version's directory.

    Args:
        ver_info: A ``VERSIONS`` entry; its ``"dir"`` (a ``Path``) and
            ``"version"`` keys are read.

    Returns:
        A dict mapping ``(slug, norm_name(unit), norm_size(size))`` to points.
        It is empty when the directory does not exist. When a key occurs more
        than once, the lowest points value is kept.

    Files whose stem starts with ``_`` are ignored. Rows are skipped when the
    unit name looks like a detachment, when the size is an upgrade line or
    normalizes to ``""``, when ``pts`` is missing, and (for the ``"current"``
    version only) when the entry's tier is a detachment tier.
    """
    rows = {}
    slug_dir = ver_info["dir"]
    if not slug_dir.exists():
        return rows

    is_current = ver_info["version"] == "current"

    for path in sorted(slug_dir.glob("*.json")):
        if path.stem.startswith("_"):
            continue
        # Context manager so the handle is closed immediately instead of
        # whenever the garbage collector gets to it; JSON is read as UTF-8.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)

        slug = data.get("slug", path.stem)
        # Ynnari shares the Aeldari codex — merge into aeldari
        if slug == "ynnari":
            slug = "aeldari"

        for unit, entries in data.get("units", {}).items():
            if is_detachment_name(unit):
                continue
            name_key = norm_name(unit)  # same for every entry of this unit
            for e in entries:
                if is_current and is_detachment_tier(e.get("tier")):
                    continue
                size_disp = e.get("size", "")
                if is_upgrade_size(size_disp):
                    continue
                size = norm_size(size_disp)
                if not size:
                    continue
                pts = e.get("pts")
                if pts is None:
                    continue
                k = (slug, name_key, size)
                # Keep lowest pts if duplicates
                if k not in rows or pts < rows[k]:
                    rows[k] = pts
    return rows
|
|
|
|
|
|
def _read_json(path):
    """Parse one UTF-8 JSON file, closing the handle as soon as it is read."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _load_faction_names():
    """Map faction slug -> faction name, read from the live MFM files."""
    names = {}
    for path in sorted(LIVE_DIR.glob("*.json")):
        if path.stem.startswith("_"):
            continue
        data = _read_json(path)
        slug = data.get("slug", path.stem)
        names[slug] = data.get("name", slug)
    return names


def _load_display_names():
    """Map (slug, normalized name) -> unit name as spelled in the 4.3 PDF data.

    Only the first file (in sorted order) for a given slug is consulted, and
    within it the first unit with a given normalized name wins.
    """
    names = {}
    seen_slugs = set()
    for path in sorted(PDF_DIR.glob("*.json")):
        if path.stem.startswith("_"):
            continue
        data = _read_json(path)
        # Same slug fallback as the other loaders, so files without an
        # explicit "slug" field still contribute display names.
        slug = data.get("slug", path.stem)
        if slug in seen_slugs:
            continue
        seen_slugs.add(slug)
        for unit in data.get("units", {}):
            names.setdefault((slug, norm_name(unit)), unit)
    return names


def _smallest_pdf_sizes(pdf_rows):
    """Map (slug, name) -> (pts, model count) of that unit's smallest size."""
    smallest = {}
    for (slug, name_norm, size), pts in pdf_rows.items():
        count = model_count(size)
        best = smallest.get((slug, name_norm))
        if best is None or count < best[1]:
            smallest[(slug, name_norm)] = (pts, count)
    return smallest


def _build_size_entries(slug, name_norm, rows, version_data):
    """Turn the kept size rows of one unit into `sizes[]` entries with history."""
    entries = []
    for r in rows:
        o, n = r.get("original"), r["new"]
        have_both = o is not None and n is not None
        change_pct = round((n - o) / o * 100, 2) if have_both and o > 0 else None
        change_pts = (n - o) if have_both else None

        # One history point per version (oldest -> newest) that lists this size.
        key = (slug, name_norm, r["size"])
        history = [
            {
                "date": ver["date"],
                "version": ver["label"],
                "pts": version_data[ver["version"]][key],
            }
            for ver in VERSIONS
            if key in version_data[ver["version"]]
        ]

        entries.append({
            "size": r["size"],
            "original": o,
            "new": n,
            "tier": None,
            "change_pct": change_pct,
            "change_pts": change_pts,
            "history": history,
        })
    return entries


def main():
    """Rebuild the data.json payload at ``OUT`` from every version directory.

    The live ("current") rows define which units and sizes are emitted; the
    "4.3" rows provide the `original` value of each size. Units present only
    in 4.3 are kept with a single size and ``new`` set to ``None``. A summary
    is printed to stdout.
    """
    # Load each version
    version_data = {}
    for ver in VERSIONS:
        rows = load_version(ver)
        version_data[ver["version"]] = rows
        print(f"{ver['label']}: {len(rows)} size-rows loaded")

    live_rows = version_data.get("current", {})
    pdf43_rows = version_data.get("4.3", {})

    # Lookup tables built once up front rather than once per unit.
    faction_names = _load_faction_names()
    display_names = _load_display_names()
    smallest_pdf = _smallest_pdf_sizes(pdf43_rows)

    # Group live rows by (slug, norm_name), then add PDF-only sizes
    # (removed from the MFM). Sizes within a group are unique because both
    # row dicts are keyed on (slug, name, size) and PDF rows are only added
    # when the live data lacks that key.
    groups = defaultdict(list)
    for (slug, name_norm, size), pts in live_rows.items():
        groups[(slug, name_norm)].append({"size": size, "new": pts})
    for (slug, name_norm, size), pts in pdf43_rows.items():
        if (slug, name_norm, size) not in live_rows:
            groups[(slug, name_norm)].append({"size": size, "new": None, "original": pts})

    out_units = []
    for (slug, name_norm), grp in groups.items():
        # Sort by numeric size
        grp.sort(key=lambda r: model_count(r["size"]))

        # Only keep sizes that the MFM (new/live) actually listed
        mfm_sizes = [r for r in grp if r["new"] is not None]
        if not mfm_sizes:
            mfm_sizes = [grp[0]]  # removed unit, keep one PDF entry

        # Fill missing originals: exact 4.3 match first, otherwise scale the
        # unit's smallest 4.3 size proportionally to the model count.
        base = smallest_pdf.get((slug, name_norm))
        for r in mfm_sizes:
            if r.get("original") is not None:
                continue
            original = None
            key = (slug, name_norm, r["size"])
            if key in pdf43_rows:
                original = pdf43_rows[key]
            elif base is not None:
                base_orig, base_count = base
                cnt = model_count(r["size"])
                if base_count > 0 and cnt > 0:
                    original = round(base_orig * cnt / base_count)
            r["original"] = original

        sizes = _build_size_entries(slug, name_norm, mfm_sizes, version_data)

        # default_size = smallest
        default = sizes[0]

        out_units.append({
            "faction": slug,
            "faction_name": faction_names.get(slug, slug),
            # Spelling from the PDF data when available, else Title Case.
            "name": display_names.get((slug, name_norm), name_norm.title()),
            "size": default["size"],
            "original": default["original"],
            "new": default["new"],
            "tier": default.get("tier"),
            "change_pct": default["change_pct"],
            "change_pts": default["change_pts"],
            "sizes": sizes,
            "default_size": default["size"],
        })

    # Stats
    has_both = sum(1 for u in out_units if u["original"] is not None and u["new"] is not None)
    only_pdf = sum(1 for u in out_units if u["original"] is not None and u["new"] is None)
    only_live = sum(1 for u in out_units if u["original"] is None and u["new"] is not None)
    pct_changes = [u["change_pct"] for u in out_units if u["change_pct"] is not None]
    pct_changes_sorted = sorted(pct_changes)
    multi_size = sum(1 for u in out_units if len(u["sizes"]) > 1)
    units_with_history = sum(
        1 for u in out_units if any(len(s.get("history", [])) > 1 for s in u["sizes"])
    )

    payload = {
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "versions": [{"date": v["date"], "label": v["label"]} for v in VERSIONS],
        "factions": sorted({u["faction"] for u in out_units}),
        "faction_names": {u["faction"]: u["faction_name"] for u in out_units},
        "stats": {
            "total_rows": len(out_units),
            "rows_with_both": has_both,
            "rows_pdf_only": only_pdf,
            "rows_live_only": only_live,
            "biggest_drop_pct": pct_changes_sorted[0] if pct_changes_sorted else None,
            "biggest_rise_pct": pct_changes_sorted[-1] if pct_changes_sorted else None,
            "multi_size": multi_size,
            "units_with_history": units_with_history,
        },
        "units": out_units,
    }

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # written as UTF-8 explicitly rather than in the locale's encoding.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    OUT.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")

    print(f"\nWrote {OUT}")
    print(f" total rows: {len(out_units)}")
    print(f" with both: {has_both}")
    print(f" PDF only: {only_pdf}")
    print(f" LIVE only: {only_live}")
    print(f" multi-size: {multi_size}")
    print(f" with history: {units_with_history}")
    if pct_changes:
        print(f" biggest drop: {pct_changes_sorted[0]:.2f}%")
        print(f" biggest rise: {pct_changes_sorted[-1]:.2f}%")
    print(f" size: {OUT.stat().st_size / 1024:.1f} KB")
|
|
|
|
|
|
# Run the rebuild only when executed as a script, not when imported.
if __name__ == "__main__":
    main()