- React + MUI DataGrid app with faction filter, search, change filter - Biggest movers cards (drops/rises) scoped to current filter view - Historical points graph modal (5 MFM versions: 1.14 → current) - URL state sync (faction, dir, q params — shareable URLs) - Grimdark favicon + OG embed image (Google Imagen) - Multi-stage Dockerfile (node build → nginx serve) - docker-compose.yml with Traefik + Cloudflare TLS - Data pipeline: build_deduped_data.py merges PDF + live scrape - Ynnari merged into Aeldari (shared codex) - Mobile responsive: flex columns, no fixed pixel widths - Color semantics: green=cheaper, red=costlier (consistent everywhere) - 1,449 units across 31 factions
204 lines
6.8 KiB
Python
204 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Merge per-faction PDF (original) + LIVE (new) data into a single
client-loadable manifest. Skip detachment entries (names with a DP suffix,
or an ENHANCEMENTS / DETACHMENT tier). Compute the points and % change.
Write to:
    /root/wh40k-factions/site/data.json

Schema:
    {
      "generated_at": "...",                     # UTC, e.g. 2024-01-31T12:00:00Z
      "factions": ["adepta-sororitas", ...],     # sorted list of faction slugs
      "faction_names": {"astra-militarum": "Astra Militarum", ...},
      "stats": {
        "total_rows": ...,
        "rows_with_both": ...,
        "rows_pdf_only": ...,
        "rows_live_only": ...,
        "biggest_drop_pct": ...,                 # null if no row has a change
        "biggest_rise_pct": ...                  # null if no row has a change
      },
      "units": [
        {
          "faction": "astra-militarum",
          "faction_name": "Astra Militarum",
          "name": "Valkyrie",
          "size": "1 model",
          "original": 190,                       # may be null if unit not in PDF
          "new": 170,                            # may be null if unit not in LIVE
          "tier": "YOUR 1ST TO 2ND UNITS COST",  # may be null
          "change_pct": -10.53,                  # may be null
          "change_pts": -20                      # may be null
        },
        ...
      ]
    }
|
|
"""
|
|
import json
|
|
import re
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Filesystem layout: every input and the output live under one project root.
ROOT = Path("/root/wh40k-factions")
PDF_DIR = ROOT.joinpath("pdf")      # per-faction JSON holding the original points
LIVE_DIR = ROOT.joinpath("live")    # per-faction JSON holding the new points
OUT = ROOT.joinpath("site", "data.json")

# NOTE: this runs at import time, so merely importing the module creates the
# output directory (and any missing parents).
OUT.parent.mkdir(parents=True, exist_ok=True)

# Detachment rows are kept out of the manifest. They are recognised either by
# a "<digits>DP" token in the unit name (case-insensitive) or by an entry tier
# that is one of DETACHMENT_TIERS.
DP_RE = re.compile(r"\b\d+DP\b", flags=re.IGNORECASE)
DETACHMENT_TIERS = {"DETACHMENT", "ENHANCEMENTS"}
|
|
|
|
|
|
def norm_name(s: str) -> str:
    """Normalize a unit name into a key for cross-source matching.

    PDF names are Title Case and LIVE names are UPPERCASE, so the name is
    lower-cased. Every character other than ``a-z``, ``0-9`` and whitespace
    is then dropped, and only after that are whitespace runs collapsed and
    the ends trimmed. The order matters: removing a free-standing punctuation
    mark (``"Foo - Bar"``) leaves two adjacent spaces behind, so collapsing
    first would produce ``"foo  bar"``, which no longer matches ``"Foo Bar"``
    from the other source.

    Args:
        s: Raw unit name. ``None`` and the empty string are accepted.

    Returns:
        The normalized key, or ``""`` when ``s`` is falsy.
    """
    if not s:
        return ""
    s = s.lower()
    # Strip punctuation first (whitespace is kept), then tidy the spacing.
    s = re.sub(r"[^a-z0-9\s]", "", s)
    return re.sub(r"\s+", " ", s).strip()
|
|
|
|
|
|
def norm_size(s: str) -> str:
    """Canonicalize a unit-size label so both sources produce the same key.

    The label is lower-cased, trimmed, and has whitespace runs collapsed to
    a single space. If the cleaned label contains "<digits> model" (with or
    without the space), the result is rebuilt from that count alone as
    "<n> model" for 1 and "<n> models" otherwise. Any other label is returned
    in its cleaned form. Falsy input yields "".
    """
    if not s:
        return ""

    cleaned = re.sub(r"\s+", " ", s.lower().strip())
    found = re.search(r"(\d+)\s*model", cleaned)
    if found is None:
        return cleaned

    count = int(found.group(1))
    plural = "" if count == 1 else "s"
    return f"{count} model{plural}"
|
|
|
|
|
|
def is_detachment_name(name: str) -> bool:
    """Return True when ``name`` contains a match for ``DP_RE``.

    ``None`` and the empty string are treated as an empty name and therefore
    never match.
    """
    candidate = name or ""
    return DP_RE.search(candidate) is not None
|
|
|
|
|
|
def is_detachment_tier(tier) -> bool:
    """Return True when ``tier``, upper-cased, is one of ``DETACHMENT_TIERS``.

    Falsy tiers (``None``, ``""``) are never detachment tiers. Any other
    value is converted with ``str()`` before the case-insensitive comparison.
    """
    return bool(tier) and str(tier).upper() in DETACHMENT_TIERS
|
|
|
|
|
|
def _faction_slugs(directory):
    """Return the stems of ``*.json`` files in ``directory`` not starting with ``_``."""
    return {p.stem for p in directory.glob("*.json") if not p.stem.startswith("_")}


def _merge_source(rows, directory, slugs, points_key, track_tier=False):
    """Fold one source directory's per-faction files into ``rows`` in place.

    Args:
        rows: Dict keyed by ``(slug, normalized name, normalized size)``;
            a record is created on first sight and updated afterwards.
        directory: Directory holding ``<slug>.json`` files.
        slugs: Faction slugs to look for; missing files are skipped.
        points_key: Record field to fill (``"original"`` or ``"new"``).
        track_tier: When true, also record the entry's tier label.
    """
    for slug in slugs:
        path = directory / f"{slug}.json"
        if not path.exists():
            continue
        # Explicit UTF-8 and a context manager: the handle is closed promptly
        # and decoding does not depend on the machine's locale.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
        faction_name = data.get("name", slug)
        for unit, entries in data.get("units", {}).items():
            if is_detachment_name(unit):
                continue
            for e in entries:
                tier = e.get("tier")
                # Applied to every source so a detachment row cannot slip in
                # through whichever file happens to carry a tier field.
                if is_detachment_tier(tier):
                    continue
                size = norm_size(e.get("size"))
                if not size:
                    continue
                key = (slug, norm_name(unit), size)
                rec = rows.setdefault(key, {
                    "faction": slug,
                    "faction_name": faction_name,
                    "name": unit,
                    "size": e.get("size", size),  # keep display form
                    "original": None,
                    "new": None,
                    "tier": None,
                })
                # A source may list several rows per (unit, size); the lowest
                # cost is taken as the "base" price.
                pts = e.get("pts")
                current = rec[points_key]
                is_cheapest = pts is not None and (current is None or pts < current)
                if is_cheapest:
                    rec[points_key] = pts
                # The tier label follows the cheapest entry; an entry that is
                # not the cheapest only supplies a label if none is set yet.
                if track_tier and tier and (is_cheapest or not rec["tier"]):
                    rec["tier"] = tier


def main():
    """Build the merged points manifest and write it to ``OUT``.

    Steps:
      1. Discover faction slugs present in either ``PDF_DIR`` or ``LIVE_DIR``.
      2. Merge PDF entries (filling ``original``) and LIVE entries (filling
         ``new`` and ``tier``) into one record per (faction, unit, size),
         skipping detachment names and detachment tiers.
      3. Compute ``change_pct`` / ``change_pts`` where both prices exist and
         the original price is positive; otherwise both are ``None``.
      4. Write the JSON payload as UTF-8 and print a short summary.

    Raises:
        OSError: If an input file cannot be read or the output not written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Discover factions present in either dir (union, not intersection)
    pdf_slugs = _faction_slugs(PDF_DIR)
    live_slugs = _faction_slugs(LIVE_DIR)
    slugs = sorted(pdf_slugs | live_slugs)
    print(f"PDF factions: {len(pdf_slugs)}")
    print(f"LIVE factions: {len(live_slugs)}")
    print(f"Total slugs: {len(slugs)}")

    # { (slug, name_norm, size_norm): record }; PDF first so that its
    # display name/size win when a unit exists in both sources.
    rows = {}
    _merge_source(rows, PDF_DIR, slugs, "original")
    _merge_source(rows, LIVE_DIR, slugs, "new", track_tier=True)

    # Compute change
    out_units = []
    for r in rows.values():
        o, n = r["original"], r["new"]
        if o is not None and n is not None and o > 0:
            r["change_pct"] = round((n - o) / o * 100, 2)
            r["change_pts"] = n - o
        else:
            r["change_pct"] = None
            r["change_pts"] = None
        out_units.append(r)

    # Stats
    has_both = sum(1 for u in out_units if u["original"] is not None and u["new"] is not None)
    only_pdf = sum(1 for u in out_units if u["original"] is not None and u["new"] is None)
    only_live = sum(1 for u in out_units if u["original"] is None and u["new"] is not None)
    pct_changes = [u["change_pct"] for u in out_units if u["change_pct"] is not None]
    biggest_drop = min(pct_changes, default=None)
    biggest_rise = max(pct_changes, default=None)

    payload = {
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "factions": sorted({u["faction"] for u in out_units}),
        "faction_names": {u["faction"]: u["faction_name"] for u in out_units},
        "stats": {
            "total_rows": len(out_units),
            "rows_with_both": has_both,
            "rows_pdf_only": only_pdf,
            "rows_live_only": only_live,
            "biggest_drop_pct": biggest_drop,
            "biggest_rise_pct": biggest_rise,
        },
        "units": out_units,
    }

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # written as UTF-8 explicitly rather than in the locale's encoding.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    OUT.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
    print(f"\nWrote {OUT}")
    print(f" total rows: {len(out_units)}")
    print(f" with both: {has_both}")
    print(f" PDF only: {only_pdf}")
    print(f" LIVE only: {only_live}")
    if pct_changes:
        print(f" biggest drop: {biggest_drop:.2f}%")
        print(f" biggest rise: {biggest_rise:.2f}%")
    print(f" size: {OUT.stat().st_size / 1024:.1f} KB")


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|