Initial commit: bs-roster-parser v1.0.0

Python library for parsing BattleScribe/NewRecruit roster JSON. Extracted from Nachmund Tracker's processJSON, ported JS→Python. - parse_roster(json_string) / parse_roster_file(path) → RosterSummary - Extracts: unit name, pts, CP, model count, weapon breakdown per model variant - Handles: variable-model-count units, nested costs, compound upgrades - Unicode apostrophe-safe unit lookup (find_unit) - to_dict() for JSON serialization - 8/8 tests passing on real roster data (27 units, 2815pts)
2026-06-18 03:04:16 +00:00
commit 948d98accb
7 changed files with 22310 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,7 @@
+__pycache__/
+*.pyc
+*.egg-info/
+dist/
+build/
+.eggs/
+*.egg
--- a/README.md
+++ b/README.md
@@ -0,0 +1,67 @@
+# bs-roster-parser
+
+Python library for parsing BattleScribe/NewRecruit roster JSON into flat unit lists with costs, model counts, and weapon breakdowns.
+
+## Install
+
+```bash
+pip install git+https://git.homelab.local/kaykayyali/bs-roster-parser.git
+```
+
+Or clone and install locally:
+```bash
+git clone https://git.homelab.local/kaykayyali/bs-roster-parser.git
+cd bs-roster-parser
+pip install -e .
+```
+
+## Usage
+
+```python
+from bs_roster_parser import parse_roster_file
+
+# Parse a roster file
+summary = parse_roster_file("roster.json")
+
+# Access units
+for unit in summary.units:
+    print(f"{unit.name}: {unit.pts}pts, {unit.model_count} models")
+    for model in unit.breakdown:
+        print(f"  {model.name} ×{model.count}")
+        if model.weapons:
+            print(f"    Weapons: {', '.join(model.weapons)}")
+
+# Aggregate stats
+print(f"Total: {summary.total_pts}pts, {summary.total_cp} CP")
+print(f"Units: {summary.unit_count}, Models: {summary.total_models}")
+
+# Find a specific unit
+gaunt = summary.find_unit("Gaunt's Ghosts")
+
+# Export to dict/JSON
+import json
+print(json.dumps(summary.to_dict(), indent=2))
+```
+
+## What it extracts
+
+Each unit includes:
+- **name** — display name (custom name if set, otherwise catalogue name)
+- **pts** — total points cost (including descendant costs for variable-model-count units)
+- **cp** — crusade points
+- **model_count** — total models (summed from child model nodes)
+- **breakdown** — per-model-variant list with names, counts, and detected weapons
+- **type** — `unit` or `model`
+
+The roster summary includes:
+- **roster_name**, **game_system**, **faction** (detected from first force)
+- **points_limit** (from costLimits)
+- **total_pts**, **total_cp**, **unit_count**, **total_models**
+
+## Origin
+
+Extracted from the [Nachmund Tracker](https://git.homelab.local/kaykayyali/Nachmund-Tracker) project's `processJSON` function, ported from JavaScript to Python and generalized as a reusable library.
+
+## License
+
+MIT
--- a/bs_roster_parser/__init__.py
+++ b/bs_roster_parser/__init__.py
@@ -0,0 +1,17 @@
+"""
+bs-roster-parser — Parse BattleScribe/NewRecruit roster JSON into flat unit lists.
+
+Usage:
+    from bs_roster_parser import parse_roster, RosterSummary
+
+    with open('roster.json') as f:
+        summary = parse_roster(f.read())
+
+    for unit in summary.units:
+        print(f"{unit.name}: {unit.pts}pts, {unit.model_count} models")
+    print(f"Total: {summary.total_pts}pts, {summary.total_cp} CP")
+"""
+from .parser import parse_roster, parse_roster_file, RosterSummary, Unit
+
+# Package version string; pyproject.toml declares the same value.
+__version__ = "1.0.0"
+# Names exported by `from bs_roster_parser import *` (all re-exported from .parser).
+__all__ = ["parse_roster", "parse_roster_file", "RosterSummary", "Unit"]
--- a/bs_roster_parser/parser.py
+++ b/bs_roster_parser/parser.py
@@ -0,0 +1,341 @@
+"""
+Core roster parser — recursively walks BSApp/NewRecruit roster JSON trees
+and extracts flat unit entries with costs, model counts, and weapon breakdowns.
+
+Extracted from the Nachmund Tracker's processJSON function (src/app.js ~lines 648-764),
+ported from JavaScript to Python and generalized to be reusable.
+"""
+
+import json
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+@dataclass
+class WeaponProfile:
+    """A weapon selection within a unit.
+
+    NOTE(review): nothing in this module constructs or returns this class;
+    weapon names are carried as plain strings on ``ModelVariant.weapons``.
+    """
+    name: str  # weapon name
+
+
+@dataclass
+class ModelVariant:
+    """A distinct model type within a unit (e.g. 'Sergeant' vs 'Battle Sister').
+
+    Produced by ``_build_model_breakdown``, which merges every model node
+    sharing the same name into one variant.
+    """
+    name: str    # model node name ("Unknown" when the node has no name)
+    count: int   # summed ``number`` of all model nodes with this name
+    weapons: list[str] = field(default_factory=list)  # unique weapon names, first-seen order
+
+
+@dataclass
+class Unit:
+    """A single unit entry extracted from a roster.
+
+    Instances are created by ``_search_tree`` for roster nodes whose ``from``
+    is ``"entry"`` and whose ``type`` is ``"model"`` or ``"unit"``.
+    """
+    name: str                        # display name: custom name if set, else node name, else "Unnamed Entry"
+    type: str                        # 'model' or 'unit'
+    pts: float                       # points cost (including descendants)
+    cp: float                        # crusade points (0 if none)
+    model_count: int                 # total models in the unit
+    breakdown: list[ModelVariant] = field(default_factory=list)  # per-model-variant counts and weapons
+    custom_name: Optional[str] = None  # custom name if set in BSApp
+
+
+@dataclass
+class RosterSummary:
+    """Parsed roster with all units extracted."""
+    roster_name: str
+    game_system: str
+    points_limit: Optional[int]
+    units: list[Unit]
+    total_pts: float
+    total_cp: float
+    faction: Optional[str] = None     # detected from force catalogue name
+
+    @property
+    def unit_count(self) -> int:
+        return len(self.units)
+
+    @property
+    def total_models(self) -> int:
+        return sum(u.model_count for u in self.units)
+
+    def find_unit(self, name: str) -> Optional[Unit]:
+        """Find a unit by name (case-insensitive, unicode-apostrophe-safe). Returns first match."""
+        # Normalize curly apostrophes to straight ones for matching
+        name_norm = name.replace("\u2019", "'").replace("\u2018", "'").lower()
+        for u in self.units:
+            u_name = u.name.replace("\u2019", "'").replace("\u2018", "'").lower()
+            if u_name == name_norm:
+                return u
+        return None
+
+    def to_dict(self) -> dict:
+        """Serialize to a plain dict for JSON export."""
+        return {
+            "roster_name": self.roster_name,
+            "game_system": self.game_system,
+            "points_limit": self.points_limit,
+            "faction": self.faction,
+            "total_pts": self.total_pts,
+            "total_cp": self.total_cp,
+            "unit_count": self.unit_count,
+            "total_models": self.total_models,
+            "units": [
+                {
+                    "name": u.name,
+                    "type": u.type,
+                    "pts": u.pts,
+                    "cp": u.cp,
+                    "model_count": u.model_count,
+                    "custom_name": u.custom_name,
+                    "breakdown": [
+                        {"name": m.name, "count": m.count, "weapons": m.weapons}
+                        for m in u.breakdown
+                    ],
+                }
+                for u in self.units
+            ],
+        }
+
+
+# ── Cost extraction helpers ──
+
+def _extract_cost(node: dict, exact_matches: list[str], prop_names: list[str]) -> float:
+    """Extract a cost value from a node, checking costs[] array first, then direct properties."""
+    val = 0.0
+    costs = node.get("costs")
+    if isinstance(costs, list):
+        for c in costs:
+            cname = (c.get("name") or "").lower().strip()
+            if cname in exact_matches:
+                val += float(c.get("value", 0))
+                break
+    if val == 0:
+        for prop in prop_names:
+            if prop in node and node[prop] is not None:
+                val += float(node[prop])
+                break
+    return val
+
+
+def _sum_descendant_pts(obj) -> float:
+    """Recursively sum pts from all descendant selections.
+    Handles variable-model-count units where each child model carries its own pts."""
+    if isinstance(obj, list):
+        return sum(_sum_descendant_pts(item) for item in obj)
+    if not isinstance(obj, dict) or obj is None:
+        return 0.0
+    pts = 0.0
+    costs = obj.get("costs")
+    if isinstance(costs, list):
+        for c in costs:
+            cname = (c.get("name") or "").lower().strip()
+            if cname in ("pts", "points"):
+                pts += float(c.get("value", 0))
+                break
+    selections = obj.get("selections")
+    if isinstance(selections, list):
+        pts += _sum_descendant_pts(selections)
+    return pts
+
+
+def _sum_descendant_models(obj) -> int:
+    """Recursively count all model nodes in a subtree (summing their number fields)."""
+    if isinstance(obj, list):
+        return sum(_sum_descendant_models(item) for item in obj)
+    if not isinstance(obj, dict) or obj is None:
+        return 0
+    count = 0
+    if obj.get("type") == "model":
+        count += int(obj.get("number", 1))
+    selections = obj.get("selections")
+    if isinstance(selections, list):
+        count += _sum_descendant_models(selections)
+    return count
+
+
+# ── Weapon detection ──
+
+def _get_weapon_names(selections) -> list[str]:
+    """Extract weapon names from a selections list, recursing into compound upgrades."""
+    weapons = []
+    if not isinstance(selections, list):
+        return weapons
+    for sel in selections:
+        if not isinstance(sel, dict):
+            continue
+        profiles = sel.get("profiles")
+        if isinstance(profiles, list):
+            for p in profiles:
+                ptype = (p.get("typeName") or "").lower()
+                if "weapon" in ptype and sel.get("name") and sel["name"] not in weapons:
+                    weapons.append(sel["name"])
+                    break
+        # Recurse into compound upgrades (e.g. "2 Plasma Cannons")
+        sub_weapons = _get_weapon_names(sel.get("selections"))
+        for w in sub_weapons:
+            if w not in weapons:
+                weapons.append(w)
+    return weapons
+
+
+# ── Model breakdown builder ──
+
+def _build_model_breakdown(unit_obj: dict) -> list[ModelVariant]:
+    """Build a list of {name, count, weapons} for every unique model variant in a unit."""
+    model_map: dict[str, dict] = {}
+
+    def collect_models(obj):
+        if isinstance(obj, list):
+            for item in obj:
+                collect_models(item)
+            return
+        if not isinstance(obj, dict) or obj is None:
+            return
+        if obj.get("type") == "model":
+            name = obj.get("name") or "Unknown"
+            n = int(obj.get("number", 1))
+            if name not in model_map:
+                model_map[name] = {"count": 0, "weapons": []}
+            model_map[name]["count"] += n
+            weapons = _get_weapon_names(obj.get("selections"))
+            for w in weapons:
+                if w not in model_map[name]["weapons"]:
+                    model_map[name]["weapons"].append(w)
+            return
+        selections = obj.get("selections")
+        if isinstance(selections, list):
+            for s in selections:
+                collect_models(s)
+
+    selections = unit_obj.get("selections")
+    if isinstance(selections, list):
+        for s in selections:
+            collect_models(s)
+
+    return [
+        ModelVariant(name=name, count=d["count"], weapons=d["weapons"])
+        for name, d in model_map.items()
+    ]
+
+
+# ── Main tree walker ──
+
+def _search_tree(obj, entries: list[Unit]):
+    """Recursively walk the roster tree, extracting unit/model entries."""
+    if isinstance(obj, list):
+        for item in obj:
+            _search_tree(item, entries)
+        return
+    if not isinstance(obj, dict) or obj is None:
+        return
+
+    # Look for unit/model entries that come directly from a catalogue entry
+    if obj.get("from") == "entry" and obj.get("type") in ("model", "unit"):
+        display_name = (
+            obj.get("customName")
+            or obj.get("custom_name")
+            or obj.get("name")
+            or "Unnamed Entry"
+        )
+        count = int(obj.get("number", 1)) if obj.get("number") else 1
+        pts = _extract_cost(obj, ["pts", "points"], ["pts", "points"])
+
+        # Sum pts from child selections (handles variable-model-count units
+        # and enhancement pts costs added to units with a base cost)
+        selections = obj.get("selections")
+        if isinstance(selections, list):
+            pts += _sum_descendant_pts(selections)
+
+        cp = _extract_cost(obj, ["cp", "crusade points"], ["crusadePoints", "cp"])
+
+        # Count models
+        model_count = 0
+        if obj.get("type") == "unit" and isinstance(selections, list):
+            model_count = _sum_descendant_models(selections)
+        elif obj.get("type") == "model":
+            model_count = int(obj.get("number", 1))
+
+        breakdown = _build_model_breakdown(
+            obj if obj.get("type") == "unit" else {"selections": [obj]}
+        )
+
+        for _ in range(count):
+            entries.append(Unit(
+                name=display_name,
+                type=obj["type"],
+                pts=pts,
+                cp=cp,
+                model_count=model_count,
+                breakdown=breakdown,
+                custom_name=obj.get("customName") or obj.get("custom_name"),
+            ))
+        return
+
+    # Recurse into all values
+    for v in obj.values():
+        if isinstance(v, (dict, list)):
+            _search_tree(v, entries)
+
+
+# ── Public API ──
+
+def parse_roster(json_str: str) -> RosterSummary:
+    """Parse a BattleScribe/NewRecruit roster JSON string into a RosterSummary.
+
+    Args:
+        json_str: Raw JSON string from BSApp/NewRecruit roster export.
+
+    Returns:
+        RosterSummary with all units, costs, and model counts extracted.
+
+    Raises:
+        json.JSONDecodeError: If the input is not valid JSON.
+        KeyError: If the roster structure is missing required fields.
+    """
+    data = json.loads(json_str)
+    roster = data.get("roster", data)
+
+    roster_name = roster.get("name", "Unknown Roster")
+    game_system = roster.get("gameSystemName", "Unknown")
+
+    # Extract points limit
+    points_limit = None
+    cost_limits = roster.get("costLimits", [])
+    for cl in cost_limits:
+        if (cl.get("name") or "").lower() == "pts":
+            points_limit = int(cl.get("value", 0))
+            break
+
+    # Detect faction from first force's catalogue
+    faction = None
+    forces = roster.get("forces", [])
+    if forces:
+        faction = forces[0].get("name") or forces[0].get("catalogueName")
+
+    # Walk the tree
+    entries: list[Unit] = []
+    _search_tree(roster, entries)
+
+    total_pts = sum(u.pts for u in entries)
+    total_cp = sum(u.cp for u in entries)
+
+    return RosterSummary(
+        roster_name=roster_name,
+        game_system=game_system,
+        points_limit=points_limit,
+        units=entries,
+        total_pts=total_pts,
+        total_cp=total_cp,
+        faction=faction,
+    )
+
+
+def parse_roster_file(path: str) -> RosterSummary:
+    """Parse a roster JSON file from disk.
+
+    Args:
+        path: Path to the roster JSON file.
+
+    Returns:
+        RosterSummary with all units extracted.
+    """
+    with open(path, "r", encoding="utf-8") as f:
+        return parse_roster(f.read())
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+# setuptools' PEP 517 backend; setuptools ships no "setuptools.backends" module.
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "bs-roster-parser"
+version = "1.0.0"
+description = "Parse BattleScribe/NewRecruit roster JSON into flat unit lists with costs, model counts, and weapon breakdowns"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "MIT" }
+authors = [{ name = "Kay Kayyali" }]
+
+[tool.setuptools]
+packages = ["bs_roster_parser"]
--- a/tests/example_roster.json
+++ b/tests/example_roster.json
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""Tests for bs-roster-parser using the Nachmund Tracker example roster."""
+
+import json
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+
+from bs_roster_parser import parse_roster, parse_roster_file, RosterSummary, Unit
+
+TEST_FILE = os.path.join(os.path.dirname(__file__), "example_roster.json")
+
+
+def test_parse_file():
+    """Parse the example roster file and verify basic structure."""
+    summary = parse_roster_file(TEST_FILE)
+    assert isinstance(summary, RosterSummary)
+    assert summary.roster_name == "Cadian 67th Legion (1)"
+    assert summary.game_system == "Warhammer 40,000 10th Edition"
+    assert summary.points_limit == 2000
+    print(f"✓ Parsed: {summary.roster_name}")
+    print(f"  Game: {summary.game_system}")
+    print(f"  Points limit: {summary.points_limit}")
+    print(f"  Faction: {summary.faction}")
+    print(f"  Units: {summary.unit_count}")
+    print(f"  Total pts: {summary.total_pts}")
+    print(f"  Total CP: {summary.total_cp}")
+    print(f"  Total models: {summary.total_models}")
+
+
+def test_units_extracted():
+    """Verify units are extracted with correct names and costs."""
+    summary = parse_roster_file(TEST_FILE)
+    assert summary.units, "No units extracted"
+    # Should have at least some units with pts
+    units_with_pts = [u for u in summary.units if u.pts > 0]
+    assert units_with_pts, "No units with pts > 0 found"
+
+    # Gaunt's Ghosts should be 100 pts (from the example JSON)
+    # Note: the roster uses a unicode curly apostrophe (') not a straight one (')
+    ghosts = summary.find_unit("Gaunt's Ghosts")
+    assert ghosts is not None, "Gaunt's Ghosts not found"
+    assert ghosts.pts == 100, f"Gaunt's Ghosts pts = {ghosts.pts}, expected 100"
+    assert ghosts.type == "unit"
+    print(f"✓ Gaunt's Ghosts: {ghosts.pts}pts, {ghosts.model_count} models")
+
+
+def test_model_count():
+    """Verify model counting works for unit nodes."""
+    summary = parse_roster_file(TEST_FILE)
+    for u in summary.units:
+        if u.model_count > 0:
+            print(f"✓ {u.name}: {u.model_count} models, {u.pts}pts")
+
+
+def test_crusade_points():
+    """Verify crusade points extraction."""
+    summary = parse_roster_file(TEST_FILE)
+    if summary.total_cp > 0:
+        cp_units = [u for u in summary.units if u.cp > 0]
+        print(f"✓ Crusade points: {summary.total_cp} CP across {len(cp_units)} units")
+        for u in cp_units:
+            print(f"    {u.name}: {u.cp} CP")
+
+
+def test_model_breakdown():
+    """Verify model variant breakdown (weapons, distinct model types)."""
+    summary = parse_roster_file(TEST_FILE)
+    for u in summary.units:
+        if u.breakdown:
+            print(f"✓ {u.name} breakdown:")
+            for m in u.breakdown:
+                weapon_str = f", weapons: {m.weapons}" if m.weapons else ""
+                print(f"    {m.name} ×{m.count}{weapon_str}")
+
+
+def test_to_dict():
+    """Verify serialization to dict."""
+    summary = parse_roster_file(TEST_FILE)
+    d = summary.to_dict()
+    assert "units" in d
+    assert "total_pts" in d
+    assert isinstance(d["units"], list)
+    # Round-trip through JSON
+    json_str = json.dumps(d)
+    restored = json.loads(json_str)
+    assert restored["total_pts"] == summary.total_pts
+    print(f"✓ Serialization: {len(restored['units'])} units, {restored['total_pts']}pts")
+
+
+def test_find_unit_case_insensitive():
+    """Verify case-insensitive unit lookup."""
+    summary = parse_roster_file(TEST_FILE)
+    # Try different cases
+    for name in ["gaunt's ghosts", "GAUNT'S GHOSTS", "Gaunt's Ghosts"]:
+        u = summary.find_unit(name)
+        if u:
+            print(f"✓ Found '{name}' → {u.pts}pts")
+            return
+    # If Gaunt's Ghosts isn't in this roster, just verify find_unit returns None for nonsense
+    assert summary.find_unit("Nonexistent Unit") is None
+    print("✓ find_unit returns None for nonexistent units")
+
+
+def test_string_input():
+    """Verify parse_roster accepts a JSON string."""
+    with open(TEST_FILE) as f:
+        json_str = f.read()
+    summary = parse_roster(json_str)
+    assert summary.units
+    print(f"✓ String input: {summary.unit_count} units parsed")
+
+
+if __name__ == "__main__":
+    tests = [
+        test_parse_file,
+        test_units_extracted,
+        test_model_count,
+        test_crusade_points,
+        test_model_breakdown,
+        test_to_dict,
+        test_find_unit_case_insensitive,
+        test_string_input,
+    ]
+    passed = 0
+    failed = 0
+    for test in tests:
+        try:
+            test()
+            passed += 1
+        except Exception as e:
+            print(f"✗ {test.__name__}: {e}")
+            failed += 1
+    print(f"\n{'='*40}")
+    print(f"Results: {passed} passed, {failed} failed")
+    sys.exit(1 if failed else 0)