# bs-roster-parser/bs_roster_parser/parser.py

"""
Core roster parser — recursively walks BSApp/NewRecruit roster JSON trees
and extracts flat unit entries with costs, model counts, and weapon breakdowns.

Extracted from the Nachmund Tracker's processJSON function (src/app.js ~lines 648-764),
ported from JavaScript to Python and generalized to be reusable.
"""

import json
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class WeaponProfile:
    """Record for one weapon selection, identified only by its name."""

    # Weapon name as a plain string.
    name: str


@dataclass
class ModelVariant:
    """One kind of model inside a unit, e.g. 'Sergeant' as opposed to 'Battle Sister'."""

    # Name shared by every model of this variant.
    name: str
    # How many models of this variant the unit contains.
    count: int
    # Weapon names carried by this variant; each instance gets its own list.
    weapons: list[str] = field(default_factory=list)


@dataclass
class Unit:
    """One flat roster entry: a unit (or lone model) with its costs and composition."""

    # Name shown for the entry.
    name: str
    # Entry kind: 'model' or 'unit'.
    type: str
    # Points cost, descendants included.
    pts: float
    # Crusade points; 0 when the entry has none.
    cp: float
    # Total number of models in the entry.
    model_count: int
    # Per-variant model counts and weapons; each instance gets its own list.
    breakdown: list[ModelVariant] = field(default_factory=list)
    # Custom name if one was set in BSApp, otherwise None.
    custom_name: Optional[str] = None


@dataclass
class RosterSummary:
    """Result of parsing one roster: header metadata plus the flat list of units."""

    roster_name: str
    game_system: str
    # Points limit of the roster, or None when none was found.
    points_limit: Optional[int]
    units: list[Unit]
    total_pts: float
    total_cp: float
    # Read from the roster's first force, if there is one.
    faction: Optional[str] = None

    @property
    def unit_count(self) -> int:
        """Number of unit entries held in ``units``."""
        return len(self.units)

    @property
    def total_models(self) -> int:
        """Sum of ``model_count`` over every unit entry."""
        running = 0
        for unit in self.units:
            running += unit.model_count
        return running

    def find_unit(self, name: str) -> Optional[Unit]:
        """Return the first unit whose name matches ``name``, or None.

        The comparison ignores case and treats curly apostrophes
        (U+2018 / U+2019) as plain ``'``.
        """

        def fold(text: str) -> str:
            # Straighten curly apostrophes, then lower-case for comparison.
            return text.replace("\u2019", "'").replace("\u2018", "'").lower()

        wanted = fold(name)
        return next(
            (unit for unit in self.units if fold(unit.name) == wanted),
            None,
        )

    def to_dict(self) -> dict:
        """Return the summary as nested plain dicts/lists suitable for JSON export."""
        unit_rows = []
        for unit in self.units:
            variant_rows = []
            for variant in unit.breakdown:
                variant_rows.append(
                    {
                        "name": variant.name,
                        "count": variant.count,
                        "weapons": variant.weapons,
                    }
                )
            unit_rows.append(
                {
                    "name": unit.name,
                    "type": unit.type,
                    "pts": unit.pts,
                    "cp": unit.cp,
                    "model_count": unit.model_count,
                    "custom_name": unit.custom_name,
                    "breakdown": variant_rows,
                }
            )

        return {
            "roster_name": self.roster_name,
            "game_system": self.game_system,
            "points_limit": self.points_limit,
            "faction": self.faction,
            "total_pts": self.total_pts,
            "total_cp": self.total_cp,
            "unit_count": self.unit_count,
            "total_models": self.total_models,
            "units": unit_rows,
        }


# ── Cost extraction helpers ──

def _extract_cost(node: dict, exact_matches: list[str], prop_names: list[str]) -> float:
    """Extract a single cost value from a node.

    The ``costs`` array is consulted first: the first entry whose lower-cased,
    stripped ``name`` is in ``exact_matches`` supplies the value. If that
    yields zero (no match, or a matching entry worth 0), the first key of
    ``prop_names`` that is present on ``node`` with a non-null value is used
    instead.

    Entries of ``costs`` that are not objects are skipped and a null ``value``
    counts as 0, so a partially filled export does not abort the parse.

    Args:
        node: Roster node to read the cost from.
        exact_matches: Lower-case cost names accepted in the ``costs`` array.
        prop_names: Direct property names tried as a fallback, in order.

    Returns:
        The cost as a float, or 0.0 when nothing matched.
    """
    val = 0.0
    costs = node.get("costs")
    if isinstance(costs, list):
        for c in costs:
            if not isinstance(c, dict):
                continue
            cname = (c.get("name") or "").lower().strip()
            if cname in exact_matches:
                raw = c.get("value")
                # JSON null would make float() raise; treat it like a missing value.
                if raw is not None:
                    val += float(raw)
                break
    if val == 0:
        # Fall back to direct properties only when the costs array gave nothing.
        for prop in prop_names:
            raw = node.get(prop)
            if raw is not None:
                val += float(raw)
                break
    return val


def _sum_descendant_pts(obj) -> float:
    """Recursively sum the points carried by a subtree of selections.

    Handles variable-model-count units where each child model carries its own
    pts. For every dict node, the first ``costs`` entry named "pts" or
    "points" (case-insensitive, surrounding whitespace ignored) is added, and
    the node's ``selections`` list is then searched the same way.

    Entries of ``costs`` that are not objects are skipped and a null ``value``
    counts as 0.

    Args:
        obj: A selection dict, a list of selections, or anything else
            (which contributes nothing).

    Returns:
        Total points as a float; 0.0 for empty or non-container input.
    """
    if isinstance(obj, list):
        # Start from 0.0 so an empty list still yields a float.
        return sum((_sum_descendant_pts(item) for item in obj), 0.0)
    if not isinstance(obj, dict):
        return 0.0
    pts = 0.0
    costs = obj.get("costs")
    if isinstance(costs, list):
        for c in costs:
            if not isinstance(c, dict):
                continue
            cname = (c.get("name") or "").lower().strip()
            if cname in ("pts", "points"):
                raw = c.get("value")
                # JSON null would make float() raise; treat it like a missing value.
                if raw is not None:
                    pts += float(raw)
                break
    selections = obj.get("selections")
    if isinstance(selections, list):
        pts += _sum_descendant_pts(selections)
    return pts


def _sum_descendant_models(obj) -> int:
    """Recursively count the models in a subtree.

    Every dict node whose ``type`` is "model" contributes its ``number``
    field; a missing or null ``number`` counts as one model. The node's
    ``selections`` list is searched as well, including below model nodes.

    Args:
        obj: A selection dict, a list of selections, or anything else
            (which contributes nothing).

    Returns:
        Total number of models found.
    """
    if isinstance(obj, list):
        return sum(_sum_descendant_models(item) for item in obj)
    if not isinstance(obj, dict):
        return 0
    count = 0
    if obj.get("type") == "model":
        number = obj.get("number")
        # JSON null would make int() raise; treat it like a missing field.
        count += 1 if number is None else int(number)
    selections = obj.get("selections")
    if isinstance(selections, list):
        count += _sum_descendant_models(selections)
    return count


# ── Weapon detection ──

def _get_weapon_names(selections) -> list[str]:
    """Collect the names of weapon selections, in first-seen order without repeats.

    A selection counts as a weapon when at least one of its profiles has a
    ``typeName`` containing "weapon" (case-insensitive) and the selection has
    a non-empty name. Nested ``selections`` are searched too, so weapons
    wrapped in a compound upgrade (e.g. "2 Plasma Cannons") are found.

    Args:
        selections: The ``selections`` value of a node; anything that is not
            a list yields an empty result.

    Returns:
        Unique weapon names in the order they were encountered.
    """
    found: list[str] = []
    if not isinstance(selections, list):
        return found

    for entry in selections:
        if not isinstance(entry, dict):
            continue

        profile_list = entry.get("profiles")
        if isinstance(profile_list, list):
            for profile in profile_list:
                type_name = (profile.get("typeName") or "").lower()
                if "weapon" not in type_name:
                    continue
                label = entry.get("name")
                if label and label not in found:
                    found.append(label)
                    # One weapon profile is enough to classify the selection.
                    break

        # Descend into compound upgrades and merge anything new.
        nested = _get_weapon_names(entry.get("selections"))
        found += [w for w in nested if w not in found]

    return found


# ── Model breakdown builder ──

def _build_model_breakdown(unit_obj: dict) -> list[ModelVariant]:
    """Group the models below a unit by name.

    Walks ``unit_obj["selections"]`` depth-first. Every node whose ``type`` is
    "model" adds its ``number`` to the variant with the same name (a missing
    or null ``number`` counts as one model, a missing name becomes "Unknown")
    and merges in the weapon names found in its selections. The walk does not
    descend into a model node looking for further models.

    Args:
        unit_obj: The unit node, or a wrapper dict with a ``selections`` list.

    Returns:
        One ModelVariant per distinct model name, in first-seen order.
    """
    model_map: dict[str, dict] = {}

    def collect_models(obj):
        if isinstance(obj, list):
            for item in obj:
                collect_models(item)
            return
        if not isinstance(obj, dict):
            return
        if obj.get("type") == "model":
            name = obj.get("name") or "Unknown"
            number = obj.get("number")
            # JSON null would make int() raise; treat it like a missing field.
            n = 1 if number is None else int(number)
            variant = model_map.setdefault(name, {"count": 0, "weapons": []})
            variant["count"] += n
            for w in _get_weapon_names(obj.get("selections")):
                if w not in variant["weapons"]:
                    variant["weapons"].append(w)
            return
        selections = obj.get("selections")
        if isinstance(selections, list):
            collect_models(selections)

    selections = unit_obj.get("selections")
    if isinstance(selections, list):
        collect_models(selections)

    return [
        ModelVariant(name=name, count=d["count"], weapons=d["weapons"])
        for name, d in model_map.items()
    ]


# ── Main tree walker ──

def _search_tree(obj, entries: list[Unit]) -> None:
    """Recursively walk the roster tree, appending unit/model entries to ``entries``.

    A dict node with ``from == "entry"`` and ``type`` of "model" or "unit" is
    turned into Unit records and not searched any further; every other dict
    or list is searched through all of its values.

    Args:
        obj: Any value from the decoded roster JSON.
        entries: Output list, extended in place.
    """
    if isinstance(obj, list):
        for item in obj:
            _search_tree(item, entries)
        return
    if not isinstance(obj, dict):
        return

    # Look for unit/model entries that come directly from a catalogue entry
    entry_type = obj.get("type")
    if obj.get("from") == "entry" and entry_type in ("model", "unit"):
        custom_name = obj.get("customName") or obj.get("custom_name")
        display_name = custom_name or obj.get("name") or "Unnamed Entry"

        # A missing, null or zero ``number`` means a single entry.
        number = obj.get("number")
        count = int(number) if number else 1

        pts = _extract_cost(obj, ["pts", "points"], ["pts", "points"])

        # Sum pts from child selections (handles variable-model-count units
        # and enhancement pts costs added to units with a base cost)
        selections = obj.get("selections")
        if isinstance(selections, list):
            pts += _sum_descendant_pts(selections)

        cp = _extract_cost(obj, ["cp", "crusade points"], ["crusadePoints", "cp"])

        # Count models
        model_count = 0
        if entry_type == "unit" and isinstance(selections, list):
            model_count = _sum_descendant_models(selections)
        elif entry_type == "model":
            # Null is treated like a missing field, matching ``count`` above.
            model_count = 1 if number is None else int(number)

        # A lone model is wrapped so the breakdown builder sees it as a child.
        breakdown_root = obj if entry_type == "unit" else {"selections": [obj]}

        # NOTE(review): each of the ``count`` copies carries the full ``pts``,
        # ``cp`` and ``model_count`` of the node — confirm that is intended
        # for entries with number > 1.
        for _ in range(count):
            entries.append(Unit(
                name=display_name,
                type=entry_type,
                pts=pts,
                cp=cp,
                model_count=model_count,
                # Built per copy so duplicates never share one mutable list.
                breakdown=_build_model_breakdown(breakdown_root),
                custom_name=custom_name,
            ))
        return

    # Recurse into all values
    for v in obj.values():
        if isinstance(v, (dict, list)):
            _search_tree(v, entries)


# ── Public API ──

def parse_roster(json_str: str) -> RosterSummary:
    """Parse a BattleScribe/NewRecruit roster JSON string into a RosterSummary.

    The roster may be the top-level object itself or sit under a ``roster``
    key. Totals are the sums of ``pts`` and ``cp`` over the extracted units.

    Args:
        json_str: Raw JSON string from BSApp/NewRecruit roster export.

    Returns:
        RosterSummary with all units, costs, and model counts extracted.

    Raises:
        json.JSONDecodeError: If the input is not valid JSON.
        ValueError: If the decoded roster is not a JSON object.
    """
    data = json.loads(json_str)
    roster = data.get("roster", data) if isinstance(data, dict) else None
    if not isinstance(roster, dict):
        raise ValueError(
            "Roster JSON must be an object (optionally wrapped in a 'roster' key)"
        )

    roster_name = roster.get("name", "Unknown Roster")
    game_system = roster.get("gameSystemName", "Unknown")

    # Extract points limit. Names are matched the same way unit costs are
    # ("pts" or "points", case-insensitive, whitespace stripped).
    points_limit = None
    for cl in roster.get("costLimits") or []:
        if not isinstance(cl, dict):
            continue
        if (cl.get("name") or "").lower().strip() in ("pts", "points"):
            value = cl.get("value")
            # A limit entry without a usable value is reported as 0.
            points_limit = int(value) if value is not None else 0
            break

    # Faction is documented as the first force's catalogue name, so prefer
    # ``catalogueName``; the force's own ``name`` is only a fallback.
    faction = None
    forces = roster.get("forces")
    if isinstance(forces, list) and forces and isinstance(forces[0], dict):
        faction = forces[0].get("catalogueName") or forces[0].get("name")

    # Walk the tree
    entries: list[Unit] = []
    _search_tree(roster, entries)

    total_pts = sum(u.pts for u in entries)
    total_cp = sum(u.cp for u in entries)

    return RosterSummary(
        roster_name=roster_name,
        game_system=game_system,
        points_limit=points_limit,
        units=entries,
        total_pts=total_pts,
        total_cp=total_cp,
        faction=faction,
    )


def parse_roster_file(path: str) -> RosterSummary:
    """Read a roster JSON file as UTF-8 text and parse it.

    Args:
        path: Location of the roster JSON file on disk.

    Returns:
        The RosterSummary produced by ``parse_roster`` for the file's contents.
    """
    with open(path, encoding="utf-8") as handle:
        raw_json = handle.read()
    return parse_roster(raw_json)