#!/usr/bin/env python3
"""
Generate the "US troops deployed in Iran" count market seed files.

Methodology
-----------
This market uses a zero-including latent-state model with partial pooling.

1. The monthly state can be one of:
   - none
   - raid
   - limited foothold
   - major campaign
2. Each positive state emits a heavy-tailed troop-count distribution on log1p(troops).
3. State emissions are partially pooled from tagged reference classes:
   - Coercive crises with no U.S. ground entry into the target country
   - Limited-entry operations into hostile or politically constrained territory
   - Large-entry or regime-change campaigns
4. A persistent monthly transition model carries risk forward through 2026 while
   allowing the no-entry state to remain dominant unless escalation occurs.

The goal is not to predict one exact troop count. It is to produce a threshold
ladder that preserves a large all-no region while still assigning a meaningful
upper tail to raid, foothold, or major-campaign outcomes.
"""

from __future__ import annotations

import calendar
import csv
import json
import math
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from html import escape
from pathlib import Path
from statistics import NormalDist


# Filesystem locations. REPO_ROOT is the third ancestor (parents[2]) of this
# file's resolved path; the paths below all sit under data/us-troops-iran.
REPO_ROOT = Path(__file__).resolve().parents[2]
DATA_DIR = REPO_ROOT / "data" / "us-troops-iran"
CSV_PATH = DATA_DIR / "market.csv"
BACKGROUND_PATH = DATA_DIR / "background_info.html"
SUMMARY_PATH = DATA_DIR / "model_summary.json"

# Market-level settings; write_market_csv emits these in the "# key: value"
# metadata header at the top of the CSV.
MARKET_CODE = "us-troops-iran"
MARKET_TITLE = "US troops deployed in Iran"
MARKET_BUDGET = 500
# Emitted both per row and in the metadata header, formatted to three decimals.
MARKET_DECAY_RATE = 0.005
# Decimal places used when formatting each row's initial_probability.
PROBABILITY_DECIMALS = 3

# (year, month) pairs covered by the market: April through December 2026.
MONTHS = [(2026, month) for month in range(4, 13)]
# Troop-count threshold ladder in ascending order. One CSV row is produced per
# (month, threshold) pair, and validate_rows checks that probabilities do not
# increase along this ladder within a month.
THRESHOLDS = [
    25,
    50,
    75,
    100,
    125,
    150,
    200,
    250,
    300,
    400,
    500,
    650,
    800,
    1000,
    1250,
    1500,
    2000,
    2500,
    3000,
    4000,
    5000,
    6500,
    8000,
    10000,
    12500,
    15000,
    20000,
    30000,
    50000,
]
# Subset of THRESHOLDS shown in the background page's summary table. The table
# header and row cells in build_background_html are written out for exactly
# these five values, so changes here must be mirrored there.
SELECTED_THRESHOLDS = [25, 100, 1000, 5000, 20000]

# Latent monthly states, in the canonical order used whenever the script
# iterates over states (prior blending, propagation, mixing, HTML tables).
STATE_ORDER = ["none", "raid", "limited_foothold", "major_campaign"]
# Human-readable state names used in the generated HTML.
STATE_LABELS = {
    "none": "No entry",
    "raid": "Raid",
    "limited_foothold": "Limited foothold",
    "major_campaign": "Major campaign",
}
# Reference-class tags a ReferenceCase may carry. The keys seed the counters in
# compute_overlap_counts, so every tag used by a case must appear here.
TAG_LABELS = {
    "coercive_no_ground_entry": "Coercive no-ground-entry",
    "limited_entry": "Limited entry",
    "large_entry": "Large entry",
}

# HTML resolution criteria for the market. The adjacent string literals are
# concatenated into one string with no newlines; write_market_csv places it on
# a single "# market_resolution_criteria:" metadata line, so it must stay
# newline-free.
RESOLUTION_CRITERIA = (
    "<p>This market resolves on the maximum simultaneous number of United States "
    "uniformed military personnel credibly present on Iranian sovereign land or "
    "at fixed installations in Iran at any point during each calendar month in 2026. "
    "Each row's <code>projection_group</code> identifies the month, and each row "
    "resolves Yes if the best-supported monthly peak is greater than or equal to "
    "<code>threshold_decimal</code>, No if the best-supported monthly peak is below "
    "that threshold, and will remain unresolved if reliable evidence leaves that "
    "threshold inside an unresolved interval.</p>"
    "<p>The exact value may be impossible to determine unambiguously. Resolution will "
    "therefore be threshold-by-threshold: the intention is to resolve as many rows as "
    "can reasonably be supported, even if some middle band of thresholds remains "
    "unresolved. Some thresholds are expected to remain unresolved.</p>"
    "<p>Evidence priority is: (1) official United States or Iranian statements, "
    "documents, or after-action reporting; (2) AP, Reuters, or AFP reporting; "
    "(3) high-confidence open-source reporting with named sourcing or strong "
    "evidentiary support. Aircraft overflight, offshore ships not in Iranian "
    "internal waters or port, and forces based only outside Iran do not count. "
    "Civilian contractors do not count unless clearly identified as uniformed "
    "military personnel. If multiple credible sources imply only a range, that "
    "range will be used rather than forcing a false precision.</p>"
)


@dataclass(frozen=True)
class ReferenceCase:
    """One historical episode used as a reference point for the model.

    Instances are immutable. The troop count, state and tags feed the emission
    fit and the overlap counts; the remaining fields are descriptive and are
    rendered into the background HTML table.
    """

    # Episode date as a "YYYY-MM" string.
    date: str
    # Display name of the episode.
    name: str
    # Estimated peak U.S. troop count in the target country (0 for no entry).
    peak_troops: int
    # Free-text scenario description shown in the HTML table.
    scenario_type: str
    # Latent state the episode is coded as; the listed cases use STATE_ORDER values.
    state: str
    # Reference-class tags (keys of TAG_LABELS). A case may carry several, in
    # which case compute_overlap_counts splits its unit weight across them.
    tags: tuple[str, ...]
    # Link text and URL for the supporting source.
    source_label: str
    source_url: str
    # Short explanation of why the case matters; shown in the HTML table.
    notes: str


@dataclass(frozen=True)
class EmissionParams:
    """Troop-count emission distribution for a single latent state.

    Positive-entry states are described by a normal distribution on
    log1p(troops). The "none" state carries ``None`` for both log parameters,
    which tail_probability treats as zero exceedance probability.
    """

    # Mean of the normal distribution on log1p(troops); None for the "none" state.
    log_mean: float | None
    # Standard deviation on log1p(troops); None for the "none" state.
    log_sigma: float | None
    # Median troop count implied by the fit (exp(quantile) - 1), 0.0 for "none".
    median_troops: float
    # 90th-percentile troop count implied by the fit, 0.0 for "none".
    p90_troops: float


# Historical reference cases. Zero-troop cases are coded as the "none" state
# under the coercive no-ground-entry tag; positive cases feed the emission fit
# in build_emission_params according to their state and tags.
#
# NOTE: list order is load-bearing. build_background_html picks its
# "most influential examples" by positional index into this list, so inserting,
# removing or reordering cases requires updating those indices as well.
REFERENCE_CASES = [
    ReferenceCase(
        date="1987-10",
        name="Tanker War reflagging and escort posture",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Office of the Historian: The Tanker War",
        source_url="https://history.state.gov/milestones/1981-1988/tanker-war",
        notes="Direct naval confrontation pressure around Iran without putting U.S. troops on the ground.",
    ),
    ReferenceCase(
        date="1988-04",
        name="Operation Praying Mantis",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Britannica: Operation Praying Mantis",
        source_url="https://www.britannica.com/event/Operation-Praying-Mantis",
        notes="Large U.S. strike operation against Iranian targets without putting U.S. troops on the ground.",
    ),
    ReferenceCase(
        date="1986-04",
        name="Operation El Dorado Canyon",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Britannica: Operation El Dorado Canyon",
        source_url="https://www.britannica.com/topic/Operation-El-Dorado-Canyon",
        notes="Punitive U.S. air strikes on Libya without putting U.S. troops on the ground in the target country.",
    ),
    ReferenceCase(
        date="1998-12",
        name="Operation Desert Fox",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Britannica: Operation Desert Fox",
        source_url="https://www.britannica.com/event/Operation-Desert-Fox",
        notes="A sustained U.S.-U.K. strike campaign against Iraq without putting U.S. troops on the ground there.",
    ),
    ReferenceCase(
        date="1999-03",
        name="NATO air war over Kosovo/Serbia",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Britannica: Kosovo conflict",
        source_url="https://www.britannica.com/event/Kosovo-conflict",
        notes="A major coercive air campaign without a U.S. ground entry into the target country. (Note: NATO KFOR ground forces entered Kosovo post-armistice; only the coercive air phase informs this class.)",
    ),
    ReferenceCase(
        date="2011-03",
        name="Libya 2011 NATO air campaign",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="Britannica: 2011 Libyan civil war",
        source_url="https://www.britannica.com/event/Libya-Civil-War-2011",
        notes="NATO air campaign achieved regime change in Libya without U.S. ground troops entering the country; demonstrates that coercive airpower can topple a government without a ground foothold.",
    ),
    ReferenceCase(
        date="2020-01",
        name="Soleimani killing and U.S.–Iran stand-off",
        peak_troops=0,
        scenario_type="No ground entry",
        state="none",
        tags=("coercive_no_ground_entry",),
        source_label="DoD: statement on death of Qasem Soleimani",
        source_url="https://www.defense.gov/News/Releases/Release/Article/2049534/statement-by-the-department-of-defense/",
        notes="Direct U.S. strike killing Iran's top military commander; Iran responded with ballistic missile strikes on U.S. bases in Iraq; no U.S. troops entered Iran despite the most direct military confrontation since 1988. The most recent Iran-specific coercive episode.",
    ),
    ReferenceCase(
        date="1980-04",
        name="Operation Eagle Claw",
        peak_troops=132,
        scenario_type="Raid-scale attempted entry",
        state="raid",
        tags=("limited_entry",),
        source_label="Britannica: Operation Eagle Claw",
        source_url="https://www.britannica.com/event/Operation-Eagle-Claw",
        notes="Failed rescue mission into Iran; useful low-end Iran-specific raid analogue.",
    ),
    ReferenceCase(
        date="2011-05",
        name="Abbottabad raid",
        peak_troops=79,
        scenario_type="Raid-scale entry",
        state="raid",
        tags=("limited_entry",),
        source_label="White House archive: Bin Laden remarks",
        source_url="https://obamawhitehouse.archives.gov/the-press-office/2011/05/02/remarks-president-osama-bin-laden",
        notes="Politically constrained sovereign-territory raid with dozens of U.S. personnel.",
    ),
    ReferenceCase(
        date="2019-10",
        name="Barisha raid against al-Baghdadi",
        peak_troops=70,
        scenario_type="Raid-scale entry",
        state="raid",
        tags=("limited_entry",),
        source_label="White House archive: al-Baghdadi statement",
        source_url="https://trumpwhitehouse.archives.gov/briefings-statements/statement-president-death-abu-bakr-al-baghdadi/",
        notes="Another tightly bounded hostile-territory raid with a small force package.",
    ),
    ReferenceCase(
        date="2001-11",
        name="Opening U.S. foothold in Afghanistan",
        peak_troops=1300,
        scenario_type="Limited foothold",
        state="limited_foothold",
        tags=("limited_entry",),
        source_label="Britannica: Afghanistan War",
        source_url="https://www.britannica.com/event/Afghanistan-War",
        notes="Small but persistent opening-month ground footprint before later expansion.",
    ),
    ReferenceCase(
        date="1983-10",
        name="Grenada opening week",
        peak_troops=7300,
        scenario_type="Small invasion / overlap case",
        state="major_campaign",
        tags=("limited_entry", "large_entry"),
        source_label="Britannica: Grenada invasion",
        source_url="https://www.britannica.com/event/Grenada-invasion",
        notes="Overlap case: larger than a foothold, smaller than Iraq-scale campaigns.",
    ),
    ReferenceCase(
        date="1989-12",
        name="Panama opening month",
        peak_troops=26000,
        scenario_type="Major campaign",
        state="major_campaign",
        tags=("large_entry",),
        source_label="Britannica: Panama invasion",
        source_url="https://www.britannica.com/event/Panama-invasion",
        notes="Short, decisive regime-change campaign with tens of thousands of troops.",
    ),
    ReferenceCase(
        date="1991-02",
        name="Desert Storm ground offensive into Iraq/Kuwait",
        peak_troops=270000,
        scenario_type="Major campaign",
        state="major_campaign",
        tags=("large_entry",),
        source_label="Britannica: Persian Gulf War",
        source_url="https://www.britannica.com/event/Persian-Gulf-War",
        notes="Upper-tail analogue for a very large U.S. ground campaign in the region.",
    ),
    ReferenceCase(
        date="2003-03",
        name="Iraq invasion opening month",
        peak_troops=148000,
        scenario_type="Major campaign",
        state="major_campaign",
        tags=("large_entry",),
        source_label="Britannica: Iraq War",
        source_url="https://www.britannica.com/event/Iraq-War",
        notes="Large ground invasion used to shape the upper campaign tail rather than the median month.",
    ),
    ReferenceCase(
        date="1952-04",
        name="Korean War peak deployment",
        peak_troops=326000,
        scenario_type="Major campaign",
        state="major_campaign",
        tags=("large_entry",),
        source_label="Britannica: Korean War",
        source_url="https://www.britannica.com/event/Korean-War",
        notes="Peak U.S. deployment in a sustained ground war against a populous Asian adversary; upper-tail anchor calibrating the major-campaign distribution for a large-country opponent with a capable military.",
    ),
    ReferenceCase(
        date="1969-04",
        name="Vietnam War peak deployment",
        peak_troops=543000,
        scenario_type="Major campaign",
        state="major_campaign",
        tags=("large_entry",),
        source_label="Britannica: Vietnam War",
        source_url="https://www.britannica.com/event/Vietnam-War",
        notes="Largest sustained U.S. ground commitment in a hostile country; establishes the plausible ceiling for a large-scale campaign against a populous, militarily capable adversary. Iran (~85 million people, significant conventional and irregular forces) is closer in scale to Vietnam-era North/South Vietnam than to Iraq 2003.",
    ),
]


def log1p_stats(values: list[int]) -> tuple[float, float]:
    """Return the mean and sample standard deviation of ``log1p(values)``.

    Args:
        values: Non-empty list of troop counts.

    Returns:
        ``(mean, sigma)`` on the log1p scale. A single observation has no
        sample variance, so a fixed fallback sigma of 0.35 is returned for it.

    Raises:
        ValueError: If ``values`` is empty.
    """
    if not values:
        raise ValueError("Expected non-empty list of values")

    transformed = [math.log1p(count) for count in values]
    sample_size = len(transformed)
    center = sum(transformed) / sample_size

    # One data point: fall back to the fixed default spread.
    if sample_size == 1:
        return center, 0.35

    # Unbiased (n - 1) sample variance.
    squared_deviations = sum((item - center) ** 2 for item in transformed)
    return center, math.sqrt(squared_deviations / (sample_size - 1))


def month_projection_group(year: int, month: int) -> str:
    """Return the zero-padded ``YYYY-MM`` key identifying a month."""
    return "{:04d}-{:02d}".format(year, month)


def month_label(year: int, month: int) -> str:
    """Return a short display label such as ``Apr 2026`` for a month."""
    abbreviation = calendar.month_abbr[month]
    return f"{abbreviation} {year}"


def month_end_iso(year: int, month: int) -> str:
    """Return the final second of the month as a ``...T23:59:59Z`` timestamp."""
    # monthrange yields (weekday of the 1st, number of days); only the
    # day count is needed here.
    _, days_in_month = calendar.monthrange(year, month)
    date_part = f"{year:04d}-{month:02d}-{days_in_month:02d}"
    return date_part + "T23:59:59Z"


def compute_overlap_counts(cases: list[ReferenceCase]) -> tuple[dict[str, int], dict[str, float]]:
    """Count reference cases per tag, both raw and overlap-adjusted.

    Args:
        cases: Reference cases; every tag they carry must be a TAG_LABELS key.

    Returns:
        ``(raw, effective)``. ``raw`` counts each case once for every tag it
        carries. ``effective`` gives each case a total weight of 1.0 split
        evenly across its tags, so multi-tag cases are not double counted.
    """
    raw_totals = dict.fromkeys(TAG_LABELS, 0)
    weighted_totals = dict.fromkeys(TAG_LABELS, 0.0)

    for case in cases:
        case_tags = case.tags
        for tag in case_tags:
            raw_totals[tag] += 1
        # Each case contributes one unit in total, shared equally by its tags.
        per_tag_share = 1.0 / len(case_tags)
        for tag in case_tags:
            weighted_totals[tag] += per_tag_share

    return raw_totals, weighted_totals


def build_emission_params(cases: list[ReferenceCase]) -> dict[str, EmissionParams]:
    """Fit the partially pooled emission distribution of every latent state.

    Three samples are drawn from ``cases``: raid-state cases, positive cases
    tagged ``limited_entry``, and positive cases tagged ``large_entry``. Their
    log1p means and sigmas are blended with fixed weights, and each state's
    sigma is floored (0.40 raid, 0.70 foothold, 0.85 campaign).

    Args:
        cases: Reference cases supplying ``state``, ``tags`` and ``peak_troops``.

    Returns:
        Mapping from state name to EmissionParams, in the order none, raid,
        limited_foothold, major_campaign. The "none" entry has no log
        parameters and zero median / 90th percentile.

    Raises:
        ValueError: Propagated from log1p_stats if any of the three samples
            is empty.
    """
    raid_sample: list[int] = []
    limited_sample: list[int] = []
    major_sample: list[int] = []
    for case in cases:
        if case.state == "raid":
            raid_sample.append(case.peak_troops)
        if case.peak_troops > 0:
            if "limited_entry" in case.tags:
                limited_sample.append(case.peak_troops)
            if "large_entry" in case.tags:
                major_sample.append(case.peak_troops)

    raid_mu, raid_sd = log1p_stats(raid_sample)
    limited_mu, limited_sd = log1p_stats(limited_sample)
    major_mu, major_sd = log1p_stats(major_sample)

    # The foothold centre is computed first because the major-campaign centre
    # borrows 10% from it.
    foothold_center = 0.60 * limited_mu + 0.25 * major_mu + 0.15 * raid_mu

    # (log-mean, floored log-sigma) per positive-entry state.
    pooled = {
        "raid": (
            0.80 * raid_mu + 0.20 * limited_mu,
            max(0.40, 0.80 * raid_sd + 0.20 * limited_sd),
        ),
        "limited_foothold": (
            foothold_center,
            max(0.70, 0.70 * limited_sd + 0.30 * major_sd),
        ),
        "major_campaign": (
            0.90 * major_mu + 0.10 * foothold_center,
            max(0.85, 0.85 * major_sd + 0.15 * limited_sd),
        ),
    }

    fitted = {
        "none": EmissionParams(log_mean=None, log_sigma=None, median_troops=0.0, p90_troops=0.0),
    }
    for state, (center, spread) in pooled.items():
        distribution = NormalDist(mu=center, sigma=spread)
        # Quantiles live on the log1p scale; exp(q) - 1 maps them back to troops.
        fitted[state] = EmissionParams(
            log_mean=center,
            log_sigma=spread,
            median_troops=math.exp(distribution.inv_cdf(0.50)) - 1.0,
            p90_troops=math.exp(distribution.inv_cdf(0.90)) - 1.0,
        )
    return fitted


def tail_probability(threshold: int, params: EmissionParams) -> float:
    """Return the probability that a state's troop count exceeds ``threshold``.

    Args:
        threshold: Troop-count threshold on the natural scale.
        params: Emission parameters of the state.

    Returns:
        The upper-tail mass of the normal distribution on log1p(troops)
        above ``log1p(threshold)``; 0.0 when the state has no log parameters
        (the no-entry state).
    """
    mu, sigma = params.log_mean, params.log_sigma
    if mu is None or sigma is None:
        return 0.0
    cutoff = math.log1p(threshold)
    return 1.0 - NormalDist(mu=mu, sigma=sigma).cdf(cutoff)


def build_april_prior() -> dict[str, float]:
    """Blend the three reference-class state priors into the April prior.

    Each reference class has its own distribution over the latent states. The
    classes are mixed with weights 0.65 (coercive no-ground-entry), 0.25
    (limited entry) and 0.10 (large entry), and the blend is renormalised.

    Returns:
        Mapping from state name to probability, keyed in STATE_ORDER.
    """
    class_priors = {
        "coercive_no_ground_entry": {
            "none": 0.97,
            "raid": 0.020,
            "limited_foothold": 0.008,
            "major_campaign": 0.002,
        },
        "limited_entry": {
            "none": 0.72,
            "raid": 0.17,
            "limited_foothold": 0.08,
            "major_campaign": 0.03,
        },
        "large_entry": {
            "none": 0.55,
            "raid": 0.07,
            "limited_foothold": 0.18,
            "major_campaign": 0.20,
        },
    }
    coercive = class_priors["coercive_no_ground_entry"]
    limited = class_priors["limited_entry"]
    large = class_priors["large_entry"]
    coercive_weight, limited_weight, large_weight = 0.65, 0.25, 0.10

    blended: dict[str, float] = {}
    for state in STATE_ORDER:
        blended[state] = (
            coercive_weight * coercive[state]
            + limited_weight * limited[state]
            + large_weight * large[state]
        )

    # Renormalise so the result sums to one regardless of rounding.
    normalizer = sum(blended.values())
    return {state: blended[state] / normalizer for state in STATE_ORDER}


def transition_matrix(escalation_multiplier: float) -> dict[str, dict[str, float]]:
    """Build the one-month state transition matrix.

    Args:
        escalation_multiplier: Scale applied to every escalating transition
            (moves to a higher state). De-escalation rates and the whole
            major-campaign row are fixed.

    Returns:
        ``matrix[from_state][to_state]`` probabilities. For the none, raid and
        limited-foothold rows the probability of staying put is the residual
        after all exits, so those rows sum to one by construction.
    """
    scale = escalation_multiplier
    state_names = ("none", "raid", "limited_foothold", "major_campaign")

    # Exit probabilities per origin state, listed in state order so that the
    # residual below is subtracted in a fixed sequence.
    exits_by_origin = {
        "none": {
            "raid": 0.020 * scale,
            "limited_foothold": 0.008 * scale,
            "major_campaign": 0.003 * scale,
        },
        "raid": {
            "none": 0.55,
            "limited_foothold": 0.14 * scale,
            "major_campaign": 0.025 * scale,
        },
        "limited_foothold": {
            "none": 0.20,
            "raid": 0.09,
            "major_campaign": 0.18 * scale,
        },
    }

    matrix: dict[str, dict[str, float]] = {}
    for origin, exits in exits_by_origin.items():
        stay = 1.0
        for exit_probability in exits.values():
            stay -= exit_probability
        matrix[origin] = {
            target: (stay if target == origin else exits[target])
            for target in state_names
        }

    # A major campaign is strongly persistent and does not scale with escalation.
    matrix["major_campaign"] = {
        "none": 0.03,
        "raid": 0.02,
        "limited_foothold": 0.08,
        "major_campaign": 0.87,
    }
    return matrix


def propagate_state_probabilities(april_prior: dict[str, float]) -> tuple[dict[str, dict[str, float]], list[float]]:
    """Carry the first-month state prior forward through the remaining months.

    The first entry of MONTHS receives ``april_prior`` unchanged. Every later
    month is obtained by applying one transition step, using the escalation
    multiplier scheduled for that step, and renormalising.

    Args:
        april_prior: State probabilities for the first month, keyed by every
            state in STATE_ORDER.

    Returns:
        ``(monthly, escalation_path)`` where ``monthly`` maps each month's
        projection group to its state probabilities and ``escalation_path``
        is the multiplier schedule that was used, one value per step.

    Raises:
        ValueError: If MONTHS is empty or contains more transition steps than
            the escalation schedule provides.
    """
    escalation_path = [1.00, 0.97, 0.93, 0.88, 0.84, 0.80, 0.76, 0.72]

    if not MONTHS:
        raise ValueError("MONTHS must contain at least one month")
    first_month, later_months = MONTHS[0], MONTHS[1:]

    # zip() would silently stop at the shorter input and leave trailing months
    # without state probabilities, failing much later with a KeyError. Fail
    # here with a clear message instead.
    if len(later_months) > len(escalation_path):
        raise ValueError(
            f"Escalation schedule has {len(escalation_path)} steps but "
            f"{len(later_months)} monthly transitions are required"
        )

    # Key the prior by the actual first month rather than a hard-coded date so
    # the mapping always agrees with MONTHS.
    monthly = {month_projection_group(*first_month): april_prior}
    current = april_prior

    for (year, month), escalation_multiplier in zip(later_months, escalation_path):
        matrix = transition_matrix(escalation_multiplier)
        next_probs = {state: 0.0 for state in STATE_ORDER}
        for prev_state in STATE_ORDER:
            for next_state in STATE_ORDER:
                next_probs[next_state] += current[prev_state] * matrix[prev_state][next_state]
        # Renormalise to absorb floating-point drift in the row sums.
        total = sum(next_probs.values())
        current = {state: next_probs[state] / total for state in STATE_ORDER}
        monthly[month_projection_group(year, month)] = current

    return monthly, escalation_path


def build_probability_rows(
    monthly_states: dict[str, dict[str, float]],
    emissions: dict[str, EmissionParams],
) -> list[dict[str, str]]:
    """Build one CSV row per (month, threshold) pair.

    For each month the exceedance probability at a threshold is the
    state-probability-weighted sum of the per-state tail probabilities. The
    value is capped by the previous threshold's probability so the ladder
    never increases, then clamped to the range [0.001, 0.999].

    Args:
        monthly_states: State probabilities keyed by projection group.
        emissions: Emission parameters keyed by state.

    Returns:
        Rows as string-valued dicts, ordered by MONTHS and then THRESHOLDS.
    """
    decay_text = f"{MARKET_DECAY_RATE:.3f}"
    ladder_rows: list[dict[str, str]] = []

    for year, month in MONTHS:
        group_key = month_projection_group(year, month)
        display_label = month_label(year, month)
        closes_at = month_end_iso(year, month)
        state_mix = monthly_states[group_key]

        # Running cap: starts at certainty and ratchets down the ladder.
        ceiling = 1.0
        for threshold in THRESHOLDS:
            exceedance = sum(
                state_mix[state] * tail_probability(threshold, emissions[state])
                for state in STATE_ORDER
            )
            ceiling = min(0.999, max(0.001, min(ceiling, exceedance)))

            ladder_rows.append(
                {
                    "projection_group": group_key,
                    "threshold_decimal": str(threshold),
                    "threshold_date": "",
                    "initial_probability": f"{ceiling:.{PROBABILITY_DECIMALS}f}",
                    "label": display_label,
                    "end_date": closes_at,
                    "decay_rate": decay_text,
                    "status": "open",
                }
            )

    return ladder_rows


def validate_rows(rows: list[dict[str, str]]) -> None:
    """Check that the generated rows form complete, non-increasing ladders.

    Args:
        rows: Rows as produced by build_probability_rows.

    Raises:
        ValueError: If the total row count is not months x thresholds, if any
            projection group has the wrong number of thresholds, or if a
            group's probabilities increase along the ladder (beyond a 1e-9
            tolerance).
    """
    expected_total = len(MONTHS) * len(THRESHOLDS)
    if len(rows) != expected_total:
        raise ValueError(f"Expected {expected_total} rows, found {len(rows)}")

    # Collect each group's probabilities in row order.
    ladders: dict[str, list[float]] = {}
    for row in rows:
        group = row["projection_group"]
        if group not in ladders:
            ladders[group] = []
        ladders[group].append(float(row["initial_probability"]))

    ladder_size = len(THRESHOLDS)
    for group, ladder in ladders.items():
        if len(ladder) != ladder_size:
            raise ValueError(f"{group} has {len(ladder)} thresholds, expected {ladder_size}")
        rises = any(later > earlier + 1e-9 for earlier, later in zip(ladder, ladder[1:]))
        if rises:
            raise ValueError(f"Probabilities not non-increasing in {group}")


def build_selected_threshold_table(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]:
    """Extract the probabilities of the highlighted thresholds.

    Args:
        rows: Rows as produced by build_probability_rows.

    Returns:
        ``table[threshold_as_str][projection_group] -> probability`` for every
        threshold listed in SELECTED_THRESHOLDS. A selected threshold with no
        matching rows maps to an empty dict.
    """
    table: dict[str, dict[str, float]] = {}
    for wanted in SELECTED_THRESHOLDS:
        table[str(wanted)] = {}

    for row in rows:
        level = int(row["threshold_decimal"])
        if level not in SELECTED_THRESHOLDS:
            continue
        table[str(level)][row["projection_group"]] = float(row["initial_probability"])

    return table


def build_background_html(
    emissions: dict[str, EmissionParams],
    raw_counts: dict[str, int],
    effective_counts: dict[str, float],
    april_prior: dict[str, float],
    monthly_states: dict[str, dict[str, float]],
    selected_thresholds: dict[str, dict[str, float]],
) -> str:
    """Render the market's background-information page as an HTML fragment.

    The fragment contains, in order: the methodology introduction, the
    reference-layer table, the table of most influential examples, the fitted
    state distributions, the monthly state probabilities, the selected
    threshold probabilities, the resolution approach, a link to the generation
    script, and a list of key source links.

    Args:
        emissions: Emission parameters keyed by state; only ``median_troops``
            and ``p90_troops`` are read.
        raw_counts: Raw case counts keyed by the three reference-class tags.
        effective_counts: Overlap-adjusted counts keyed by the same tags.
        april_prior: Accepted for interface compatibility; not read in the body.
        monthly_states: State probabilities keyed by ``YYYY-MM`` projection
            group; rendered in the mapping's iteration order.
        selected_thresholds: ``[threshold_as_str][projection_group]``
            probabilities; must contain the keys '25', '100', '1000', '5000'
            and '20000' for every month in MONTHS.

    Returns:
        The HTML lines joined by newlines, with a trailing newline.
    """
    # Index map after insertions:
    # 0=Tanker War, 1=Praying Mantis, 2=El Dorado Canyon, 3=Desert Fox,
    # 4=Kosovo, 5=Libya 2011, 6=Soleimani 2020,
    # 7=Eagle Claw, 8=Abbottabad, 9=Barisha, 10=Afghanistan,
    # 11=Grenada, 12=Panama, 13=Desert Storm, 14=Iraq, 15=Korea, 16=Vietnam
    # NOTE: examples are picked by position in REFERENCE_CASES; Desert Fox (3),
    # Kosovo (4) and Panama (12) are intentionally left out of the table.
    influential_examples = [
        REFERENCE_CASES[0],   # Tanker War (Iran-specific no-entry)
        REFERENCE_CASES[1],   # Praying Mantis (Iran-specific strike)
        REFERENCE_CASES[2],   # El Dorado Canyon (punitive strike)
        REFERENCE_CASES[5],   # Libya 2011 (regime change via air)
        REFERENCE_CASES[6],   # Soleimani 2020 (Iran-specific, most recent)
        REFERENCE_CASES[7],   # Eagle Claw (Iran-specific raid)
        REFERENCE_CASES[8],   # Abbottabad (paradigmatic raid)
        REFERENCE_CASES[9],   # Barisha (recent raid)
        REFERENCE_CASES[10],  # Afghanistan opening (foothold)
        REFERENCE_CASES[11],  # Grenada (overlap case)
        REFERENCE_CASES[13],  # Desert Storm (upper-regional anchor)
        REFERENCE_CASES[14],  # Iraq 2003 (regime change campaign)
        REFERENCE_CASES[15],  # Korea (large-country upper tail)
        REFERENCE_CASES[16],  # Vietnam (peak deployment upper tail)
    ]

    # Introduction, reference-layer table, and the header of the examples table.
    lines = [
        "<p>The starting probabilities of this market were calculated from the historical record alone: similar U.S. coercive episodes, including many cases where the United States applied force or moved major assets but never ended up putting U.S. troops on the ground in the target country.</p>",
        "<h2>History-only starting point</h2>",
        "<p>The April 2026 prior is intentionally based only on historical escalation patterns at roughly this level of confrontation. It does not use current official statements, current deployments, or other case-specific March 2026 information as model inputs. The starting point therefore reflects the historical mix of no-entry, raid, foothold, and campaign outcomes in similar episodes rather than any claim about what current policymakers privately intend.</p>",
        "<h2>Reference classes and partial pooling</h2>",
        "<p>The model uses four monthly states: <strong>no entry</strong>, <strong>raid</strong>, <strong>limited foothold</strong>, and <strong>major campaign</strong>. Instead of forcing one smooth curve, it partially pools several kinds of historical evidence so that no-entry months remain common while larger tails are still possible.</p>",
        "<table>",
        "<thead><tr><th>Reference layer</th><th>Raw case-months</th><th>Overlap-adjusted effective count</th><th>Role in the fit</th></tr></thead>",
        "<tbody>",
        f"<tr><td>Coercive no-ground-entry</td><td>{raw_counts['coercive_no_ground_entry']}</td><td>{effective_counts['coercive_no_ground_entry']:.1f}</td><td>Preserves real mass on all-no months.</td></tr>",
        f"<tr><td>Limited entry</td><td>{raw_counts['limited_entry']}</td><td>{effective_counts['limited_entry']:.1f}</td><td>Shapes the 25 to 2,000 troop range.</td></tr>",
        f"<tr><td>Large entry</td><td>{raw_counts['large_entry']}</td><td>{effective_counts['large_entry']:.1f}</td><td>Shapes only the upper tail above roughly 2,000 troops.</td></tr>",
        "</tbody>",
        "</table>",
        "<p>Some examples sit on the boundary between a foothold and a campaign. To avoid double counting, each operation-month gets total weight 1.0 and shares that weight across its tags. That overlap discount stops the same case from pulling both the middle and upper tail too hard.</p>",
        "<p>The initial April blend assigns <strong>65% weight to coercive no-ground-entry</strong>, <strong>25% to limited entry</strong>, and <strong>10% to large entry</strong>. These weights reflect the historical base rate: the large majority of high-intensity U.S. confrontations — including direct naval strikes and major air campaigns — end without U.S. troops entering the target country. The 25% limited-entry weight preserves meaningful raid-to-foothold probability, while the 10% large-entry weight reflects that regime-change-scale campaigns remain uncommon even from high-pressure starting points. Within each reference class, the per-class state probabilities (for example, a coercive-no-ground-entry episode has a 97% base rate of no ground entry; a large-entry-type episode has a 55% base rate) are analyst calibrations drawn from the broader historical record and are not derived mechanically from the listed reference cases. The listed cases instead inform the <em>emission distributions</em> — what a raid, foothold, or campaign looks like in terms of troop counts.</p>",
        "<p>Note on Kosovo (1999): the NATO air campaign is coded as coercive no-ground-entry for the pre-armistice phase. NATO KFOR ground forces subsequently entered Kosovo territory after the armistice; the relevant analogue for this market is the coercive phase, in which airpower achieved its political objective without a prior ground commitment.</p>",
        "<h3>Most influential examples</h3>",
        "<table>",
        "<thead><tr><th>Date</th><th>Example</th><th>Estimated peak troops</th><th>Scenario type</th><th>Why it matters</th><th>Source</th></tr></thead>",
        "<tbody>",
    ]

    # One table row per influential example; all case text is HTML-escaped.
    for case in influential_examples:
        troop_text = "0" if case.peak_troops == 0 else f"{case.peak_troops:,}"
        lines.append(
            "<tr>"
            f"<td>{escape(case.date)}</td>"
            f"<td>{escape(case.name)}</td>"
            f"<td>{troop_text}</td>"
            f"<td>{escape(case.scenario_type)}</td>"
            f"<td>{escape(case.notes)}</td>"
            f"<td><a href=\"{escape(case.source_url)}\">{escape(case.source_label)}</a></td>"
            "</tr>"
        )

    lines.extend(
        [
            "</tbody>",
            "</table>",
            "<h2>Fitted state distributions</h2>",
            "<p>Positive-entry states are modeled on <code>log1p(troops)</code> with heavy tails. The median and 90th-percentile troop counts below are the fitted state distributions before the monthly state probabilities are mixed together.</p>",
            "<table>",
            "<thead><tr><th>State</th><th>Median troops</th><th>90th percentile troops</th></tr></thead>",
            "<tbody>",
        ]
    )

    # Median and 90th percentile per state, rounded to whole troops.
    for state in STATE_ORDER:
        params = emissions[state]
        lines.append(
            "<tr>"
            f"<td>{escape(STATE_LABELS[state])}</td>"
            f"<td>{int(round(params.median_troops)):,}</td>"
            f"<td>{int(round(params.p90_troops)):,}</td>"
            "</tr>"
        )

    # NOTE: the pooling weights, sigma floors and escalation schedule quoted in
    # the prose below are written out by hand; they mirror the constants in
    # build_emission_params and propagate_state_probabilities and must be kept
    # in sync with them manually.
    lines.extend(
        [
            "</tbody>",
            "</table>",
            "<p>Each state's emission distribution is partially pooled across adjacent tiers to prevent overconfident estimates from small samples. The raid distribution borrows 20% of its log-mean from the limited-entry tier; the limited-foothold distribution draws 60% from limited-entry cases, 25% from large-entry cases, and 15% from raid cases; the major-campaign distribution borrows 10% of its log-mean from the derived foothold. Log-sigma values are floored at 0.40 (raid), 0.70 (foothold), and 0.85 (campaign) to maintain realistic tail width given the limited case counts. The Korea and Vietnam cases are included in the large-entry tier specifically because Iran's population (~85 million) and military depth are closer in scale to those theatres than to Iraq 2003 or Panama, so those upper-tail cases are relevant calibration points for what a major campaign could require.</p>",
            "<h2>Monthly state probabilities</h2>",
            "<p>The April prior is a history-only blend of the three reference layers above. Later months use a persistent transition model: once an entry state is reached it tends to persist rather than snapping back immediately to no-entry. Escalation transition rates are also scaled by a multiplier that starts at 1.00 for the May step and decays month-by-month to 0.72 by December (schedule: 1.00, 0.97, 0.93, 0.88, 0.84, 0.80, 0.76, 0.72). This captures the empirical pattern that confrontations either escalate quickly or settle into a stable standoff; a situation that has not escalated by mid-year is historically less likely to generate fresh escalation than one that is just beginning. The practical effect is that the probability of a <em>new</em> low-level incursion (such as a raid) falls as the year progresses, while the major-campaign probability continues rising because it benefits from state persistence once escalation is entered. The market is history-only: it does not encode any claim about current U.S. government intentions, ongoing negotiations, or current force posture.</p>",
            "<table>",
            "<thead><tr><th>Month</th><th>No entry</th><th>Raid</th><th>Limited foothold</th><th>Major campaign</th></tr></thead>",
            "<tbody>",
        ]
    )

    # State probabilities per month, shown as percentages. The month label is
    # rebuilt from the "YYYY-MM" key of each projection group.
    for projection_group, probs in monthly_states.items():
        year, month = projection_group.split("-")
        lines.append(
            "<tr>"
            f"<td>{calendar.month_abbr[int(month)]} {year}</td>"
            f"<td>{100.0 * probs['none']:.1f}%</td>"
            f"<td>{100.0 * probs['raid']:.1f}%</td>"
            f"<td>{100.0 * probs['limited_foothold']:.1f}%</td>"
            f"<td>{100.0 * probs['major_campaign']:.1f}%</td>"
            "</tr>"
        )

    lines.extend(
        [
            "</tbody>",
            "</table>",
            "<h2>Selected threshold probabilities</h2>",
            "<p>These are the starting probabilities for a few representative thresholds from the full ladder. Because the lowest listed threshold is 25 troops, a no-entry month can still resolve as all no.</p>",
            "<table>",
            "<thead><tr><th>Month</th><th>&ge;25</th><th>&ge;100</th><th>&ge;1,000</th><th>&ge;5,000</th><th>&ge;20,000</th></tr></thead>",
            "<tbody>",
        ]
    )

    # The five columns are written out explicitly and must match both the
    # header above and SELECTED_THRESHOLDS.
    for year, month in MONTHS:
        projection_group = month_projection_group(year, month)
        lines.append(
            "<tr>"
            f"<td>{month_label(year, month)}</td>"
            f"<td>{100.0 * selected_thresholds['25'][projection_group]:.1f}%</td>"
            f"<td>{100.0 * selected_thresholds['100'][projection_group]:.1f}%</td>"
            f"<td>{100.0 * selected_thresholds['1000'][projection_group]:.1f}%</td>"
            f"<td>{100.0 * selected_thresholds['5000'][projection_group]:.1f}%</td>"
            f"<td>{100.0 * selected_thresholds['20000'][projection_group]:.1f}%</td>"
            "</tr>"
        )

    # Closing sections. The "{{asset:...}}" href is emitted literally (this is
    # a plain string, not an f-string).
    lines.extend(
        [
            "</tbody>",
            "</table>",
            "<h2>Resolution approach</h2>",
            "<p>This market is intentionally built for partial observability. We do not expect every month's exact troop count to be provable. Instead, resolution is threshold-by-threshold. If the best-supported interval for a month is, for example, 120 to 140 troops, thresholds at or below 100 resolve Yes, thresholds at or above 150 resolve No, and thresholds inside the interval remain unresolved. If the best-supported interval is 2,800 to 4,200 troops, the same logic applies at the higher thresholds. The point is to resolve as many rows as can reasonably be supported without pretending to know more than the evidence shows.</p>",
            "<h2>Generation script</h2>",
            "<p>The Python script used to produce this market's starting probabilities is available for download: <a href=\"{{asset:generate_market.py}}\">generate_market.py</a></p>",
            "<h2>Key source links</h2>",
            "<ul>",
            "<li><a href=\"https://history.state.gov/milestones/1981-1988/tanker-war\">Office of the Historian: The Tanker War</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Operation-Praying-Mantis\">Britannica: Operation Praying Mantis</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Operation-Eagle-Claw\">Britannica: Operation Eagle Claw</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Libya-Civil-War-2011\">Britannica: 2011 Libyan civil war</a></li>",
            "<li><a href=\"https://www.defense.gov/News/Releases/Release/Article/2049534/statement-by-the-department-of-defense/\">DoD: Statement on death of Qasem Soleimani</a></li>",
            "<li><a href=\"https://obamawhitehouse.archives.gov/the-press-office/2011/05/02/remarks-president-osama-bin-laden\">White House archive: Abbottabad raid remarks</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Persian-Gulf-War\">Britannica: Persian Gulf War</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Iraq-War\">Britannica: Iraq War</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Korean-War\">Britannica: Korean War</a></li>",
            "<li><a href=\"https://www.britannica.com/event/Vietnam-War\">Britannica: Vietnam War</a></li>",
            "</ul>",
        ]
    )

    return "\n".join(lines) + "\n"


def write_market_csv(rows: list[dict[str, str]]) -> None:
    """Write the market seed CSV: a ``#``-prefixed metadata header, then the rows.

    The output directory is created if it does not exist. The file starts
    with one ``# key: value`` line per market-level setting, followed by a
    regular CSV header line and one record per entry in ``rows``.

    Args:
        rows: Row dictionaries keyed by the column names in ``fieldnames``
            below. ``csv.DictWriter`` raises ``ValueError`` for a row that
            carries a key outside that list.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Derive the counts from the module constants instead of hard-coding
    # them, so the note cannot drift when thresholds, months, or reference
    # cases are edited.
    generated_note = (
        "# generated_note: zero_including_partially_pooled_latent_state_count, "
        f"thresholds={len(THRESHOLDS)}, months={len(MONTHS)}, "
        f"ref_cases={len(REFERENCE_CASES)}, april_prior=history_only_partial_pool"
    )

    meta_lines = [
        f"# market_code: {MARKET_CODE}",
        f"# market_title: {MARKET_TITLE}",
        "# market_type: count",
        "# market_visibility: public",
        "# market_status: draft",
        f"# market_budget: {MARKET_BUDGET}",
        f"# market_decay_rate: {MARKET_DECAY_RATE:.3f}",
        f"# market_resolution_criteria: {RESOLUTION_CRITERIA}",
        "# market_x_unit: troops",
        "# market_number_format: ,.0f",
        "# market_end_date: 2026-12-31T23:59:59Z",
        "# market_resolution_date: 2027-03-31T23:59:59Z",
        "# market_cumulative: false",
        "# market_background_info_path: background_info.html",
        '# market_svelte_params: {"scaleType":"log","scaleBase":1.25,"countUnitDisplay":"axis","countAxisLabel":"US troops in Iran"}',
        generated_note,
        # The trailing empty entry makes the join end with a newline, so the
        # CSV header row starts on its own line.
        "",
    ]

    fieldnames = [
        "projection_group",
        "threshold_decimal",
        "threshold_date",
        "initial_probability",
        "label",
        "end_date",
        "decay_rate",
        "status",
    ]

    # newline="" lets the csv writer control line endings; lineterminator
    # below pins them to "\n".
    with CSV_PATH.open("w", encoding="utf-8", newline="") as handle:
        handle.write("\n".join(meta_lines))
        writer = csv.DictWriter(handle, fieldnames=fieldnames, lineterminator="\n")
        writer.writeheader()
        writer.writerows(rows)


def main() -> int:
    """Run the generation pipeline and write the three seed artifacts.

    Builds the emission parameters, overlap counts, April prior, and monthly
    state probabilities, turns them into validated market rows, and then
    writes the market CSV, the background HTML page, and a JSON model summary.

    Returns:
        ``0``, which the script entry point passes on as the exit status.
    """

    def rounded(mapping):
        # Round every value of a flat mapping to six decimals for the summary.
        return {key: round(value, 6) for key, value in mapping.items()}

    # --- model fitting and propagation ---
    emission_params = build_emission_params(REFERENCE_CASES)
    raw_tag_counts, effective_tag_counts = compute_overlap_counts(REFERENCE_CASES)
    prior = build_april_prior()
    state_probs_by_month, escalation = propagate_state_probabilities(prior)
    market_rows = build_probability_rows(state_probs_by_month, emission_params)
    validate_rows(market_rows)
    threshold_table = build_selected_threshold_table(market_rows)

    # --- market CSV and background page ---
    write_market_csv(market_rows)
    background_html = build_background_html(
        emissions=emission_params,
        raw_counts=raw_tag_counts,
        effective_counts=effective_tag_counts,
        april_prior=prior,
        monthly_states=state_probs_by_month,
        selected_thresholds=threshold_table,
    )
    BACKGROUND_PATH.write_text(background_html, encoding="utf-8")

    # --- JSON model summary ---
    generated_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
    projection_groups = [month_projection_group(year, month) for year, month in MONTHS]

    effective_rounded = {}
    for tag, value in effective_tag_counts.items():
        effective_rounded[tag] = round(value, 3)

    emission_summary = {}
    for state, params in emission_params.items():
        emission_summary[state] = {
            "log_mean": round(params.log_mean, 6) if params.log_mean is not None else None,
            "log_sigma": round(params.log_sigma, 6) if params.log_sigma is not None else None,
            "median_troops": round(params.median_troops, 2),
            "p90_troops": round(params.p90_troops, 2),
        }

    monthly_summary = {month: rounded(probs) for month, probs in state_probs_by_month.items()}
    threshold_summary = {
        threshold: rounded(by_month) for threshold, by_month in threshold_table.items()
    }

    overlap_policy = (
        "Each unique operation-month has total weight capped at 1.0. "
        "If a case belongs to multiple tags, its weight is split evenly across them."
    )

    summary = {
        "model": "zero_including_partially_pooled_latent_state_count",
        "market_code": MARKET_CODE,
        "market_title": MARKET_TITLE,
        "generated_at_utc": generated_at,
        "months": projection_groups,
        "thresholds": THRESHOLDS,
        "initial_prior_blend_weights": {
            "coercive_no_ground_entry": 0.65,
            "limited_entry": 0.25,
            "large_entry": 0.10,
        },
        "reference_class_counts": {
            "raw": raw_tag_counts,
            "overlap_adjusted_effective": effective_rounded,
        },
        "overlap_policy": overlap_policy,
        "state_emission_params": emission_summary,
        "april_prior": rounded(prior),
        "escalation_path_after_april": escalation,
        "monthly_state_probabilities": monthly_summary,
        "selected_threshold_probabilities": threshold_summary,
        "reference_cases": [asdict(case) for case in REFERENCE_CASES],
    }
    serialized = json.dumps(summary, indent=2)
    SUMMARY_PATH.write_text(serialized + "\n", encoding="utf-8")
    return 0


# Script entry point: run the generator only when this file is executed
# directly, and use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
