#!/usr/bin/env python3
"""
Generate starting probabilities for the "Date China takes major military
action against Taiwan" market on Antistatic.

Methodology: Bayesian multilevel hazard-rate model
=================================================

We estimate a time-varying monthly hazard rate h(t) — the probability that
China takes major military action against Taiwan in month t, given it hasn't
happened yet — by combining three levels of evidence with partial pooling.

Level 1 (Prior): General territorial-dispute base rate
    Among major-power territorial disputes post-1945, the annual probability
    of escalation to significant military action is ~1-2%/year.  This serves
    as a weakly informative prior.

Level 2 (Group): Major powers with active military buildup for a territorial goal
    Historical reference class of 13 cases (8 acted, 5 censored).  Kaplan-Meier
    estimate: ~54% probability of action within 10 years, implying ~7-8%/year
    average hazard during the active buildup phase.

Level 3 (Specific): China-Taiwan expert consensus and prediction markets
    Metaculus community forecasts (as of March 2026):
      - Full-scale invasion by 2028: 10%, by 2030: ~20%, by 2035: ~33%
      - Full-scale blockade by 2030: 40%, by 2035: 60%
    CSIS 2024 expert survey: 52% think force short of invasion likely within 10y
    Swift Centre: 9% blockade by mid-2027
    Good Judgment superforecasters: ~3% blockade per year

    For our broader "major military action" definition (invasion OR blockade
    OR kinetic strikes on main island), we derive:
      P(major action by end 2029) ≈ 43%
      P(major action by end 2034) ≈ 65%

Partial pooling: The specific Level 3 data dominates (~80% weight) because
it is abundant and directly relevant.  Level 2 contributes ~20% through
shrinkage, pulling extreme estimates toward the historical base rate.

Time variation: The hazard rate peaks around late 2027/early 2028 (the PLA's
stated modernisation milestone, Xi's stated "window"), then gradually declines.
We model this as piecewise-constant annual hazard rates, smoothed to monthly
resolution via cubic spline interpolation.

Sources
-------
- Metaculus Q#11480 (invasion), Q#12309 (blockade), Q#5320 (control)
- CSIS ChinaPower expert surveys (2022, 2024)
- Swift Centre China-Taiwan forecasts (2024)
- EA Forum: "An invasion of Taiwan is uncomfortably likely" (2023)
- UI Sweden: "Is a conflict over Taiwan drawing near?" (2024)
- Correlates of War MID dataset (base rates)
- Pentagon 2025 Annual Report on PRC Military Developments
"""
from __future__ import annotations

import csv
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

import matplotlib
import numpy as np

matplotlib.use("Agg")
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
REPO_ROOT = Path(__file__).resolve().parents[2]
DATA_DIR = REPO_ROOT / "data" / "china-taiwan-action"
SCRIPT_PATH = Path(__file__).resolve()
CSV_PATH = DATA_DIR / "china_taiwan_action_market.csv"
SUMMARY_PATH = DATA_DIR / "model_summary.json"
BACKGROUND_PATH = DATA_DIR / "background_info.html"
HAZARD_PLOT_PATH = DATA_DIR / "hazard_rate.png"
CDF_PLOT_PATH = DATA_DIR / "cumulative_probability.png"
REFERENCE_PLOT_PATH = DATA_DIR / "reference_classes.png"

# ---------------------------------------------------------------------------
# Market metadata
# ---------------------------------------------------------------------------
MARKET_CODE = "cn-tw-action"
MARKET_TITLE = "Date China takes major military action against Taiwan"
MARKET_BUDGET = 300
MARKET_DECAY_RATE = 0.005
PROBABILITY_DECIMALS = 3

# Market starts April 2026 (month 0 = start of April 2026)
# Through December 2036
MARKET_START = datetime(2026, 4, 1, tzinfo=timezone.utc)
N_MONTHS = 129  # April 2026 through December 2036

RESOLUTION_CRITERIA = (
    "This market resolves to the earliest date at which any of the following "
    "conditions is reported by at least two of AP, Reuters, and AFP: "
    "(a) at least 1,000 People's Liberation Army military personnel have been "
    "deployed to the main island of Taiwan for the purpose of establishing PRC "
    "sovereignty; (b) the People's Republic of China has imposed a blockade that "
    "closes all major Taiwanese ports (Kaohsiung, Keelung, Taichung, and Taipei) "
    "for at least 10 consecutive days; or (c) the People's Republic of China has "
    "launched kinetic strikes (missiles, airstrikes, or naval bombardment) against "
    "military or civilian infrastructure on the main island of Taiwan. "
    "Exclusions: military exercises that do not actively intercept traffic or "
    "strike targets; cyber-only operations; actions limited to outlying islands "
    "(Kinmen, Matsu, Penghu) without action against the main island; economic "
    "sanctions or trade restrictions without physical military action."
)


# ---------------------------------------------------------------------------
# Reference class data
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class HistoricalCase:
    """A historical case of a major power with a territorial objective."""

    name: str
    claimant: str
    target: str
    buildup_start_year: int
    action_year: int | None  # None = censored (no action taken)
    censored_year: int  # year of observation (action year or last observed)
    notes: str


HISTORICAL_CASES: list[HistoricalCase] = [
    # Cases where action was taken
    HistoricalCase("China → Tibet", "PRC", "Tibet", 1949, 1950, 1950,
                   "PRC invaded ~1 year after founding"),
    HistoricalCase("Indonesia → East Timor", "Indonesia", "East Timor", 1974, 1975, 1975,
                   "Invaded after Portuguese withdrawal"),
    HistoricalCase("Iraq → Kuwait", "Iraq", "Kuwait", 1988, 1990, 1990,
                   "~2 years of active dispute escalation"),
    HistoricalCase("Argentina → Falklands", "Argentina", "Falklands", 1976, 1982, 1982,
                   "Military junta era, 6 years buildup"),
    HistoricalCase("Russia → Crimea", "Russia", "Crimea/Ukraine", 2008, 2014, 2014,
                   "Post-Georgia war buildup, seized Crimea"),
    HistoricalCase("Turkey → Cyprus", "Turkey", "Northern Cyprus", 1964, 1974, 1974,
                   "10 years from crisis to invasion"),
    HistoricalCase("India → Goa", "India", "Goa", 1947, 1961, 1961,
                   "14 years from independence to liberation"),
    HistoricalCase("Russia → Ukraine (full)", "Russia", "Ukraine", 2014, 2022, 2022,
                   "8 years from Crimea to full invasion"),
    # Counter-examples: active claim, no full military action
    HistoricalCase("China → Taiwan", "PRC", "Taiwan", 1949, None, 2026,
                   "77 years, no action (the case we're forecasting)"),
    HistoricalCase("India → Pak Kashmir", "India", "Pakistan-held Kashmir", 1947, None, 2026,
                   "Skirmishes but no full-scale invasion attempt"),
    HistoricalCase("Greece → Turkey Aegean", "Greece/Turkey", "Aegean islands", 1974, None, 2026,
                   "Ongoing dispute, no military action"),
    HistoricalCase("N. Korea → S. Korea", "North Korea", "South Korea", 1953, None, 2026,
                   "Armistice only, no renewed full-scale action"),
    HistoricalCase("Venezuela → Essequibo", "Venezuela", "Guyana Essequibo", 1966, None, 2026,
                   "Threatened but never invaded"),
]


@dataclass(frozen=True)
class CalibrationAnchor:
    """An empirical anchor from prediction markets or expert surveys."""

    source: str
    description: str
    months_from_start: float  # months from April 2026
    cumulative_probability: float
    weight: float  # how much to trust this anchor (0-1)


# Key calibration anchors (all conditional on no event before April 2026)
CALIBRATION_ANCHORS: list[CalibrationAnchor] = [
    CalibrationAnchor(
        "Metaculus Q#12309 + derived",
        "P(major action by end 2029) — Metaculus blockade 40% + strikes increment",
        months_from_start=45.0,  # end Dec 2029
        cumulative_probability=0.43,
        weight=0.9,
    ),
    CalibrationAnchor(
        "Metaculus Q#12309 + derived",
        "P(major action by end 2034) — Metaculus blockade 60% + strikes increment",
        months_from_start=105.0,  # end Dec 2034
        cumulative_probability=0.65,
        weight=0.9,
    ),
    CalibrationAnchor(
        "Swift Centre + Metaculus",
        "P(major action by end 2027) — interpolated from Swift Centre blockade "
        "9% by mid-2027 and Metaculus invasion 10% by 2028",
        months_from_start=21.0,  # end Dec 2027
        cumulative_probability=0.22,
        weight=0.7,
    ),
    CalibrationAnchor(
        "Reference class Level 2",
        "P(action within 10 years of active buildup) — Kaplan-Meier from "
        "13 historical cases",
        months_from_start=120.0,  # 10 years
        cumulative_probability=0.54,
        weight=0.3,  # lower weight: less specific
    ),
]


# ---------------------------------------------------------------------------
# Hazard rate model
# ---------------------------------------------------------------------------
def compute_annual_hazard_rates() -> dict[int, float]:
    """
    Compute annual hazard rates for each year from the multilevel model.

    The annual rates are derived from three levels with partial pooling:

    Level 1 (base rate):  ~1.5%/year from general territorial dispute data
    Level 2 (group rate): ~7.5%/year from major-power buildup reference class
    Level 3 (specific):   Time-varying, derived from prediction market data

    Partial pooling weights: w_specific=0.80, w_group=0.20
    (Level 1 is absorbed into Level 2 via the reference class itself)
    """
    # --- Level 2: Reference class hazard rate ---
    # From Kaplan-Meier analysis of 13 historical cases:
    # ~54% cumulative action within 10 years → ~7.5%/year average
    lambda_group_annual = 0.075

    # --- Level 3: Specific China-Taiwan rates by year ---
    # Derived from Metaculus, CSIS, Swift Centre data
    # Calibrated to hit anchor points while reflecting the 2027-2028 peak
    lambda_specific_annual = {
        2026: 0.08,   # building but not peak; ~8%/year
        2027: 0.18,   # PEAK — PLA modernisation milestone, Xi's window
        2028: 0.16,   # still elevated
        2029: 0.14,   # gradually declining
        2030: 0.11,   # post-peak
        2031: 0.09,
        2032: 0.07,
        2033: 0.07,
        2034: 0.07,
        2035: 0.07,   # long-run equilibrium
    }

    # --- Partial pooling ---
    w_specific = 0.80
    w_group = 0.20

    pooled = {}
    for year, lam_s in lambda_specific_annual.items():
        lam_pooled = w_specific * lam_s + w_group * lambda_group_annual
        pooled[year] = lam_pooled

    return pooled


def annual_to_monthly_hazard(annual_rate: float) -> float:
    """Convert annual hazard rate to monthly: h_m = 1 - (1 - h_a)^(1/12)."""
    return 1.0 - (1.0 - annual_rate) ** (1.0 / 12.0)


def build_monthly_hazard_curve(n_months: int) -> np.ndarray:
    """
    Build a smooth monthly hazard rate curve by assigning each month its
    year's pooled hazard rate (converted from annual to monthly).

    For months that straddle a year boundary, the rate transitions smoothly
    via linear interpolation between adjacent years' monthly rates.

    Returns array of length n_months with h(t) for each month.
    """
    annual_rates = compute_annual_hazard_rates()
    hazard = np.zeros(n_months)

    for t in range(n_months):
        # Which calendar year does this month fall in?
        year = 2026 + (3 + t) // 12
        month_in_year = (3 + t) % 12  # 0=Jan .. 11=Dec

        # Get this year's rate (clamp to last available year)
        year_clamped = min(year, max(annual_rates.keys()))
        rate = annual_rates.get(year_clamped, annual_rates[max(annual_rates.keys())])

        # Smooth transitions: in first/last 3 months of each year, blend with
        # adjacent year's rate
        if month_in_year < 3 and year - 1 in annual_rates:
            prev_rate = annual_rates[year - 1]
            blend = (month_in_year + 0.5) / 3.0  # 0→1 over Jan-Mar
            rate = prev_rate * (1 - blend) + rate * blend
        elif month_in_year > 8 and year + 1 in annual_rates:
            next_rate = annual_rates[year + 1]
            blend = (month_in_year - 8.5) / 3.0  # 0→1 over Oct-Dec
            rate = rate * (1 - blend) + next_rate * blend

        hazard[t] = annual_to_monthly_hazard(rate)

    return hazard


def hazard_to_cdf(hazard: np.ndarray) -> np.ndarray:
    """
    Convert monthly hazard rates to cumulative distribution function.

    F(t) = 1 - prod_{s=0}^{t-1} (1 - h(s))
    """
    survival = np.cumprod(1.0 - hazard)
    return 1.0 - survival


def validate_against_anchors(
    cdf: np.ndarray, anchors: list[CalibrationAnchor]
) -> list[dict]:
    """Check the model CDF against calibration anchors."""
    results = []
    for anchor in anchors:
        month_idx = min(int(round(anchor.months_from_start)) - 1, len(cdf) - 1)
        model_prob = float(cdf[month_idx])
        error = model_prob - anchor.cumulative_probability
        results.append({
            "source": anchor.source,
            "description": anchor.description,
            "target_month": anchor.months_from_start,
            "target_prob": anchor.cumulative_probability,
            "model_prob": round(model_prob, 4),
            "error": round(error, 4),
            "weight": anchor.weight,
        })
    return results


# ---------------------------------------------------------------------------
# Kaplan-Meier analysis of reference class
# ---------------------------------------------------------------------------
def kaplan_meier_reference_class() -> tuple[list[float], list[float]]:
    """
    Compute Kaplan-Meier survival curve from the historical reference class.

    Returns (times, survival_probs) for plotting.
    """
    events = []
    for case in HISTORICAL_CASES:
        duration = case.censored_year - case.buildup_start_year
        is_event = case.action_year is not None
        events.append((duration, is_event))

    events.sort(key=lambda x: x[0])

    times = [0.0]
    survival = [1.0]
    n_at_risk = len(events)

    for duration, is_event in events:
        if is_event:
            s = survival[-1] * (1.0 - 1.0 / n_at_risk)
            times.append(float(duration))
            survival.append(s)
        n_at_risk -= 1

    return times, survival


# ---------------------------------------------------------------------------
# CSV and output generation
# ---------------------------------------------------------------------------
def month_end_iso(month_offset: int) -> str:
    """Return ISO timestamp for end of the month at given offset from April 2026."""
    year = 2026 + (3 + month_offset) // 12
    month = (3 + month_offset) % 12 + 1
    # Last day of the month
    if month == 12:
        next_year, next_month = year + 1, 1
    else:
        next_year, next_month = year, month + 1
    from datetime import timedelta
    last_day = datetime(next_year, next_month, 1, tzinfo=timezone.utc) - timedelta(days=1)
    return last_day.strftime("%Y-%m-%dT23:59:59Z")


def month_label(month_offset: int) -> str:
    """Human-readable label like 'Apr 2026', 'May 2026', etc."""
    year = 2026 + (3 + month_offset) // 12
    month = (3 + month_offset) % 12 + 1
    month_names = [
        "", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ]
    return f"{month_names[month]} {year}"


def clamp_prob(p: float) -> float:
    """Clamp probability to valid market range."""
    return min(0.999, max(0.001, round(p, PROBABILITY_DECIMALS)))


def write_market_csv(cdf: np.ndarray) -> None:
    """Write the market CSV with monthly date submarkets."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    annual_rates = compute_annual_hazard_rates()
    rate_summary = ", ".join(f"{y}={r:.1%}" for y, r in sorted(annual_rates.items()))

    with CSV_PATH.open("w", newline="", encoding="utf-8") as handle:
        handle.write(f"# market_code: {MARKET_CODE}\n")
        handle.write(f"# market_title: {MARKET_TITLE}\n")
        handle.write("# market_type: date\n")
        handle.write("# market_visibility: public\n")
        handle.write("# market_status: draft\n")
        handle.write(f"# market_budget: {MARKET_BUDGET}\n")
        handle.write(f"# market_decay_rate: {MARKET_DECAY_RATE:.3f}\n")
        handle.write(f"# market_resolution_criteria: {RESOLUTION_CRITERIA}\n")
        handle.write("# market_x_unit: month\n")
        handle.write("# market_end_date: 2036-12-31T23:59:59Z\n")
        handle.write("# market_resolution_date: 2037-03-31T23:59:59Z\n")
        handle.write("# market_cumulative: true\n")
        handle.write("# market_background_info_path: background_info.html\n")
        handle.write(
            '# market_svelte_params: {"scaleType":"linear","timeCadence":"monthly"}\n'
        )
        handle.write(
            f"# generated_note: multilevel_hazard_model, "
            f"annual_rates=[{rate_summary}], "
            f"partial_pooling=0.80_specific+0.20_group, "
            f"n_months={N_MONTHS}\n"
        )
        handle.write("\n")

        writer = csv.writer(handle)
        writer.writerow([
            "projection_group",
            "threshold_decimal",
            "threshold_date",
            "initial_probability",
            "label",
            "end_date",
            "decay_rate",
            "status",
        ])

        prev_prob = 0.0

        for t in range(N_MONTHS):
            threshold_date = month_end_iso(t)
            end_date = threshold_date
            label = f"By {month_label(t)}"
            prob = clamp_prob(float(cdf[t]))

            # Ensure monotonically non-decreasing
            prob = max(prob, prev_prob + 0.001) if prob > prev_prob else max(prob, prev_prob)
            prob = clamp_prob(prob)
            prev_prob = prob

            writer.writerow([
                "main",  # projection_group: single group for date markets
                "",  # threshold_decimal: empty for date markets
                threshold_date,
                f"{prob:.3f}",
                label,
                end_date,
                f"{MARKET_DECAY_RATE:.3f}",
                "open",
            ])


def write_summary_json(
    cdf: np.ndarray,
    hazard: np.ndarray,  # noqa: ARG001 — stored in JSON indirectly via annual rates
    validation: list[dict],
) -> None:
    """Write model summary JSON."""
    annual_rates = compute_annual_hazard_rates()

    # Key yearly cumulative probabilities
    yearly_cdf = {}
    for t in range(N_MONTHS):
        year = 2026 + (3 + t) // 12
        month = (3 + t) % 12 + 1
        if month == 12 or t == N_MONTHS - 1:
            yearly_cdf[str(year)] = round(float(cdf[t]), 4)

    # Reference class summary
    ref_class_data = []
    for case in HISTORICAL_CASES:
        ref_class_data.append({
            "name": case.name,
            "buildup_start": case.buildup_start_year,
            "action_year": case.action_year,
            "duration": case.censored_year - case.buildup_start_year,
            "acted": case.action_year is not None,
            "notes": case.notes,
        })

    payload = {
        "model": "multilevel_hazard_rate",
        "market_start": "2026-04-01",
        "n_months": N_MONTHS,
        "partial_pooling_weights": {
            "specific": 0.80,
            "group": 0.20,
        },
        "annual_hazard_rates_pooled": {
            str(y): round(r, 4) for y, r in sorted(annual_rates.items())
        },
        "yearly_cumulative_probability": yearly_cdf,
        "calibration_validation": validation,
        "reference_class": ref_class_data,
        "sources": [
            {"label": "Metaculus Q#11480: Chinese invasion of Taiwan",
             "url": "https://www.metaculus.com/questions/11480/chinese-invasion-of-taiwan/"},
            {"label": "Metaculus Q#12309: China blockade of Taiwan",
             "url": "https://www.metaculus.com/questions/12309/china-engages-in-a-blockade-against-taiwan/"},
            {"label": "Metaculus Q#5320: Chinese control of half of Taiwan by 2050",
             "url": "https://www.metaculus.com/questions/5320/chinese-control-of-half-of-taiwan-by-2050/"},
            {"label": "CSIS ChinaPower: Surveying the Experts (2024)",
             "url": "https://chinapower.csis.org/surveying-experts-us-and-taiwan-views-china-approach-taiwan-2024/"},
            {"label": "Swift Centre: China-Taiwan and TSMC risks to 2027",
             "url": "https://www.swiftcentre.org/publicforecasts/china-taiwan-and-tsmc-risks-to-2027"},
            {"label": "UI Sweden: Is a conflict over Taiwan drawing near? (2024)",
             "url": "https://www.ui.se/globalassets/ui.se-eng/publications/other-publications/is-a-conflict-over-taiwan-drawing-near-a-review-of-available-forecasts-and-scenarios.pdf"},
            {"label": "Pentagon 2025 Annual Report on PRC Military Developments",
             "url": "https://media.defense.gov/2025/Dec/23/2003849070/-1/-1/1/ANNUAL-REPORT-TO-CONGRESS-MILITARY-AND-SECURITY-DEVELOPMENTS-INVOLVING-THE-PEOPLES-REPUBLIC-OF-CHINA-2025.PDF"},
            {"label": "EA Forum: An invasion of Taiwan is uncomfortably likely (2023)",
             "url": "https://forum.effectivealtruism.org/posts/qvzcmzPcR5mDEhqkz/an-invasion-of-taiwan-is-uncomfortably-likely-potentially"},
        ],
    }
    SUMMARY_PATH.write_text(json.dumps(payload, indent=2), encoding="utf-8")


# ---------------------------------------------------------------------------
# Plots
# ---------------------------------------------------------------------------
def plot_hazard_rate(hazard: np.ndarray) -> None:
    """Plot the monthly and annualised hazard rate over time."""
    fig, ax = plt.subplots(figsize=(11, 5))
    months = np.arange(N_MONTHS)

    # Annualised hazard = 1 - (1 - h_monthly)^12
    annual_hazard = 1.0 - (1.0 - hazard) ** 12

    ax.plot(months, annual_hazard * 100, color="#dc2626", lw=2.2,
            label="Annualised hazard rate")
    ax.fill_between(months, 0, annual_hazard * 100, color="#dc2626", alpha=0.12)

    # Mark the 2027 PLA modernisation milestone
    milestone_month = 21  # Dec 2027
    ax.axvline(milestone_month, color="#6b7280", ls="--", lw=1.2, alpha=0.7)
    ax.annotate("2027 PLA\nmilestone", xy=(milestone_month, float(annual_hazard[milestone_month]) * 100),
                xytext=(milestone_month + 5, annual_hazard[milestone_month] * 100 + 2),
                fontsize=9, color="#6b7280",
                arrowprops=dict(arrowstyle="->", color="#6b7280", lw=1.0))

    ax.set_xlabel("Month")
    ax.set_ylabel("Annualised hazard rate (%)")
    ax.set_title("Time-varying hazard rate: major Chinese military action against Taiwan")

    # X-axis: show every 12 months
    tick_positions = list(range(0, N_MONTHS, 12))
    tick_labels = [month_label(t) for t in tick_positions]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha="right", fontsize=8)
    ax.set_xlim(0, N_MONTHS - 1)
    ax.set_ylim(0)
    ax.grid(alpha=0.25)
    ax.legend(frameon=False)
    fig.tight_layout()
    fig.savefig(HAZARD_PLOT_PATH, dpi=180)
    plt.close(fig)


def plot_cdf(cdf: np.ndarray, anchors: list[CalibrationAnchor]) -> None:
    """Plot the cumulative probability (CDF) with calibration anchors."""
    fig, ax = plt.subplots(figsize=(11, 5))
    months = np.arange(N_MONTHS)

    ax.plot(months, cdf * 100, color="#b45309", lw=2.5, label="Model CDF")
    ax.fill_between(months, 0, cdf * 100, color="#f59e0b", alpha=0.15)

    # Plot calibration anchors
    for anchor in anchors:
        idx = min(int(round(anchor.months_from_start)) - 1, len(cdf) - 1)
        ax.scatter([idx], [anchor.cumulative_probability * 100],
                   color="#2563eb", s=60, zorder=5, alpha=anchor.weight)
        ax.annotate(f"{anchor.cumulative_probability:.0%}",
                    xy=(idx, anchor.cumulative_probability * 100),
                    xytext=(idx + 3, anchor.cumulative_probability * 100 + 2),
                    fontsize=8, color="#2563eb")

    ax.set_xlabel("Month")
    ax.set_ylabel("Cumulative probability (%)")
    ax.set_title("Cumulative probability of major Chinese military action against Taiwan")

    tick_positions = list(range(0, N_MONTHS, 12))
    tick_labels = [month_label(t) for t in tick_positions]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha="right", fontsize=8)
    ax.set_xlim(0, N_MONTHS - 1)
    ax.set_ylim(0, 100)
    ax.grid(alpha=0.25)
    ax.legend(frameon=False)
    fig.tight_layout()
    fig.savefig(CDF_PLOT_PATH, dpi=180)
    plt.close(fig)


def plot_reference_classes() -> None:
    """Plot the Kaplan-Meier survival curve from the historical reference class."""
    times, survival = kaplan_meier_reference_class()

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5.5))

    # Left: Kaplan-Meier survival curve
    ax1.step(times, [s * 100 for s in survival], where="post",
             color="#2563eb", lw=2.2, label="Kaplan-Meier survival")
    ax1.axhline(50, color="#6b7280", ls=":", lw=1, alpha=0.5)
    ax1.set_xlabel("Years from start of active buildup")
    ax1.set_ylabel("Survival probability (%)")
    ax1.set_title("Reference class: major-power territorial buildup")
    ax1.set_xlim(0, 80)
    ax1.set_ylim(0, 105)
    ax1.grid(alpha=0.25)
    ax1.legend(frameon=False)

    # Add case labels
    acted_cases = [c for c in HISTORICAL_CASES if c.action_year is not None]
    for case in acted_cases:
        assert case.action_year is not None  # for type checker
        duration = case.action_year - case.buildup_start_year
        ax1.annotate(case.name, xy=(duration, 50), xytext=(duration, 30),
                     fontsize=7, rotation=90, ha="center", va="bottom",
                     color="#6b7280", alpha=0.7)

    # Right: Annual hazard rates comparison
    annual_rates = compute_annual_hazard_rates()
    years = sorted(annual_rates.keys())
    rates = [annual_rates[y] * 100 for y in years]

    ax2.bar(years, rates, color="#b45309", alpha=0.7, label="Pooled model rate")
    ax2.axhline(7.5, color="#2563eb", ls="--", lw=1.5, alpha=0.7,
                label="Reference class avg (7.5%/yr)")
    ax2.axhline(1.5, color="#059669", ls=":", lw=1.5, alpha=0.7,
                label="General base rate (1.5%/yr)")
    ax2.set_xlabel("Year")
    ax2.set_ylabel("Annual hazard rate (%)")
    ax2.set_title("Pooled annual hazard rates by year")
    ax2.grid(alpha=0.25)
    ax2.legend(frameon=False, fontsize=9)

    fig.tight_layout()
    fig.savefig(REFERENCE_PLOT_PATH, dpi=180)
    plt.close(fig)


# ---------------------------------------------------------------------------
# Background HTML
# ---------------------------------------------------------------------------
def write_background_html(cdf: np.ndarray) -> None:
    """Write the background information HTML."""
    annual_rates = compute_annual_hazard_rates()

    # Build rate table rows
    rate_rows = []
    for year in sorted(annual_rates.keys()):
        rate = annual_rates[year]
        # Cumulative prob at end of year
        if year == 2026:
            end_month = 8  # end Dec 2026 = month index 8 (0-indexed)
        else:
            end_month = min((year - 2026) * 12 + 8, N_MONTHS - 1)
        cum_prob = float(cdf[end_month]) if end_month < len(cdf) else float(cdf[-1])
        rate_rows.append(
            f"<tr><td>{year}</td>"
            f"<td>{rate:.1%}</td>"
            f"<td>{cum_prob:.1%}</td></tr>"
        )

    # Build reference class tables
    acted_rows = []
    not_acted_rows = []
    for case in HISTORICAL_CASES:
        duration = case.censored_year - case.buildup_start_year
        if case.action_year is not None:
            acted_rows.append(
                f"<tr><td>{case.name}</td>"
                f"<td>{case.buildup_start_year}</td>"
                f"<td>{case.action_year}</td>"
                f"<td>{duration} years</td></tr>"
            )
        else:
            not_acted_rows.append(
                f"<tr><td>{case.name}</td>"
                f"<td>{case.buildup_start_year}</td>"
                f"<td>{duration}+ years (ongoing)</td></tr>"
            )

    html = f"""<h3>Reference classes</h3>
<p>
We use three levels of evidence, from general to specific, combined with
partial pooling to estimate a time-varying monthly hazard rate.
</p>

<h4>Level 1: General territorial dispute base rate</h4>
<p>
Among all major-power territorial disputes post-1945, roughly 1-2% per year
escalate to significant military action. This comes from the Correlates of War
MID dataset (~2,400 militarized interstate disputes 1816-2014, 85% of fatal
conflicts involving territorial issues). This is the weakest prior &mdash; just
a sanity floor.
</p>

<h4>Level 2: Major powers with active military buildup for a specific territorial goal</h4>
<p>
This is the core reference class. We identified <strong>13 historical
cases</strong> &mdash; 8 that resulted in military action, 5 where no action
has been taken despite decades of tension.
</p>

<p><strong>Cases that resulted in military action:</strong></p>
<table>
  <thead>
    <tr><th>Case</th><th>Buildup start</th><th>Action year</th><th>Time to action</th></tr>
  </thead>
  <tbody>
    {"".join(acted_rows)}
  </tbody>
</table>

<p><strong>Cases where no military action has been taken:</strong></p>
<table>
  <thead>
    <tr><th>Case</th><th>Buildup start</th><th>Duration so far</th></tr>
  </thead>
  <tbody>
    {"".join(not_acted_rows)}
  </tbody>
</table>

<p>
<strong>Kaplan-Meier survival analysis</strong> of these 13 cases gives roughly
<strong>54% probability of action within 10 years</strong> of active buildup
starting, implying an average annual hazard of ~7.5%/year during the buildup
phase. The median time to action among those that acted was ~6 years.
</p>
<p>
The cases where no action was taken are critical &mdash; they show that roughly
40% of these disputes simply never result in full military action, even over
many decades. China-Taiwan itself is the longest-running such case at 77 years.
</p>

<p><img src="{{{{asset:reference_classes.png}}}}" alt="Reference class: Kaplan-Meier survival and pooled hazard rates" width="960" /></p>

<h4>Level 3: China-Taiwan specific expert data</h4>
<p>
Current prediction markets and expert surveys (as of March 2026) provide the
most directly relevant evidence:
</p>
<ul>
  <li><strong>Metaculus</strong>: full-scale blockade by 2030 at 40%, by 2035 at 60%;
  full-scale invasion by 2028 at 10%, by 2030 at ~20%, by 2035 at ~33%</li>
  <li><strong>CSIS 2024 survey</strong>: 52% of 64 China experts think force short of
  invasion &ldquo;likely&rdquo; within 10 years; only 27% of US experts and 17% of
  Taiwan experts believe China can currently execute an amphibious invasion</li>
  <li><strong>Swift Centre</strong>: 9% blockade probability by mid-2027</li>
  <li><strong>Pentagon 2025 report</strong>: PLA targeting capability to fight and
  win a Taiwan war by end of 2027; new bridge landing vessels and expanded
  amphibious training observed</li>
</ul>
<p>
These imply annualised hazard rates of ~12-16%/year near-term (peaking
2027-2028 around the PLA modernisation milestone), declining to ~7%/year
long-term.
</p>

<h3>Multilevel model</h3>

<p>
The posterior annual hazard rate for each year is a <strong>weighted
average</strong> of Level 2 and Level 3:
</p>
<p>
<code>&lambda;_pooled(year) = 0.80 &times; &lambda;_specific(year) + 0.20 &times; &lambda;_group</code>
</p>
<ul>
  <li><strong>80% weight on Level 3</strong> (specific): prediction markets
  aggregate hundreds of forecasters with diverse information &mdash;
  substantially more informative than the small historical sample.</li>
  <li><strong>20% weight on Level 2</strong> (group): the reference class pull
  prevents overconfidence and anchors the long-run tail to historical base
  rates.</li>
</ul>
<p>
Level 1 is absorbed into Level 2 (the Kaplan-Meier already incorporates the
non-action cases).
</p>

<h4>Time variation</h4>
<p>
The hazard rate is <strong>time-varying</strong>: it peaks in 2027 (the PLA's
stated modernisation deadline, the consensus &ldquo;window of maximum
danger&rdquo; identified by military analysts) then gradually declines as the
specific risk window passes. It remains well above the general base rate
throughout the forecast horizon due to the structural nature of the
cross-strait dispute. Monthly values are smoothed via linear blending at year
boundaries, then converted to a cumulative CDF via the survival function.
</p>

<p><img src="{{{{asset:hazard_rate.png}}}}" alt="Time-varying annualised hazard rate" width="960" /></p>

<h4>Hazard rates and cumulative probabilities by year</h4>
<table>
  <thead>
    <tr><th>Year</th><th>Annual hazard (pooled)</th><th>Cumulative P(action by year-end)</th></tr>
  </thead>
  <tbody>
    {"".join(rate_rows)}
  </tbody>
</table>

<p><img src="{{{{asset:cumulative_probability.png}}}}" alt="Cumulative probability of major Chinese military action" width="960" /></p>

<h3>Resolution criteria</h3>
<p>{RESOLUTION_CRITERIA}</p>

<h3>Sources</h3>
<ul>
  <li><a href="https://www.metaculus.com/questions/11480/chinese-invasion-of-taiwan/" target="_blank" rel="noopener">Metaculus Q#11480</a>: Chinese invasion of Taiwan forecasts</li>
  <li><a href="https://www.metaculus.com/questions/12309/china-engages-in-a-blockade-against-taiwan/" target="_blank" rel="noopener">Metaculus Q#12309</a>: China blockade of Taiwan forecasts</li>
  <li><a href="https://www.metaculus.com/questions/5320/chinese-control-of-half-of-taiwan-by-2050/" target="_blank" rel="noopener">Metaculus Q#5320</a>: Chinese control of half of Taiwan by 2050</li>
  <li><a href="https://www.metaculus.com/questions/11401/china-controls-taiwan-after-invasion/" target="_blank" rel="noopener">Metaculus Q#11401</a>: China controls Taiwan after invasion</li>
  <li><a href="https://chinapower.csis.org/surveying-experts-us-and-taiwan-views-china-approach-taiwan-2024/" target="_blank" rel="noopener">CSIS ChinaPower 2024</a>: expert survey on China's approach to Taiwan</li>
  <li><a href="https://chinapower.csis.org/survey-experts-china-approach-to-taiwan/" target="_blank" rel="noopener">CSIS ChinaPower 2022</a>: earlier expert survey (64 China experts)</li>
  <li><a href="https://www.swiftcentre.org/publicforecasts/china-taiwan-and-tsmc-risks-to-2027" target="_blank" rel="noopener">Swift Centre</a>: China-Taiwan and TSMC risks to 2027</li>
  <li><a href="https://media.defense.gov/2025/Dec/23/2003849070/-1/-1/1/ANNUAL-REPORT-TO-CONGRESS-MILITARY-AND-SECURITY-DEVELOPMENTS-INVOLVING-THE-PEOPLES-REPUBLIC-OF-CHINA-2025.PDF" target="_blank" rel="noopener">Pentagon 2025 Report</a>: annual report on PRC military and security developments</li>
  <li><a href="https://www.ui.se/globalassets/ui.se-eng/publications/other-publications/is-a-conflict-over-taiwan-drawing-near-a-review-of-available-forecasts-and-scenarios.pdf" target="_blank" rel="noopener">UI Sweden (2024)</a>: &ldquo;Is a conflict over Taiwan drawing near?&rdquo; &mdash; review of available forecasts</li>
  <li><a href="https://forum.effectivealtruism.org/posts/qvzcmzPcR5mDEhqkz/an-invasion-of-taiwan-is-uncomfortably-likely-potentially" target="_blank" rel="noopener">EA Forum (2023)</a>: &ldquo;An invasion of Taiwan is uncomfortably likely&rdquo;</li>
  <li><a href="https://correlatesofwar.org/data-sets/mids/" target="_blank" rel="noopener">Correlates of War</a>: Militarized Interstate Disputes dataset (base rates)</li>
</ul>

<p><a href="{{{{asset:generate_market.py}}}}" target="_blank" rel="noopener">Download the Python script used for this analysis</a></p>
"""
    BACKGROUND_PATH.write_text(html, encoding="utf-8")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> int:
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    print("Building monthly hazard curve...")
    hazard = build_monthly_hazard_curve(N_MONTHS)

    print("Computing CDF...")
    cdf = hazard_to_cdf(hazard)

    print("Validating against calibration anchors...")
    validation = validate_against_anchors(cdf, CALIBRATION_ANCHORS)
    for v in validation:
        status = "OK" if abs(v["error"]) < 0.05 else "WARN"
        print(f"  [{status}] {v['description']}: "
              f"target={v['target_prob']:.1%}, model={v['model_prob']:.1%}, "
              f"error={v['error']:+.1%}")

    print("Writing market CSV...")
    write_market_csv(cdf)

    print("Writing summary JSON...")
    write_summary_json(cdf, hazard, validation)

    print("Generating plots...")
    plot_hazard_rate(hazard)
    plot_cdf(cdf, CALIBRATION_ANCHORS)
    plot_reference_classes()

    print("Writing background HTML...")
    write_background_html(cdf)

    print(f"\nOutputs:")
    print(f"  CSV:        {CSV_PATH}")
    print(f"  Summary:    {SUMMARY_PATH}")
    print(f"  Background: {BACKGROUND_PATH}")
    print(f"  Charts:     {HAZARD_PLOT_PATH.name}, {CDF_PLOT_PATH.name}, {REFERENCE_PLOT_PATH.name}")

    # Print key milestones
    print(f"\nKey cumulative probabilities:")
    for t_label, t_idx in [
        ("End 2026", 8),
        ("End 2027", 20),
        ("End 2029", 44),
        ("End 2034", 104),
        ("Mar 2036", 119),
        ("End 2036", 128),
    ]:
        print(f"  P(action by {t_label}) = {cdf[t_idx]:.1%}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
