#!/usr/bin/env python3
from __future__ import annotations

import csv
import json
import math
from dataclasses import dataclass
from pathlib import Path

import matplotlib
import numpy as np

matplotlib.use("Agg")
import matplotlib.pyplot as plt

# --- Repository layout and output artifact paths ---
REPO_ROOT = Path(__file__).resolve().parents[2]
DATA_DIR = REPO_ROOT / "data" / "sk-hynix-revenue"
SCRIPT_PATH = Path(__file__).resolve()
CSV_PATH = DATA_DIR / "sk_hynix_revenue_market.csv"
SUMMARY_PATH = DATA_DIR / "model_summary.json"
BACKGROUND_PATH = DATA_DIR / "background_info.html"
FORECAST_PLOT_PATH = DATA_DIR / "forecast_path.png"
REFERENCE_PLOT_PATH = DATA_DIR / "reference_classes.png"
# Optional hand-maintained override: per-year threshold ladders (see load_live_thresholds).
LIVE_THRESHOLDS_PATH = DATA_DIR / "live_thresholds.json"

# --- Market metadata and simulation configuration ---
MARKET_CODE = "sk-hynix-rev"
MARKET_TITLE = "SK hynix annual revenue"
MARKET_BUDGET = 300
MARKET_DECAY_RATE = 0.005
PROBABILITY_DECIMALS = 3  # rounding applied to every published probability
N_THRESHOLDS = 50  # ladder legs per projection year
SIM_DRAWS = 90000  # Monte Carlo paths
RNG_SEED = 20260408  # fixed seed keeps regenerated outputs reproducible

# Forecast years: FY2026 through FY2029 inclusive.
YEARS = list(range(2026, 2030))
FIRST_YEAR = YEARS[0]
LAST_YEAR = YEARS[-1]
YEAR_ENDS = {year: f"{year}-12-31T23:59:59Z" for year in YEARS}

# Units: KRW trillions. Latest SK hynix releases present K-IFRS revenue in KRW.
SK_HYNIX_REVENUE_TN = {
    2015: 18.798,
    2016: 17.198,
    2017: 30.109,
    2018: 40.445,
    2019: 26.991,
    2020: 31.900,
    2021: 42.998,
    2022: 44.622,
    2023: 32.7657,
    2024: 66.1930,
    2025: 97.1467,
}

# Peer reference class. Units: USD billions (Micron reports in USD);
# only growth rates are compared across companies, never levels.
MICRON_REVENUE_USD_BN = {
    2015: 16.192,
    2016: 12.399,
    2017: 20.322,
    2018: 30.391,
    2019: 23.406,
    2020: 21.435,
    2021: 27.705,
    2022: 30.758,
    2023: 15.540,
    2024: 25.111,
    2025: 37.378,
}

# Analyst expectations are incorporated through a partial-pooling layer.
# Each yearly latent analyst target is inferred from current published
# expectations and then pooled with the structural memory-cycle path using
# inverse-variance weights rather than treated as a fixed override.
ANALYST_EXPECTATIONS_TN = {
    2026: {
        "consensus_hana_fnguide_2026_02_23": 211.6896,
        "hana_house_2026_02_23": 229.3808,
    },
    2027: {
        "consensus_hana_fnguide_2026_02_23": 242.8982,
        "hana_house_2026_02_23": 286.7260,
    },
}
# Prior forecast-error standard deviation (log scale) assigned to the
# analyst signal for each covered year; wider for the longer horizon.
ANALYST_ERROR_PRIOR_LOG = {
    2026: 0.075,
    2027: 0.105,
}

# --- Derived anchors from the latest reported results ---
LAST_FY_GROWTH_PCT = (SK_HYNIX_REVENUE_TN[2025] / SK_HYNIX_REVENUE_TN[2024] - 1.0) * 100.0
FOURTH_QUARTER_2025_TN = 32.8267  # 4Q25 reported revenue, KRW tn
Q4_2025_ANNUALIZED_TN = FOURTH_QUARTER_2025_TN * 4.0  # simple run-rate anchor
MARKET_END_DATE = f"{LAST_YEAR}-12-31T23:59:59Z"
MARKET_RESOLUTION_DATE = f"{LAST_YEAR + 1}-04-30T23:59:59Z"

# (label, url) pairs surfaced in the JSON summary and background HTML.
OFFICIAL_SOURCE_LINKS = [
    (
        "SK hynix FY2025 results",
        "https://news.skhynix.com/sk-hynix-announces-fy25-financial-results/",
    ),
    (
        "SK hynix FY2024 results",
        "https://news.skhynix.com/sk-hynix-announces-4q24-financial-results/",
    ),
    (
        "SK hynix FY2022 results",
        "https://news.skhynix.com/sk-hynix-reports-2022-and-fourth-quarter-financial-results/",
    ),
    (
        "Micron FY2025 results",
        "https://investors.micron.com/news-releases/news-release-details/micron-technology-inc-reports-results-fourth-quarter-and-full-8",
    ),
    (
        "Hana Securities sector report with SK hynix consensus and house estimates",
        "https://www.hanaw.com/download/research/FileServer/WEB/industry/industry/2026/02/23/Semi_260224.pdf",
    ),
    (
        "Korea JoongAng Daily on FnGuide 1Q26 consensus",
        "https://koreajoongangdaily.joins.com/news/2026-04-06/business/industry/Samsung-SK-hynix-to-post-record-Q1-profits-with-blockbuster-earnings-still-to-come/2560747",
    ),
    (
        "Nature (1996): scaling behaviour in the growth of companies",
        "https://www.nature.com/articles/379804a0",
    ),
    (
        "Bottazzi & Secchi (2006): explaining the distribution of firm growth rates",
        "https://ideas.repec.org/a/taf/teurst/v40y2006i2p235-256.html",
    ),
]


@dataclass(frozen=True)
class GrowthObservation:
    """One year-over-year revenue transition for a reference company.

    Revenues are in the company's native reporting units (KRW trillions
    for SK hynix, USD billions for Micron), so growth rates are
    comparable across companies even though levels are not.
    """

    # Display name, e.g. "SK hynix" or "Micron".
    company: str
    # Adjacent years in the source series (start_year < end_year).
    start_year: int
    end_year: int
    # Revenue levels at the two endpoints, in native units.
    start_revenue: float
    end_revenue: float
    # log(end_revenue / start_revenue).
    growth_log: float
    # (end_revenue / start_revenue - 1) * 100.
    growth_pct: float


def clamp(value: float, low: float, high: float) -> float:
    """Restrict *value* to the closed interval [low, high]."""
    return max(low, min(value, high))


def standardized_student_t(rng: np.random.Generator, df: int) -> float:
    """Draw one Student-t variate rescaled to unit variance.

    A t(df) variate has variance df / (df - 2) for df > 2; dividing by
    the square root of that variance yields a heavy-tailed shock with
    standard deviation 1. Consumes exactly one draw from *rng*.
    """
    variance = df / (df - 2)
    raw_draw = rng.standard_t(df)
    return float(raw_draw / math.sqrt(variance))


def percentile(values: np.ndarray, q: float) -> float:
    """Return the q-th percentile of *values* (q on a 0-100 scale) as a float."""
    # np.percentile(v, q) is np.quantile(v, q / 100) with the default
    # linear interpolation; both produce the same value.
    return float(np.quantile(values, q / 100.0))


def tn_label(value_tn: float) -> str:
    """Format a KRW-trillion amount as a compact label such as "₩97.1tn".

    Precision shrinks as magnitude grows, and an all-zero fractional part
    is trimmed ("₩10.0tn" -> "₩10tn").
    """
    if value_tn >= 100.0:
        return f"₩{value_tn:,.0f}tn"
    if value_tn >= 10.0:
        text, zero_suffix = f"₩{value_tn:,.1f}tn", ".0tn"
    else:
        text, zero_suffix = f"₩{value_tn:,.2f}tn", ".00tn"
    return text.replace(zero_suffix, "tn")


def axis_label(value_tn: float) -> str:
    """Format an axis tick value (no currency sign), trimming trailing zeros."""
    if value_tn >= 100.0:
        return f"{value_tn:,.0f}"
    # One decimal in the tens, two below ten; drop an all-zero fraction.
    precision, zeros = (1, ".0") if value_tn >= 10.0 else (2, ".00")
    return f"{value_tn:,.{precision}f}".replace(zeros, "")


def nice_round(value: float) -> float:
    """Round *value* to a "friendly" step whose size grows with magnitude.

    Never returns less than one step, so the result is always positive.
    """
    # (exclusive upper bound, step) pairs; the first matching bound wins.
    step_ladder = (
        (20, 1),
        (50, 2),
        (100, 5),
        (250, 5),
        (500, 10),
        (1000, 20),
    )
    step = 50  # default for values >= 1000
    for bound, bound_step in step_ladder:
        if value < bound:
            step = bound_step
            break
    rounded = round(value / step) * step
    return float(max(step, rounded))


def build_thresholds(samples: np.ndarray, n_thresholds: int) -> list[float]:
    """Derive an ascending ladder of "friendly" thresholds covering *samples*.

    The ladder spans roughly the padded 0.4th-99.6th percentile range of
    the samples, snapped to nice_round values, and is strictly increasing
    with exactly *n_thresholds* entries.
    """
    # Pad both ends of the sample range; never start below 20 KRW tn.
    low = max(20.0, percentile(samples, 0.4) * 0.82)
    high = percentile(samples, 99.6) * 1.08
    # Geometric spacing gives multiplicatively even coverage (log-scale market).
    raw = np.geomspace(low, high, 220)
    unique_sorted = sorted({nice_round(float(x)) for x in raw})

    # nice_round can collapse nearby grid points; widen the top of the
    # range until enough distinct rounded candidates exist.
    while len(unique_sorted) < n_thresholds:
        high *= 1.10
        raw = np.geomspace(low, high, 320)
        unique_sorted = sorted({nice_round(float(x)) for x in raw})

    # Evenly subsample the candidate grid down to n_thresholds picks.
    picks = np.linspace(0, len(unique_sorted) - 1, n_thresholds)
    thresholds = [unique_sorted[int(round(idx))] for idx in picks]
    deduped: list[float] = []
    for threshold in thresholds:
        # Enforce strict monotonicity: nudge a duplicate upward via
        # nice_round, falling back to +1.0 when the nudge does not move it.
        if deduped and threshold <= deduped[-1]:
            threshold = nice_round(deduped[-1] * 1.035)
            if threshold <= deduped[-1]:
                threshold = deduped[-1] + 1.0
        deduped.append(float(threshold))
    return deduped[:n_thresholds]


def load_live_thresholds() -> dict[int, list[float]]:
    """Load optional per-year threshold ladders from LIVE_THRESHOLDS_PATH.

    The JSON payload is expected to map year strings to lists of positive
    numbers. Because the file is a hand-maintained optional override,
    malformed pieces (non-integer year keys, non-list values, non-numeric
    or non-positive thresholds) are skipped instead of aborting the run.

    Returns:
        Mapping of year -> sorted, de-duplicated list of positive
        thresholds; empty dict when the file is absent or its top level
        is not a JSON object.
    """
    if not LIVE_THRESHOLDS_PATH.exists():
        return {}
    payload = json.loads(LIVE_THRESHOLDS_PATH.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        return {}

    ladders: dict[int, list[float]] = {}
    for year_text, values in payload.items():
        try:
            year = int(year_text)
        except (TypeError, ValueError):
            continue
        if not isinstance(values, list):
            continue
        cleaned: set[float] = set()
        for value in values:
            # Convert once; the previous code called float(v) twice and
            # crashed on non-numeric entries (e.g. a stray string or null).
            try:
                threshold = float(value)
            except (TypeError, ValueError):
                continue
            if threshold > 0:
                cleaned.add(threshold)
        if cleaned:
            ladders[year] = sorted(cleaned)
    return ladders


def compute_growth_observations() -> dict[str, list[GrowthObservation]]:
    """Build year-over-year growth records for each reference company.

    Returns a dict with keys "sk_hynix" and "micron", each holding one
    GrowthObservation per adjacent pair of years in that company's
    revenue series (native currency units).
    """
    series_specs = [
        ("SK hynix", SK_HYNIX_REVENUE_TN, "sk_hynix"),
        ("Micron", MICRON_REVENUE_USD_BN, "micron"),
    ]
    observations: dict[str, list[GrowthObservation]] = {"sk_hynix": [], "micron": []}
    for company, series, bucket in series_specs:
        ordered_years = sorted(series)
        for prev_year, next_year in zip(ordered_years, ordered_years[1:]):
            prev_revenue = series[prev_year]
            next_revenue = series[next_year]
            ratio = next_revenue / prev_revenue
            observations[bucket].append(
                GrowthObservation(
                    company=company,
                    start_year=prev_year,
                    end_year=next_year,
                    start_revenue=prev_revenue,
                    end_revenue=next_revenue,
                    growth_log=math.log(ratio),
                    growth_pct=(ratio - 1.0) * 100.0,
                )
            )
    return observations


def analyst_partial_pool(
    year: int,
    horizon: int,
    structural_revenue: float,
    rng: np.random.Generator,
) -> tuple[float, dict[str, float]] | None:
    """Blend the structural simulation draw with published analyst targets.

    Args:
        year: Fiscal year being simulated; only years present in
            ANALYST_EXPECTATIONS_TN get pooled.
        horizon: Years past the first forecast year (0 for the first);
            widens both signal standard deviations.
        structural_revenue: Revenue (KRW tn) proposed by the structural
            memory-cycle path for this draw.
        rng: Generator shared with the simulation loop — the number and
            order of rng calls here are part of the seeded stream, so do
            not reorder them.

    Returns:
        (pooled revenue draw in KRW tn, diagnostics dict), or None when
        no analyst expectations exist for *year*, in which case the
        caller uses the structural draw unchanged.
    """
    estimates = ANALYST_EXPECTATIONS_TN.get(year)
    if not estimates:
        return None

    # Work in log space: revenue is positive and shocks are multiplicative,
    # so Gaussian inverse-variance pooling applies to log revenue.
    estimate_values = np.array(list(estimates.values()), dtype=float)
    estimate_logs = np.log(estimate_values)
    analyst_center_log = float(np.mean(estimate_logs))
    # Disagreement across published estimates; zero when only one exists.
    analyst_dispersion_log = float(np.std(estimate_logs, ddof=1)) if len(estimate_logs) > 1 else 0.0

    # Guidance/consensus should dominate near-term when it has stronger
    # predictive power than the structural cycle simulation.
    base_analyst_error_log = ANALYST_ERROR_PRIOR_LOG.get(year, 0.075 + 0.030 * horizon)
    # Total analyst noise: prior forecast error and estimate dispersion
    # combined in quadrature (treated as independent).
    analyst_signal_sd = math.sqrt(base_analyst_error_log**2 + analyst_dispersion_log**2)
    structural_signal_sd = 0.24 + 0.05 * horizon

    analyst_draw_log = rng.normal(analyst_center_log, analyst_signal_sd)
    structural_log = math.log(structural_revenue)

    # Precision-weighted (inverse-variance) pooling of the two noisy
    # log-revenue signals.
    analyst_precision = 1.0 / (analyst_signal_sd**2)
    structural_precision = 1.0 / (structural_signal_sd**2)
    pooled_precision = analyst_precision + structural_precision
    pooled_mean_log = (
        analyst_precision * analyst_draw_log + structural_precision * structural_log
    ) / pooled_precision
    pooled_sd_log = math.sqrt(1.0 / pooled_precision)

    # Sample around the pooled mean so residual uncertainty is retained
    # rather than snapping every draw to the pooled point estimate.
    pooled_revenue = float(math.exp(rng.normal(pooled_mean_log, pooled_sd_log)))
    diagnostics = {
        "analyst_center_tn": float(math.exp(analyst_center_log)),
        "analyst_dispersion_log": analyst_dispersion_log,
        "base_analyst_error_log": base_analyst_error_log,
        "analyst_signal_sd_log": analyst_signal_sd,
        "structural_signal_sd_log": structural_signal_sd,
        "pooled_weight_analyst": analyst_precision / pooled_precision,
        "pooled_weight_structural": structural_precision / pooled_precision,
    }
    return pooled_revenue, diagnostics


def simulate_paths() -> tuple[np.ndarray, dict[str, float]]:
    """Run the seeded Monte Carlo revenue simulation.

    Returns:
        samples: (SIM_DRAWS, len(YEARS)) array of simulated annual
            revenue in KRW trillions, one column per forecast year.
        diagnostics: scalar summaries of the reference classes and of
            the analyst partial-pooling layer, embedded later in the CSV
            header, JSON summary, and background HTML.
    """
    rng = np.random.default_rng(RNG_SEED)
    samples = np.zeros((SIM_DRAWS, len(YEARS)))
    growths = compute_growth_observations()
    sk_logs = np.array([row.growth_log for row in growths["sk_hynix"]], dtype=float)
    micron_logs = np.array([row.growth_log for row in growths["micron"]], dtype=float)
    # Pooled memory-cycle reference class: SK hynix + Micron log growth.
    all_memory_logs = np.concatenate([sk_logs, micron_logs])

    memory_mu = float(np.mean(all_memory_logs))
    memory_sd = float(np.std(all_memory_logs, ddof=1))
    sk_cycle_sd = float(np.std(sk_logs, ddof=1))
    # Average log growth inside booms (> +0.20 log) and busts (< -0.20 log).
    boom_mean = float(np.mean([x for x in all_memory_logs if x > 0.20]))
    bust_mean = float(np.mean([x for x in all_memory_logs if x < -0.20]))
    # Per-year accumulators for analyst-pooling diagnostics, averaged below.
    pooled_weight_records: dict[int, list[float]] = {year: [] for year in ANALYST_EXPECTATIONS_TN}
    analyst_center_records: dict[int, list[float]] = {year: [] for year in ANALYST_EXPECTATIONS_TN}
    analyst_signal_sd_records: dict[int, list[float]] = {year: [] for year in ANALYST_EXPECTATIONS_TN}
    structural_signal_sd_records: dict[int, list[float]] = {year: [] for year in ANALYST_EXPECTATIONS_TN}

    for draw_idx in range(SIM_DRAWS):
        # Per-path latent state, drawn once per path.
        revenue_prev = SK_HYNIX_REVENUE_TN[2025]
        long_run_growth = rng.normal(math.log1p(0.030), 0.050)
        memory_cycle = rng.normal(math.log1p(0.13), 0.16)
        hbm_uplift0 = max(0.0, rng.normal(math.log1p(0.16), 0.13))
        hbm_decay = max(0.18, rng.normal(0.38, 0.10))
        company_decay = max(0.35, rng.normal(0.58, 0.14))
        prev_shock = 0.0
        downturn_hit = False  # at most one discrete downturn shock per path

        for year_idx, year in enumerate(YEARS):
            horizon = year - YEARS[0]
            if horizon > 0:
                # AR(1)-style decay of the cycle state plus fresh noise.
                memory_cycle = 0.50 * memory_cycle + rng.normal(-0.020, 0.15)
                downturn_probability = clamp(0.06 + 0.030 * horizon, 0.06, 0.26)
                if not downturn_hit and rng.random() < downturn_probability:
                    # One-off cyclical bust shock.
                    memory_cycle += rng.normal(-0.36, 0.16)
                    downturn_hit = True
                elif downturn_hit:
                    # Mild recovery drift in years after the bust.
                    memory_cycle += rng.normal(0.04, 0.12)

            # HBM uplift and the 4Q25 run-rate pull both decay with horizon.
            hbm_uplift = hbm_uplift0 * math.exp(-hbm_decay * horizon)
            q4_run_rate_pull = math.log(Q4_2025_ANNUALIZED_TN / SK_HYNIX_REVENUE_TN[2025]) * math.exp(-company_decay * horizon)
            # Larger revenue base -> lower percentage volatility, floored at 0.12.
            size_sigma = max(0.12, 0.28 * ((max(revenue_prev, 40.0) / 100.0) ** -0.18))
            shock = standardized_student_t(rng, df=5) * size_sigma
            shock_persistence = 0.13 * prev_shock if horizon > 0 else 0.0
            # Mean-reversion drag once revenue exceeds 240 KRW tn.
            saturation_drag = -0.18 * max(0.0, math.log(revenue_prev / 240.0))

            mean_growth = long_run_growth + memory_cycle + hbm_uplift + q4_run_rate_pull + shock_persistence + saturation_drag
            # Cap one-year moves to roughly [-55%, +110%].
            realized_growth = clamp(mean_growth + shock, math.log(0.45), math.log(2.10))
            raw_revenue = revenue_prev * math.exp(realized_growth)

            # Partial pooling with analyst expectations where available.
            pooled = analyst_partial_pool(year, horizon, raw_revenue, rng)
            if pooled is not None:
                revenue_next, pooled_diag = pooled
                pooled_weight_records[year].append(pooled_diag["pooled_weight_analyst"])
                analyst_center_records[year].append(pooled_diag["analyst_center_tn"])
                analyst_signal_sd_records[year].append(pooled_diag["analyst_signal_sd_log"])
                structural_signal_sd_records[year].append(pooled_diag["structural_signal_sd_log"])
            else:
                revenue_next = raw_revenue

            samples[draw_idx, year_idx] = revenue_next
            revenue_prev = revenue_next
            prev_shock = shock

    diagnostics = {
        "sk_hynix_mean_growth_pct": (math.exp(float(np.mean(sk_logs))) - 1.0) * 100.0,
        "sk_hynix_sd_log_growth": sk_cycle_sd,
        "micron_mean_growth_pct": (math.exp(float(np.mean(micron_logs))) - 1.0) * 100.0,
        "memory_mean_growth_pct": (math.exp(memory_mu) - 1.0) * 100.0,
        "memory_sd_log_growth": memory_sd,
        "memory_boom_mean_growth_pct": (math.exp(boom_mean) - 1.0) * 100.0,
        "memory_bust_mean_growth_pct": (math.exp(bust_mean) - 1.0) * 100.0,
        "q4_2025_annualized_tn": Q4_2025_ANNUALIZED_TN,
    }
    for year in sorted(ANALYST_EXPECTATIONS_TN):
        diagnostics[f"analyst_center_{year}_tn"] = float(np.mean(analyst_center_records[year]))
        diagnostics[f"pooled_weight_analyst_{year}"] = float(np.mean(pooled_weight_records[year]))
        diagnostics[f"analyst_signal_sd_log_{year}"] = float(np.mean(analyst_signal_sd_records[year]))
        diagnostics[f"structural_signal_sd_log_{year}"] = float(np.mean(structural_signal_sd_records[year]))
        diagnostics[f"analyst_error_prior_log_{year}"] = ANALYST_ERROR_PRIOR_LOG[year]
    return samples, diagnostics


def summarize_samples(samples: np.ndarray) -> dict[int, dict[str, float | list[float]]]:
    """Reduce the simulated paths to per-year quantiles and threshold ladders.

    Live threshold ladders from load_live_thresholds() take precedence;
    otherwise a fresh ladder is built from that year's samples.
    """
    ladder_overrides = load_live_thresholds()
    summary: dict[int, dict[str, float | list[float]]] = {}
    for column, year in enumerate(YEARS):
        draws = samples[:, column]
        ladder = ladder_overrides.get(year) or build_thresholds(draws, N_THRESHOLDS)
        stats: dict[str, float | list[float]] = {
            "mean_tn": float(np.mean(draws)),
            "median_tn": float(np.median(draws)),
        }
        for q in (5, 10, 25, 75, 90, 95):
            stats[f"p{q:02d}_tn"] = percentile(draws, q)
        stats["thresholds_tn"] = ladder
        summary[year] = stats
    return summary


def survival_probabilities(samples: np.ndarray, thresholds: list[float]) -> list[float]:
    """Convert samples to a monotone survival ladder P(X >= threshold).

    Each probability is rounded to PROBABILITY_DECIMALS, clipped to
    [0.001, 0.999] so no leg starts at a degenerate price, and forced to
    be non-increasing along the ascending threshold ladder.

    Args:
        samples: 1-D array of simulated revenue values for one year.
        thresholds: Ascending ladder of thresholds (KRW tn).
    """
    n = len(samples)
    # Sort once so each threshold count is a binary search instead of a
    # pure-Python O(n) scan (previously ~n * len(thresholds) iterations).
    sorted_samples = np.sort(np.asarray(samples, dtype=float))
    probabilities: list[float] = []
    prev = 0.999
    for threshold in thresholds:
        # searchsorted(side="left") finds the first element >= threshold,
        # so n minus that index counts samples >= threshold, matching the
        # original `value >= threshold` comparison exactly.
        exceed_count = n - int(np.searchsorted(sorted_samples, threshold, side="left"))
        probability = exceed_count / n
        probability = clamp(round(probability, PROBABILITY_DECIMALS), 0.001, 0.999)
        probability = min(prev, probability)
        probabilities.append(probability)
        prev = probability
    return probabilities


def write_market_csv(samples: np.ndarray, summary: dict[int, dict[str, float | list[float]]], diagnostics: dict[str, float]) -> None:
    """Write the market definition CSV to CSV_PATH.

    The file starts with '#'-prefixed metadata lines (market settings,
    resolution criteria, model provenance, per-year medians/intervals),
    followed by one CSV row per (year, threshold) ladder leg with its
    initial survival probability.

    Args:
        samples: (SIM_DRAWS, len(YEARS)) simulated revenue array.
        summary: per-year stats from summarize_samples; supplies the
            threshold ladders and the generated_* header lines.
        diagnostics: scalar diagnostics from simulate_paths, embedded in
            the generated_note header.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as handle:
        # Metadata header consumed by the market ingestion pipeline.
        handle.write(f"# market_code: {MARKET_CODE}\n")
        handle.write(f"# market_title: {MARKET_TITLE}\n")
        handle.write("# market_type: count\n")
        handle.write("# market_visibility: public\n")
        handle.write("# market_status: draft\n")
        handle.write(f"# market_budget: {MARKET_BUDGET}\n")
        handle.write(f"# market_decay_rate: {MARKET_DECAY_RATE:.3f}\n")
        handle.write(
            "# market_resolution_criteria: Each yearly projection group resolves to SK hynix Inc.'s consolidated annual revenue "
            f"for that fiscal year (FY{FIRST_YEAR} through FY{LAST_YEAR}), on a non-cumulative basis, in KRW trillions, from the company's "
            "audited annual report, official full-year financial results release, or K-IFRS financial statements. If SK hynix "
            "materially changes reporting structure, use the most directly comparable consolidated revenue figure explicitly "
            "disclosed for that fiscal year. Do not convert from KRW into another currency for resolution.\n"
        )
        handle.write("# market_x_unit: ₩Xtn\n")
        handle.write("# market_number_format: decimal\n")
        handle.write(f"# market_end_date: {MARKET_END_DATE}\n")
        handle.write(f"# market_resolution_date: {MARKET_RESOLUTION_DATE}\n")
        handle.write("# market_cumulative: false\n")
        handle.write("# market_background_info_path: background_info.html\n")
        handle.write(
            '# market_svelte_params: {"scaleType":"log","scaleBase":1.10,"timeCadence":"yearly","countUnitDisplay":"inline","countAxisLabel":"Annual revenue (₩tn)","projectionEndLabelPrefix":"FY ends"}\n'
        )
        # Provenance note: simulation settings plus headline diagnostics.
        handle.write(
            "# generated_note: memory_revenue_model, draws="
            f"{SIM_DRAWS}, seed={RNG_SEED}, thresholds_per_year={N_THRESHOLDS}, "
            f"last_fy_growth_pct={LAST_FY_GROWTH_PCT:.1f}, memory_mean_growth_pct={diagnostics['memory_mean_growth_pct']:.1f}, "
            f"memory_sd_log_growth={diagnostics['memory_sd_log_growth']:.3f}, analyst_center_2026_tn={diagnostics['analyst_center_2026_tn']:.1f}, "
            f"pooled_weight_analyst_2026={diagnostics['pooled_weight_analyst_2026']:.2f}, "
            f"pooled_weight_analyst_2027={diagnostics['pooled_weight_analyst_2027']:.2f}\n"
        )
        for year in YEARS:
            item = summary[year]
            handle.write(f"# generated_median_{year}: {item['median_tn']:.1f}\n")
            handle.write(f"# generated_interval90_{year}: [{item['p10_tn']:.1f},{item['p90_tn']:.1f}]\n")
        handle.write("\n")

        writer = csv.writer(handle)
        writer.writerow(
            [
                "projection_group",
                "threshold_decimal",
                "threshold_date",
                "initial_probability",
                "label",
                "end_date",
                "decay_rate",
                "budget_allocation",
                "status",
            ]
        )

        # One row per ladder leg: P(revenue >= threshold) for each year.
        for idx, year in enumerate(YEARS):
            year_end = YEAR_ENDS[year]
            thresholds_tn = summary[year]["thresholds_tn"]
            probabilities = survival_probabilities(samples[:, idx], thresholds_tn)
            for threshold_tn, probability in zip(thresholds_tn, probabilities):
                writer.writerow(
                    [
                        year_end,
                        # Trim trailing zeros and dot: "250.000" -> "250".
                        f"{threshold_tn:.3f}".rstrip("0").rstrip("."),
                        "",
                        f"{probability:.3f}",
                        f"FY{year} revenue",
                        year_end,
                        f"{MARKET_DECAY_RATE:.3f}",
                        "",
                        "open",
                    ]
                )


def write_summary_json(summary: dict[int, dict[str, float | list[float]]], diagnostics: dict[str, float]) -> None:
    """Write the model summary JSON (inputs, diagnostics, forecasts) to SUMMARY_PATH.

    The payload records the seed and draw count, the raw historical and
    analyst inputs, the reference-class observations, and rounded
    per-year forecast quantiles, so a reviewer can audit the published
    probabilities without re-running the simulation.
    """
    growths = compute_growth_observations()
    payload = {
        "seed": RNG_SEED,
        "draws": SIM_DRAWS,
        "units": "KRW trillions for SK hynix revenue thresholds",
        "historical_revenue_tn": SK_HYNIX_REVENUE_TN,
        "last_financial_year_growth_pct": round(LAST_FY_GROWTH_PCT, 2),
        "q4_2025_revenue_tn": FOURTH_QUARTER_2025_TN,
        "q4_2025_annualized_tn": round(Q4_2025_ANNUALIZED_TN, 2),
        "analyst_expectations_tn": ANALYST_EXPECTATIONS_TN,
        "analyst_error_prior_log": ANALYST_ERROR_PRIOR_LOG,
        "reference_classes": {
            # Dataclass rows serialized via __dict__ for JSON compatibility.
            "sk_hynix_cycle_observations": [row.__dict__ for row in growths["sk_hynix"]],
            "micron_cycle_observations": [row.__dict__ for row in growths["micron"]],
            "diagnostics": diagnostics,
        },
        "yearly_forecast_tn": {
            str(year): {
                "mean_tn": round(float(item["mean_tn"]), 1),
                "median_tn": round(float(item["median_tn"]), 1),
                "p05_tn": round(float(item["p05_tn"]), 1),
                "p10_tn": round(float(item["p10_tn"]), 1),
                "p25_tn": round(float(item["p25_tn"]), 1),
                "p75_tn": round(float(item["p75_tn"]), 1),
                "p90_tn": round(float(item["p90_tn"]), 1),
                "p95_tn": round(float(item["p95_tn"]), 1),
                "thresholds_tn": item["thresholds_tn"],
            }
            for year, item in summary.items()
        },
        "sources": [{"label": label, "url": url} for label, url in OFFICIAL_SOURCE_LINKS],
    }
    SUMMARY_PATH.write_text(json.dumps(payload, indent=2), encoding="utf-8")


def plot_forecast(summary: dict[int, dict[str, float | list[float]]]) -> None:
    """Render the history-plus-forecast chart and save it to FORECAST_PLOT_PATH.

    Shows reported revenue (2015-2025), the modeled median path joined to
    history, 50% and 80% forecast bands, and the mean of the published
    analyst estimates per covered year, all on a log y-axis.
    """
    years = sorted(SK_HYNIX_REVENUE_TN) + YEARS
    historical = [SK_HYNIX_REVENUE_TN[year] for year in sorted(SK_HYNIX_REVENUE_TN)]
    # One continuous line: reported levels followed by forecast medians.
    medians = historical + [summary[year]["median_tn"] for year in YEARS]

    fig, ax = plt.subplots(figsize=(11, 6))
    ax.plot(years, medians, color="#0f766e", lw=2.5, marker="o", label="Median revenue")
    ax.scatter(sorted(SK_HYNIX_REVENUE_TN), historical, color="#111827", zorder=4, label="Reported revenue")
    ax.fill_between(YEARS, [summary[y]["p10_tn"] for y in YEARS], [summary[y]["p90_tn"] for y in YEARS], color="#14b8a6", alpha=0.18, label="80% interval")
    ax.fill_between(YEARS, [summary[y]["p25_tn"] for y in YEARS], [summary[y]["p75_tn"] for y in YEARS], color="#14b8a6", alpha=0.35, label="50% interval")
    # Overlay: simple mean of the published estimates for each covered year.
    analyst_years = sorted(ANALYST_EXPECTATIONS_TN)
    analyst_centers = [
        float(np.mean(list(ANALYST_EXPECTATIONS_TN[year].values()))) for year in analyst_years
    ]
    ax.plot(
        analyst_years,
        analyst_centers,
        color="#f97316",
        ls="--",
        marker="s",
        lw=1.7,
        label="Pooled analyst center",
    )

    ax.set_yscale("log")
    ax.set_title("SK hynix annual revenue model: history and forecast bands")
    ax.set_ylabel("Revenue (KRW tn, log scale)")
    ax.set_xlabel("Fiscal year")
    ax.grid(alpha=0.25, which="both")
    ax.legend(frameon=False)

    # Fixed "friendly" ticks on the log axis, labeled without trailing zeros.
    yticks = [20, 30, 50, 75, 100, 150, 200, 300, 500, 750]
    ax.set_yticks(yticks)
    ax.set_yticklabels([axis_label(v) for v in yticks])
    fig.tight_layout()
    fig.savefig(FORECAST_PLOT_PATH, dpi=180)
    plt.close(fig)


def plot_reference_classes(summary: dict[int, dict[str, float | list[float]]], diagnostics: dict[str, float]) -> None:
    """Render the two reference-class panels and save to REFERENCE_PLOT_PATH.

    Left panel: next-year growth versus starting revenue for SK hynix and
    Micron (size-vs-volatility view of the memory cycle). Right panel:
    the modeled year-over-year growth path with a P10/P90 band, against
    the FY2025 reported growth and the memory-cycle average.
    """
    growths = compute_growth_observations()
    fig, axes = plt.subplots(1, 2, figsize=(13, 5.5))

    for bucket, color, marker in [("sk_hynix", "#0f766e", "o"), ("micron", "#2563eb", "s")]:
        rows = growths[bucket]
        axes[0].scatter(
            [row.start_revenue for row in rows],
            [row.growth_pct for row in rows],
            color=color,
            marker=marker,
            alpha=0.80,
            label=rows[0].company,
        )
    axes[0].axhline(0, color="#111827", lw=1.0, alpha=0.55)
    axes[0].set_xscale("log")
    axes[0].set_xlabel("Starting revenue (company native currency, log scale)")
    axes[0].set_ylabel("Next-year revenue growth (%)")
    axes[0].set_title("Memory-cycle reference class")
    axes[0].grid(alpha=0.25)
    axes[0].legend(frameon=False, fontsize=9)

    # Growth series derived from quantile LEVELS: each band edge divides a
    # year's p10 (or p90) level by the previous year's p10 (or p90) level.
    # This is a ratio of quantiles, not the quantile of simulated growth.
    median_growth_pct = []
    p10_growth_pct = []
    p90_growth_pct = []
    for year in YEARS:
        # First forecast year is anchored on reported FY2025 revenue.
        prev_median = SK_HYNIX_REVENUE_TN[2025] if year == YEARS[0] else summary[year - 1]["median_tn"]
        median_growth_pct.append((summary[year]["median_tn"] / prev_median - 1.0) * 100.0)
        prev_p10 = SK_HYNIX_REVENUE_TN[2025] if year == YEARS[0] else summary[year - 1]["p10_tn"]
        prev_p90 = SK_HYNIX_REVENUE_TN[2025] if year == YEARS[0] else summary[year - 1]["p90_tn"]
        p10_growth_pct.append((summary[year]["p10_tn"] / prev_p10 - 1.0) * 100.0)
        p90_growth_pct.append((summary[year]["p90_tn"] / prev_p90 - 1.0) * 100.0)

    axes[1].plot(YEARS, median_growth_pct, color="#0f766e", marker="o", lw=2.2, label="Median modeled YoY growth")
    axes[1].fill_between(YEARS, p10_growth_pct, p90_growth_pct, color="#14b8a6", alpha=0.16, label="P10/P90 growth band")
    axes[1].axhline(LAST_FY_GROWTH_PCT, color="#111827", ls="--", lw=1.2, alpha=0.7, label="FY2025 reported YoY growth")
    axes[1].axhline(diagnostics["memory_mean_growth_pct"], color="#2563eb", ls=":", lw=2.0, label="Memory-cycle average")
    axes[1].set_title("Modeled growth path after cycle shrinkage")
    axes[1].set_xlabel("Fiscal year")
    axes[1].set_ylabel("Growth (%)")
    axes[1].grid(alpha=0.25)
    axes[1].legend(frameon=False, fontsize=9)

    fig.tight_layout()
    fig.savefig(REFERENCE_PLOT_PATH, dpi=180)
    plt.close(fig)


def render_forecast_table(summary: dict[int, dict[str, float | list[float]]]) -> str:
    """Render per-year median/P10/P90 forecast rows as newline-joined <tr> markup."""

    def row_html(year: int) -> str:
        # One table row: fiscal year label plus three formatted quantiles.
        item = summary[year]
        cells = "".join(
            f"<td>{text}</td>"
            for text in (
                f"FY{year}",
                tn_label(float(item["median_tn"])),
                tn_label(float(item["p10_tn"])),
                tn_label(float(item["p90_tn"])),
            )
        )
        return f"<tr>{cells}</tr>"

    return "\n".join(row_html(year) for year in YEARS)


def write_background_html(summary: dict[int, dict[str, float | list[float]]], diagnostics: dict[str, float]) -> None:
    growths = compute_growth_observations()
    sk_obs = len(growths["sk_hynix"])
    micron_obs = len(growths["micron"])
    html = f"""<h3>Calculation of starting probabilities</h3>
<p>
The starting probabilities of this market were calculated by combining SK hynix's own revenue history, a memory-chip peer cycle, and near-term AI-memory demand signals.
The market resolves in Korean won because SK hynix's official K-IFRS releases report consolidated revenue in KRW; converting to USD would add exchange-rate noise that is not part of the question.
</p>

<p>
<strong>Latest completed fiscal-year anchor:</strong> SK hynix reported <strong>KRW {SK_HYNIX_REVENUE_TN[2025]:,.1f}tn</strong> of FY2025 revenue,
up <strong>{LAST_FY_GROWTH_PCT:.0f}% year over year</strong> from <strong>KRW {SK_HYNIX_REVENUE_TN[2024]:,.1f}tn</strong> in FY2024.
Its 4Q25 revenue was <strong>KRW {FOURTH_QUARTER_2025_TN:,.1f}tn</strong>, equal to a simple annualized run rate of about <strong>KRW {Q4_2025_ANNUALIZED_TN:,.1f}tn</strong>.
The company also said HBM revenue more than doubled year over year and that it had begun large-scale HBM4 production to meet customer requests.
</p>

<h4>Key charts</h4>
<p><img src="{{{{asset:forecast_path.png}}}}" alt="SK hynix annual revenue forecast bands" width="960" /></p>
<p><img src="{{{{asset:reference_classes.png}}}}" alt="SK hynix memory-cycle reference classes" width="960" /></p>

<h4>Reference classes used</h4>
<ul>
  <li><strong>SK hynix's own cycle:</strong> {sk_obs} year-to-year observations from 2015-2025. This captures the company's direct exposure to memory booms and downturns, including the 2023 trough and the 2024-2025 AI-memory rebound.</li>
  <li><strong>Memory peer cycle:</strong> {micron_obs} Micron year-to-year observations from FY2015-FY2025. Micron is not identical to SK hynix, but it faces similar DRAM/NAND pricing and capacity cycles, so it helps avoid treating one company history as the whole reference class.</li>
  <li><strong>Analyst expectations with partial pooling:</strong> current published estimates are treated as noisy observations rather than ground truth. For FY2026 and FY2027, the model uses Hana Securities' quoted FnGuide consensus of about <strong>KRW {ANALYST_EXPECTATIONS_TN[2026]['consensus_hana_fnguide_2026_02_23']:,.1f}tn</strong> and <strong>KRW {ANALYST_EXPECTATIONS_TN[2027]['consensus_hana_fnguide_2026_02_23']:,.1f}tn</strong>, alongside Hana's own higher house view of <strong>KRW {ANALYST_EXPECTATIONS_TN[2026]['hana_house_2026_02_23']:,.1f}tn</strong> and <strong>KRW {ANALYST_EXPECTATIONS_TN[2027]['hana_house_2026_02_23']:,.1f}tn</strong>.</li>
  <li><strong>General company-growth evidence:</strong> the simulation uses fat-tailed shocks and lower percentage volatility at larger scale, consistent with empirical findings that company growth rates are noisy, heavy-tailed, and less volatile for larger firms.</li>
</ul>

<h4>How overconfidence is reduced</h4>
<p>
Some observations overlap conceptually: SK hynix and Micron both sit inside the same memory cycle, and the near-term analyst path already reflects recent HBM demand.
The model therefore does not count these as independent evidence streams. It gives the SK hynix and Micron cycle histories a shared memory-cycle layer, and then partially pools the structural path with analyst expectations using inverse-variance weights rather than snapping to a broker target.
Near-term outside-view guidance error priors are set tighter than structural cycle error (about <strong>{diagnostics['analyst_error_prior_log_2026']:.3f}</strong> log-sd in FY2026 and <strong>{diagnostics['analyst_error_prior_log_2027']:.3f}</strong> in FY2027), so guidance gets more weight where its predictive power is stronger. In the current fit, the analyst layer gets an average weight of about <strong>{diagnostics['pooled_weight_analyst_2026']:.0%}</strong> in FY2026 and <strong>{diagnostics['pooled_weight_analyst_2027']:.0%}</strong> in FY2027.
The fitted memory-cycle sample has an average annual log-growth equivalent of about <strong>{diagnostics['memory_mean_growth_pct']:.0f}%</strong>, a log-growth standard deviation of <strong>{diagnostics['memory_sd_log_growth']:.2f}</strong>, average boom observations around <strong>{diagnostics['memory_boom_mean_growth_pct']:.0f}%</strong>, and average bust observations around <strong>{diagnostics['memory_bust_mean_growth_pct']:.0f}%</strong>.
</p>

<h4>Most influential historical examples</h4>
<table>
  <thead>
    <tr>
      <th>Example</th>
      <th>Revenue change</th>
      <th>Why it matters</th>
      <th>Context</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>SK hynix FY2025</td>
      <td>KRW 66.2tn to KRW 97.1tn, +47%</td>
      <td>Latest anchor; HBM revenue more than doubled.</td>
      <td><a href="https://news.skhynix.com/sk-hynix-announces-fy25-financial-results/" target="_blank" rel="noopener">FY2025 results</a></td>
    </tr>
    <tr>
      <td>SK hynix FY2024</td>
      <td>KRW 32.8tn to KRW 66.2tn, +102%</td>
      <td>Shows how quickly memory revenue can rebound from a trough.</td>
      <td><a href="https://news.skhynix.com/sk-hynix-announces-4q24-financial-results/" target="_blank" rel="noopener">FY2024 results</a></td>
    </tr>
    <tr>
      <td>SK hynix FY2023</td>
      <td>KRW 44.6tn to KRW 32.8tn, -27%</td>
      <td>Downturn example that prevents straight-line AI extrapolation.</td>
      <td><a href="https://news.skhynix.com/sk-hynix-announces-4q24-financial-results/" target="_blank" rel="noopener">FY2024 comparative table</a></td>
    </tr>
    <tr>
      <td>Micron FY2025</td>
      <td>$25.1bn to $37.4bn, +49%</td>
      <td>Independent memory peer experiencing the same AI-led upcycle.</td>
      <td><a href="https://investors.micron.com/news-releases/news-release-details/micron-technology-inc-reports-results-fourth-quarter-and-full-8" target="_blank" rel="noopener">Micron FY2025 results</a></td>
    </tr>
    <tr>
      <td>Micron FY2023</td>
      <td>$30.8bn to $15.5bn, -49%</td>
      <td>Large peer-cycle bust used to keep downside tails wide.</td>
      <td><a href="https://investors.micron.com/news-releases/news-release-details/micron-technology-inc-reports-results-fourth-quarter-and-full-8" target="_blank" rel="noopener">Micron comparative figures</a></td>
    </tr>
  </tbody>
</table>

<h4>Model structure</h4>
<ol>
  <li><strong>Current company layer:</strong> starts from FY2025 revenue and 4Q25 run rate, then applies a decaying HBM uplift.</li>
  <li><strong>Memory-cycle layer:</strong> draws from SK hynix and Micron historical boom and bust behavior, with persistent cycle shocks.</li>
  <li><strong>Near-term analyst layer:</strong> infers a latent analyst target for FY2026-FY2027 from current consensus and house estimates, then partially pools that target with the structural path using precision weights.</li>
  <li><strong>Simulation:</strong> {SIM_DRAWS:,} Monte Carlo paths produce yearly revenue samples. Each year's market ladder is <code>P(revenue &gt;= threshold)</code> at {N_THRESHOLDS} friendly-rounded KRW trillion thresholds.</li>
</ol>

<h4>Forecast summary</h4>
<table>
  <thead>
    <tr>
      <th>Fiscal year</th>
      <th>Median</th>
      <th>P10</th>
      <th>P90</th>
    </tr>
  </thead>
  <tbody>
    {render_forecast_table(summary)}
  </tbody>
</table>

<h4>Source notes</h4>
<ul>
  <li><a href="https://news.skhynix.com/sk-hynix-announces-fy25-financial-results/" target="_blank" rel="noopener">SK hynix FY2025 results</a>: FY2025 revenue of KRW 97.1467tn, FY2024 comparison of KRW 66.1930tn, 4Q25 revenue of KRW 32.8267tn, and HBM/HBM4 commentary.</li>
  <li><a href="https://news.skhynix.com/sk-hynix-announces-4q24-financial-results/" target="_blank" rel="noopener">SK hynix FY2024 results</a>: FY2024 and FY2023 revenue history, plus HBM share of DRAM revenue in 4Q24.</li>
  <li><a href="https://news.skhynix.com/sk-hynix-reports-2022-and-fourth-quarter-financial-results/" target="_blank" rel="noopener">SK hynix FY2022 results</a>: direct context for the prior memory downturn.</li>
  <li><a href="https://investors.micron.com/news-releases/news-release-details/micron-technology-inc-reports-results-fourth-quarter-and-full-8" target="_blank" rel="noopener">Micron FY2025 results</a>: memory-peer cycle reference, including FY2025, FY2024, and FY2023 revenue figures.</li>
  <li><a href="https://www.hanaw.com/download/research/FileServer/WEB/industry/industry/2026/02/23/Semi_260224.pdf" target="_blank" rel="noopener">Hana Securities sector report</a>: quoted FnGuide consensus and Hana house estimates for SK hynix FY2026-FY2027 revenue.</li>
  <li><a href="https://koreajoongangdaily.joins.com/news/2026-04-06/business/industry/Samsung-SK-hynix-to-post-record-Q1-profits-with-blockbuster-earnings-still-to-come/2560747" target="_blank" rel="noopener">Korea JoongAng Daily on 1Q26 consensus</a>: contemporaneous FnGuide market consensus of KRW 46.6tn for SK hynix 1Q26 revenue.</li>
  <li><a href="https://www.nature.com/articles/379804a0" target="_blank" rel="noopener">Stanley et al. (1996)</a> and <a href="https://ideas.repec.org/a/taf/teurst/v40y2006i2p235-256.html" target="_blank" rel="noopener">Bottazzi &amp; Secchi (2006)</a>: motivation for fat-tailed growth shocks and size-sensitive volatility.</li>
</ul>

<h4>Data and reproducibility assets</h4>
<ul>
  <li><a href="{{{{asset:model_summary.json}}}}" target="_blank" rel="noopener">Download the model summary JSON</a></li>
  <li><a href="{{{{asset:forecast_path.png}}}}" target="_blank" rel="noopener">Download the forecast chart</a></li>
  <li><a href="{{{{asset:reference_classes.png}}}}" target="_blank" rel="noopener">Download the reference-class chart</a></li>
</ul>

<p><a href="{{{{asset:generate_market.py}}}}" target="_blank" rel="noopener">Download the Python script used for this upload</a></p>
"""
    BACKGROUND_PATH.write_text(html, encoding="utf-8")


def write_readme() -> None:
    """Write the package README listing staged assets plus regen/publish commands."""
    readme_path = DATA_DIR / "README.md"
    # Plain literal: the original text contains no interpolation placeholders.
    content = """# SK hynix annual revenue market package

This folder contains the staged market inputs for:

- `data/sk-hynix-revenue/sk_hynix_revenue_market.csv`

and the supporting background assets:

- `data/sk-hynix-revenue/background_info.html`
- `data/sk-hynix-revenue/model_summary.json`
- `data/sk-hynix-revenue/forecast_path.png`
- `data/sk-hynix-revenue/reference_classes.png`

The generator is:

- `scripts/sk-hynix-revenue/generate_market.py`

Regenerate everything with:

```bash
python3 scripts/sk-hynix-revenue/generate_market.py
```

Stage the shadow draft with:

```bash
AX_TOKEN=... python3 scripts/publish_market.py \\
  --csv data/sk-hynix-revenue/sk_hynix_revenue_market.csv \\
  --background-html data/sk-hynix-revenue/background_info.html \\
  --upload data/sk-hynix-revenue/model_summary.json \\
  --upload data/sk-hynix-revenue/forecast_path.png \\
  --upload data/sk-hynix-revenue/reference_classes.png \\
  --upload scripts/sk-hynix-revenue/generate_market.py \\
  --stage-only
```
"""
    readme_path.write_text(content, encoding="utf-8")


def main() -> int:
    """Run the full generation pipeline and report the artifacts written.

    Returns 0 so the ``__main__`` guard can hand it to ``SystemExit``.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Simulation feeds the summary; everything downstream consumes one or both.
    revenue_samples, model_diagnostics = simulate_paths()
    yearly_summary = summarize_samples(revenue_samples)

    write_market_csv(revenue_samples, yearly_summary, model_diagnostics)
    write_summary_json(yearly_summary, model_diagnostics)
    plot_forecast(yearly_summary)
    plot_reference_classes(yearly_summary, model_diagnostics)
    write_background_html(yearly_summary, model_diagnostics)
    write_readme()

    for message in (
        f"Saved market CSV: {CSV_PATH}",
        f"Saved model summary: {SUMMARY_PATH}",
        f"Saved background HTML: {BACKGROUND_PATH}",
        f"Saved charts: {FORECAST_PLOT_PATH.name}, {REFERENCE_PLOT_PATH.name}",
    ):
        print(message)
    return 0


if __name__ == "__main__":
    # Propagate main()'s return code (0 on success) as the process exit status.
    raise SystemExit(main())
