#!/usr/bin/env python3
from __future__ import annotations

import csv
import json
import math
from dataclasses import dataclass
from pathlib import Path

import matplotlib
import numpy as np

matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Repository layout: this script sits two directory levels below the repo root.
REPO_ROOT = Path(__file__).resolve().parents[2]
DATA_DIR = REPO_ROOT / "data" / "nebius-revenue"
# NOTE(review): SCRIPT_PATH is not referenced anywhere else in this file —
# presumably kept for an upload manifest; confirm before removing.
SCRIPT_PATH = Path(__file__).resolve()
# Output artifacts, all written into DATA_DIR.
CSV_PATH = DATA_DIR / "nebius_revenue_market.csv"
SUMMARY_PATH = DATA_DIR / "model_summary.json"
BACKGROUND_PATH = DATA_DIR / "background_info.html"
FORECAST_PLOT_PATH = DATA_DIR / "forecast_path.png"
REFERENCE_PLOT_PATH = DATA_DIR / "reference_classes.png"

# Market-level metadata emitted into the CSV header.
MARKET_CODE = "nebius-revenue"
MARKET_TITLE = "Nebius annual revenue (FY2026-FY2035)"
MARKET_BUDGET = 300  # total liquidity budget, split evenly across all submarkets
MARKET_DECAY_RATE = 0.005  # per-submarket decay rate; semantics defined by the upload target
PROBABILITY_DECIMALS = 3  # rounding applied to every published probability
N_THRESHOLDS = 50  # size of the threshold ladder per fiscal year
SIM_DRAWS = 80000  # Monte Carlo paths
RNG_SEED = 20260305  # fixed seed so the generated upload is reproducible

# Forecast horizon: fiscal years 2026..2035, each keyed to its year-end (UTC).
YEARS = list(range(2026, 2036))
YEAR_ENDS = {year: f"{year}-12-31T23:59:59Z" for year in YEARS}

# Reported Nebius revenue in $m (FY2024 recast per the FY2025 release; see
# source links below).
HISTORICAL_REVENUE_M = {
    2024: 91.5,
    2025: 529.8,
}
# FY2025 year-over-year growth in percent, surfaced in headers/notes only.
LAST_FY_GROWTH_PCT = (HISTORICAL_REVENUE_M[2025] / HISTORICAL_REVENUE_M[2024] - 1.0) * 100.0

# Company-reported ARR at end of 2025 ($m), used to anchor the FY2026 draw.
NEBIUS_START_ARR_2025_M = 1250.0

@dataclass(frozen=True)
class ComparableCompany:
    """One public comparable used to build the growth reference classes."""

    name: str  # company display name
    bucket: str  # reference class: "broad_cloud" or "ai_infra"
    revenues_m: list[float]  # consecutive fiscal-year revenues in $m, oldest first
    source_label: str  # human-readable citation for the revenue figures
    source_url: str  # link to the underlying financial release


# Growth reference classes built from public comparables. The "broad_cloud"
# bucket estimates a sector growth premium at comparable revenue scale; the
# "ai_infra" bucket (CoreWeave only) supplies the AI-infrastructure uplift,
# which simulate_paths() shrinks heavily because it rests on one company.
COMPARABLES: list[ComparableCompany] = [
    ComparableCompany(
        name="Cloudflare",
        bucket="broad_cloud",
        revenues_m=[736.1, 975.2, 1296.7, 1669.6, 2167.6],
        source_label="Cloudflare FY2025 results / prior FY results",
        source_url="https://investor.cloudflare.com/news/news-details/2026/Cloudflare-Announces-Fourth-Quarter-and-Fiscal-Year-2025-Financial-Results/default.aspx",
    ),
    ComparableCompany(
        name="DigitalOcean",
        bucket="broad_cloud",
        revenues_m=[429.1, 576.3, 692.7, 781.0, 901.4],
        source_label="DigitalOcean FY2025 results / prior FY results",
        source_url="https://investors.digitalocean.com/news/news-details/2026/DigitalOcean-Reports-Fourth-Quarter-and-Full-Year-2025-Financial-Results/default.aspx",
    ),
    ComparableCompany(
        name="Datadog",
        bucket="broad_cloud",
        revenues_m=[1028.8, 1675.1, 2128.1, 2675.7],
        source_label="Datadog FY2024 results / prior FY results",
        source_url="https://investors.datadoghq.com/news-releases/news-release-details/datadog-announces-fourth-quarter-and-fiscal-year-2024-financial",
    ),
    ComparableCompany(
        name="Snowflake",
        bucket="broad_cloud",
        revenues_m=[1219.3, 2065.7, 2806.5, 3626.4],
        source_label="Snowflake FY2025 results / prior FY results",
        source_url="https://investors.snowflake.com/news/news-details/2025/Snowflake-Reports-Fourth-Quarter-and-Full-Year-Fiscal-2025-Financial-Results/default.aspx",
    ),
    ComparableCompany(
        name="CoreWeave",
        bucket="ai_infra",
        revenues_m=[15.8, 228.9, 1915.4, 5131.0],
        source_label="CoreWeave S-1 and FY2025 results",
        source_url="https://www.sec.gov/Archives/edgar/data/1839341/000119312525066762/d243754ds1.htm",
    ),
]

# (label, url) citations surfaced in model_summary.json: Nebius disclosures
# plus the firm-growth literature that motivates fat-tailed, mean-reverting
# growth shocks in the simulation.
OFFICIAL_SOURCE_LINKS = [
    (
        "Nebius FY2025 results",
        "https://group.nebius.com/newsroom/nebius-group-reports-fourth-quarter-and-full-year-2025-financial-results",
    ),
    (
        "Nebius Q3 2025 shareholder letter",
        "https://assets.nebius.com/files/Q3-2025-shareholder-letter-Nebius.pdf",
    ),
    (
        "Nebius FY2024 results",
        "https://group.nebius.com/newsroom/nebius-group-reports-fourth-quarter-and-full-year-2024-financial-results",
    ),
    (
        "Nature (1996): scaling behaviour in the growth of companies",
        "https://www.nature.com/articles/379804a0",
    ),
    (
        "Bottazzi & Secchi (2006): explaining the distribution of firm growth rates",
        "https://ideas.repec.org/a/taf/teurst/v40y2006i2p235-256.html",
    ),
    (
        "World Bank: High-Growth Firms: Facts, Fiction, and Policy Implications",
        "https://documents.worldbank.org/en/publication/documents-reports/documentdetail/661271468126682795/high-growth-firms-facts-fiction-and-policy-implications-for-developing-countries",
    ),
]


def clamp(value: float, low: float, high: float) -> float:
    """Clip *value* into the interval [low, high]."""
    floored = max(low, value)
    return min(high, floored)


def standardized_student_t(rng: np.random.Generator, df: int) -> float:
    """Draw one unit-variance Student-t variate.

    A raw Student-t with ``df`` degrees of freedom has variance df / (df - 2),
    so the draw is divided by sqrt(df / (df - 2)) to standardize it while
    keeping the heavy tails.

    Args:
        rng: NumPy generator supplying the raw draw.
        df: Degrees of freedom; must exceed 2 for the variance to be finite.

    Returns:
        A single float with population variance 1.

    Raises:
        ValueError: If ``df`` is not greater than 2 (the scale would otherwise
            divide by zero or take the square root of a negative number).
    """
    if df <= 2:
        raise ValueError("df must be > 2 for a finite-variance Student-t draw")
    scale = math.sqrt(df / (df - 2))
    return float(rng.standard_t(df) / scale)


def percentile(values: np.ndarray, q: float) -> float:
    """Return the q-th percentile of *values* as a plain Python float."""
    result = np.percentile(values, q)
    return float(result)


def bn_label(value_m: float) -> str:
    """Format a $m amount: "$Xm" below $1bn, otherwise "$X.Ybn" with a
    trailing ".0" dropped (e.g. "$1bn", "$2.2bn")."""
    if value_m < 1000.0:
        return f"${value_m:,.0f}m"
    text = f"${value_m / 1000.0:,.1f}bn"
    return text.replace(".0bn", "bn")


def axis_label(value_m: float) -> str:
    """Axis tick label: "$Xm" below $1bn; whole billions as "$Xbn", fractional
    billions with one decimal."""
    if value_m < 1000.0:
        return f"${value_m:,.0f}m"
    scaled = value_m / 1000.0
    if value_m % 1000 == 0:
        return f"${scaled:,.0f}bn"
    return f"${scaled:,.1f}bn"


def nice_round(value: float) -> int:
    """Snap *value* to a "friendly" granularity that widens with magnitude.

    Values under $100m round to the nearest 5, then the step grows through
    10/25/50/... up to 5000 for values of $50bn and above. The result is
    floored at one step so it is always a positive multiple of the step.
    """
    cutoffs = (100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000)
    steps = (5, 10, 25, 50, 100, 250, 500, 1000, 2500)
    step = 5000  # default for values >= 50000
    for cutoff, candidate in zip(cutoffs, steps):
        if value < cutoff:
            step = candidate
            break
    rounded = int(round(value / step) * step)
    return max(step, rounded)


def build_thresholds(samples: np.ndarray, n_thresholds: int) -> list[int]:
    """Choose ``n_thresholds`` strictly increasing, friendly-rounded thresholds ($m).

    The ladder is geometrically spaced between roughly the 0.5th and 99.7th
    percentiles of the simulated revenues, snapped to "nice" step sizes via
    nice_round().

    Args:
        samples: Simulated revenues ($m) for one fiscal year.
        n_thresholds: Number of thresholds to return.

    Returns:
        A strictly increasing list of integer thresholds of that length.
    """
    # Anchor the ladder just below/above the bulk of the distribution, but
    # never start below $100m.
    low = max(100.0, percentile(samples, 0.5) * 0.80)
    high = percentile(samples, 99.7) * 1.05
    raw = np.geomspace(low, high, 180)
    unique_sorted = sorted({nice_round(float(x)) for x in raw})

    # Rounding can collapse many raw points onto the same nice value; widen
    # the upper end (and sample more densely) until enough distinct values
    # exist to fill the ladder.
    while len(unique_sorted) < n_thresholds:
        high *= 1.08
        raw = np.geomspace(low, high, 260)
        unique_sorted = sorted({nice_round(float(x)) for x in raw})

    # Take n_thresholds picks evenly spread (by index) over the distinct
    # values, then repair any duplicates introduced by index rounding.
    picks = np.linspace(0, len(unique_sorted) - 1, n_thresholds)
    thresholds = [unique_sorted[int(round(idx))] for idx in picks]
    deduped: list[int] = []
    for threshold in thresholds:
        if deduped and threshold <= deduped[-1]:
            # Bump a duplicate up by ~2% (re-rounded); if rounding collapses
            # it back, fall through to a flat +5 bump to keep strict ascent.
            threshold = deduped[-1] + max(5, int(deduped[-1] * 0.02))
            threshold = nice_round(threshold)
            if threshold <= deduped[-1]:
                threshold = deduped[-1] + 5
        deduped.append(int(threshold))
    return deduped[:n_thresholds]


def compute_comparable_growths(companies: list[ComparableCompany]) -> dict[str, list[dict[str, float | str]]]:
    """Expand each comparable's revenue history into year-over-year growth rows.

    Args:
        companies: Comparables with consecutive fiscal-year revenues ($m).

    Returns:
        Mapping of bucket name ("broad_cloud" / "ai_infra") to observation
        dicts with company name, start/end revenue, and log / percent growth.
    """
    rows: dict[str, list[dict[str, float | str]]] = {"broad_cloud": [], "ai_infra": []}
    for company in companies:
        revenues = company.revenues_m
        # Pair each fiscal year's revenue with the next year's.
        for start_rev, end_rev in zip(revenues[:-1], revenues[1:]):
            ratio = end_rev / start_rev
            observation: dict[str, float | str] = {
                "company": company.name,
                "start_revenue_m": start_rev,
                "end_revenue_m": end_rev,
                "growth_log": math.log(ratio),
                "growth_pct": (ratio - 1.0) * 100.0,
            }
            rows[company.bucket].append(observation)
    return rows


def simulate_paths() -> tuple[np.ndarray, dict[str, float]]:
    """Run the Monte Carlo revenue simulation.

    Each of SIM_DRAWS paths first draws a persistent growth regime (global
    floor, broad-cloud sector premium, shrunk AI uplift plus its decay rate,
    and a company-specific decay rate), anchors FY2026 on ARR guidance, then
    rolls revenue forward one fiscal year at a time with fat-tailed shocks
    whose volatility shrinks as the firm grows.

    Returns:
        (samples, diagnostics): ``samples`` has shape (SIM_DRAWS, len(YEARS))
        in $m; ``diagnostics`` holds reference-class growth statistics reused
        by the CSV header, JSON summary, and charts.
    """
    rng = np.random.default_rng(RNG_SEED)
    samples = np.zeros((SIM_DRAWS, len(YEARS)))
    comparable_growths = compute_comparable_growths(COMPARABLES)

    # Reference-class annual log-growth observations.
    broad_growth_logs = np.array([row["growth_log"] for row in comparable_growths["broad_cloud"]], dtype=float)
    ai_growth_logs = np.array([row["growth_log"] for row in comparable_growths["ai_infra"]], dtype=float)

    broad_mu = float(np.mean(broad_growth_logs))
    broad_sd = float(np.std(broad_growth_logs, ddof=1))
    # The AI-infra uplift over the broad-cloud mean is shrunk to 35% because
    # it comes from a single comparable (CoreWeave).
    ai_uplift_raw = float(np.mean(ai_growth_logs) - broad_mu)
    ai_uplift_shrunk = 0.35 * ai_uplift_raw

    for draw_idx in range(SIM_DRAWS):
        revenue_prev = HISTORICAL_REVENUE_M[2025]
        # Per-path regime draws (all in log-growth units unless noted).
        global_floor = rng.normal(math.log1p(0.07), 0.035)  # long-run floor near +7%/yr
        broad_sector = max(0.04, rng.normal(broad_mu, broad_sd * 0.45))
        ai_uplift0 = max(0.0, rng.normal(ai_uplift_shrunk, 0.14))
        ai_decay = max(0.45, rng.normal(0.70, 0.14))  # yearly decay rate of the AI uplift
        company_decay = max(0.60, rng.normal(1.00, 0.20))  # faster decay for the company premium

        # FY2026 anchor: draw an end-2026 ARR (triangular, mode $7.0bn within
        # the $7bn-$9bn guidance band), a ratio translating end-year ARR into
        # recognized FY revenue, and a multiplicative execution-slippage term.
        end_arr_2026 = clamp(float(rng.triangular(4500.0, 7000.0, 9000.0)), 2500.0, 10500.0)
        recognition_ratio = clamp(
            0.5 + 0.5 * NEBIUS_START_ARR_2025_M / end_arr_2026 + rng.normal(-0.13, 0.11),
            0.24,
            0.68,
        )
        execution_slippage = math.exp(rng.normal(-0.30, 0.35))
        company_anchor_2026 = end_arr_2026 * recognition_ratio * execution_slippage

        # Company-specific excess log growth implied by the FY2026 anchor,
        # bounded so an extreme anchor draw cannot dominate the whole path.
        base_growth_2026 = global_floor + broad_sector + ai_uplift0
        company_excess0 = clamp(math.log(company_anchor_2026 / revenue_prev) - base_growth_2026, -0.50, 1.35)
        prev_shock = 0.0

        for year_idx, year in enumerate(YEARS):
            horizon = year - YEARS[0]
            # Premia decay with horizon; the sector premium (decay 0.18 below)
            # fades more slowly than the AI and company-specific terms.
            ai_uplift = ai_uplift0 * math.exp(-ai_decay * horizon)
            company_excess = company_excess0 * math.exp(-company_decay * horizon)
            # Shock volatility falls with firm size (power law), floored at 0.10.
            size_sigma = max(0.10, 0.27 * ((max(revenue_prev, 100.0) / 500.0) ** -0.20))
            # Fat-tailed, unit-variance t(5) shock scaled by size.
            shock = standardized_student_t(rng, df=5) * size_sigma
            # Mild positive persistence of last year's shock after year one.
            shock_persistence = 0.10 * prev_shock if horizon > 0 else 0.0
            mean_growth = global_floor + broad_sector * math.exp(-0.18 * horizon) + ai_uplift + company_excess + shock_persistence
            # Bound realized log growth to [log 0.55, log 12]: roughly -45% to
            # +1100% in any single year.
            realized_growth = clamp(mean_growth + shock, math.log(0.55), math.log(12.0))
            revenue_next = revenue_prev * math.exp(realized_growth)
            samples[draw_idx, year_idx] = revenue_next
            revenue_prev = revenue_next
            prev_shock = shock

    # Diagnostics reported alongside the samples (log and percent forms).
    diagnostics = {
        "broad_cloud_mean_log_growth": broad_mu,
        "broad_cloud_sd_log_growth": broad_sd,
        "broad_cloud_mean_growth_pct": (math.exp(broad_mu) - 1.0) * 100.0,
        "ai_infra_mean_log_growth": float(np.mean(ai_growth_logs)),
        "ai_infra_mean_growth_pct": (math.exp(float(np.mean(ai_growth_logs))) - 1.0) * 100.0,
        "ai_infra_shrunk_uplift_log": ai_uplift_shrunk,
    }
    return samples, diagnostics


def summarize_samples(samples: np.ndarray) -> dict[int, dict[str, float | list[int]]]:
    """Reduce the simulated paths to per-year summary statistics.

    Args:
        samples: Array of shape (draws, len(YEARS)); column i holds simulated
            revenues ($m) for YEARS[i].

    Returns:
        Mapping of year to mean/median/percentile stats ($m) plus the
        threshold ladder for that year's submarkets.
    """
    summary: dict[int, dict[str, float | list[int]]] = {}
    for idx, year in enumerate(YEARS):
        column = samples[:, idx]
        stats: dict[str, float | list[int]] = {
            "mean_m": float(np.mean(column)),
            "median_m": float(np.median(column)),
        }
        for q in (5, 10, 25, 75, 90, 95):
            stats[f"p{q:02d}_m"] = percentile(column, q)
        stats["thresholds_m"] = build_thresholds(column, N_THRESHOLDS)
        summary[year] = stats
    return summary


def survival_probabilities(samples: np.ndarray, thresholds: list[int]) -> list[float]:
    """Convert one year's samples into a monotone survival curve over *thresholds*.

    For each threshold T the raw probability is P(sample >= T), rounded to
    PROBABILITY_DECIMALS and clamped to [0.001, 0.999] so no submarket starts
    at an impossible 0% or 100%. The sequence is then forced non-increasing
    along the (ascending) ladder so rounding cannot break coherence.

    Args:
        samples: 1-D array of simulated revenues ($m) for one fiscal year.
        thresholds: Ascending revenue thresholds ($m).

    Returns:
        One probability per threshold, each in [0.001, 0.999], non-increasing.
    """
    n = len(samples)
    # Sort once so each threshold costs O(log n) via binary search instead of
    # the previous O(n) Python-level scan per threshold (80k samples x 50
    # thresholds per year). Counts are identical to the old generator sum.
    ordered = np.sort(np.asarray(samples, dtype=float))
    probabilities: list[float] = []
    prev = 0.999
    for threshold in thresholds:
        # Index of the first sample >= threshold; everything at or after it counts.
        first_ge = int(np.searchsorted(ordered, threshold, side="left"))
        probability = (n - first_ge) / n
        probability = clamp(round(probability, PROBABILITY_DECIMALS), 0.001, 0.999)
        probability = min(prev, probability)
        probabilities.append(probability)
        prev = probability
    return probabilities


def write_market_csv(samples: np.ndarray, summary: dict[int, dict[str, float | list[int]]], diagnostics: dict[str, float]) -> None:
    """Write the upload CSV: a "#"-prefixed metadata header plus one row per submarket.

    The header carries market-level metadata and generated diagnostics; the
    body holds, for every fiscal year and threshold, the survival probability
    P(revenue >= threshold) as that submarket's initial probability.

    Args:
        samples: Simulated revenues, shape (SIM_DRAWS, len(YEARS)), in $m.
        summary: Per-year stats from summarize_samples() (incl. thresholds_m).
        diagnostics: Reference-class stats from simulate_paths().
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with CSV_PATH.open("w", newline="", encoding="utf-8") as handle:
        # Market-level metadata lines consumed by the uploader.
        handle.write(f"# market_code: {MARKET_CODE}\n")
        handle.write(f"# market_title: {MARKET_TITLE}\n")
        handle.write("# market_type: count\n")
        handle.write("# market_visibility: public\n")
        handle.write("# market_status: draft\n")
        handle.write(f"# market_budget: {MARKET_BUDGET}\n")
        handle.write(f"# market_decay_rate: {MARKET_DECAY_RATE:.3f}\n")
        handle.write(
            "# market_resolution_criteria: Each yearly projection group resolves to Nebius Group's consolidated annual revenue "
            "for that fiscal year (FY2026 through FY2035), on a non-cumulative basis, in USD millions (displayed as USD billions "
            "in chart labels), from the company's "
            "audited annual report, Form 20-F, or official full-year financial results release. If Nebius materially changes "
            "reporting structure, use the most directly comparable consolidated revenue-from-continuing-operations figure "
            "explicitly disclosed for that fiscal year.\n"
        )
        handle.write("# market_x_unit: $bn\n")
        handle.write("# market_number_format: decimal\n")
        handle.write("# market_end_date: 2035-12-31T23:59:59Z\n")
        handle.write("# market_resolution_date: 2036-04-30T23:59:59Z\n")
        handle.write("# market_cumulative: false\n")
        handle.write("# market_background_info_path: background_info.html\n")
        # Chart configuration passed through to the frontend as JSON.
        handle.write(
            '# market_svelte_params: {"scaleType":"log","scaleBase":1.10,"timeCadence":"yearly","countDisplayScale":0.001,"countUnitDisplay":"inline","countAxisLabel":"Annual revenue ($bn)","projectionEndLabelPrefix":"FY ends"}\n'
        )
        # Provenance note: simulation settings and key reference-class stats.
        handle.write(
            "# generated_note: multilevel_revenue_model, draws="
            f"{SIM_DRAWS}, seed={RNG_SEED}, thresholds_per_year={N_THRESHOLDS}, "
            f"last_fy_growth_pct={LAST_FY_GROWTH_PCT:.1f}, broad_cloud_mean_growth_pct={diagnostics['broad_cloud_mean_growth_pct']:.1f}, "
            f"ai_infra_mean_growth_pct={diagnostics['ai_infra_mean_growth_pct']:.1f}, ai_uplift_shrunk_log={diagnostics['ai_infra_shrunk_uplift_log']:.3f}\n"
        )
        # Per-year median and central 80% interval, for quick inspection.
        for year in YEARS:
            item = summary[year]
            handle.write(f"# generated_median_{year}: {item['median_m']:.1f}\n")
            handle.write(f"# generated_interval90_{year}: [{item['p10_m']:.1f},{item['p90_m']:.1f}]\n")
        # Blank line separates the metadata header from the CSV body.
        handle.write("\n")

        writer = csv.writer(handle)
        writer.writerow(
            [
                "projection_group",
                "threshold_decimal",
                "threshold_date",
                "initial_probability",
                "label",
                "end_date",
                "decay_rate",
                "budget_allocation",
                "status",
            ]
        )

        # Liquidity budget is split evenly across every (year, threshold) pair.
        total_submarkets = len(YEARS) * N_THRESHOLDS
        per_submarket_liquidity = MARKET_BUDGET / total_submarkets

        # One ladder of N_THRESHOLDS submarkets per fiscal year; both the
        # projection group and end_date use the fiscal-year-end timestamp,
        # and threshold_date is intentionally left blank.
        for idx, year in enumerate(YEARS):
            year_end = YEAR_ENDS[year]
            thresholds_m = summary[year]["thresholds_m"]
            probabilities = survival_probabilities(samples[:, idx], thresholds_m)
            for threshold_m, probability in zip(thresholds_m, probabilities):
                writer.writerow(
                    [
                        year_end,
                        threshold_m,
                        "",
                        f"{probability:.3f}",
                        f"FY{year} revenue >= {bn_label(float(threshold_m))}",
                        year_end,
                        f"{MARKET_DECAY_RATE:.3f}",
                        f"{per_submarket_liquidity:.3f}",
                        "open",
                    ]
                )


def write_summary_json(summary: dict[int, dict[str, float | list[int]]], diagnostics: dict[str, float]) -> None:
    """Write model_summary.json: inputs, reference classes, and yearly forecasts.

    Args:
        summary: Per-year stats from summarize_samples().
        diagnostics: Reference-class stats from simulate_paths().
    """
    comparable_growths = compute_comparable_growths(COMPARABLES)
    payload = {
        "seed": RNG_SEED,
        "draws": SIM_DRAWS,
        "historical_revenue_m": HISTORICAL_REVENUE_M,
        "last_financial_year_growth_pct": round(LAST_FY_GROWTH_PCT, 2),
        # Company-specific anchors; values restate the guidance figures that
        # simulate_paths() encodes in its FY2026 anchor draws.
        "nebius_specific_inputs": {
            "year_end_2025_arr_m": 1250.0,
            "guidance_end_2026_arr_m": [7000.0, 8000.0, 9000.0],
            "connected_power_target_2026_mw": [800.0, 1000.0],
            "contracted_power_gt_mw": 3000.0,
        },
        "reference_classes": {
            "broad_cloud_growth_observations": comparable_growths["broad_cloud"],
            "ai_infra_growth_observations": comparable_growths["ai_infra"],
            "diagnostics": diagnostics,
        },
        # Forecast stats rounded to 0.1 ($m) for a compact file.
        "yearly_forecast_m": {
            str(year): {
                "mean_m": round(float(item["mean_m"]), 1),
                "median_m": round(float(item["median_m"]), 1),
                "p05_m": round(float(item["p05_m"]), 1),
                "p10_m": round(float(item["p10_m"]), 1),
                "p25_m": round(float(item["p25_m"]), 1),
                "p75_m": round(float(item["p75_m"]), 1),
                "p90_m": round(float(item["p90_m"]), 1),
                "p95_m": round(float(item["p95_m"]), 1),
                "thresholds_m": item["thresholds_m"],
            }
            for year, item in summary.items()
        },
        "sources": [{"label": label, "url": url} for label, url in OFFICIAL_SOURCE_LINKS],
    }
    SUMMARY_PATH.write_text(json.dumps(payload, indent=2), encoding="utf-8")


def plot_forecast(summary: dict[int, dict[str, float | list[int]]]) -> None:
    """Render the forecast chart (history, median path, 50%/80% bands) to FORECAST_PLOT_PATH.

    Args:
        summary: Per-year stats from summarize_samples(); uses median_m,
            p10_m, p25_m, p75_m and p90_m for every year in YEARS.
    """
    # The median line is anchored on the two reported fiscal years, then
    # follows the modeled medians. (The previous version also built p10/p25/
    # p75/p90 lists here that were never used; the bands below pull those
    # percentiles directly from `summary`.)
    years = [2024, 2025] + YEARS
    medians = [HISTORICAL_REVENUE_M[2024], HISTORICAL_REVENUE_M[2025]] + [summary[year]["median_m"] for year in YEARS]

    fig, ax = plt.subplots(figsize=(11, 6))
    ax.plot(years, medians, color="#b45309", lw=2.5, marker="o", label="Median revenue")
    ax.scatter([2024, 2025], [HISTORICAL_REVENUE_M[2024], HISTORICAL_REVENUE_M[2025]], color="#111827", zorder=4, label="Reported revenue")
    # Uncertainty bands cover forecast years only; reported history gets none.
    ax.fill_between(YEARS, [summary[y]["p10_m"] for y in YEARS], [summary[y]["p90_m"] for y in YEARS], color="#f59e0b", alpha=0.18, label="80% interval")
    ax.fill_between(YEARS, [summary[y]["p25_m"] for y in YEARS], [summary[y]["p75_m"] for y in YEARS], color="#f59e0b", alpha=0.35, label="50% interval")

    # Log scale keeps ~$100m history and tens-of-$bn tails on one axis.
    ax.set_yscale("log")
    ax.set_title("Nebius annual revenue model: historical anchor and forecast bands")
    ax.set_ylabel("Revenue ($m, log scale)")
    ax.set_xlabel("Fiscal year")
    ax.grid(alpha=0.25, which="both")
    ax.legend(frameon=False)

    # Fixed tick positions with $m/$bn labels instead of raw log ticks.
    yticks = [100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000, 100000]
    ax.set_yticks(yticks)
    ax.set_yticklabels([axis_label(v) for v in yticks])
    fig.tight_layout()
    fig.savefig(FORECAST_PLOT_PATH, dpi=180)
    plt.close(fig)


def plot_reference_classes(summary: dict[int, dict[str, float | list[int]]], diagnostics: dict[str, float]) -> None:
    """Render the two-panel reference-class chart to REFERENCE_PLOT_PATH.

    Left panel: comparable companies' yearly growth vs. starting revenue,
    with Nebius's FY2025 revenue marked. Right panel: the modeled median
    growth path decomposed against the broad-cloud and shrunk AI components.

    Args:
        summary: Per-year stats from summarize_samples().
        diagnostics: Reference-class stats from simulate_paths().
    """
    comparable_growths = compute_comparable_growths(COMPARABLES)
    fig, axes = plt.subplots(1, 2, figsize=(13, 5.5))

    broad = comparable_growths["broad_cloud"]
    ai = comparable_growths["ai_infra"]

    axes[0].scatter(
        [row["start_revenue_m"] for row in broad],
        [row["growth_pct"] for row in broad],
        color="#2563eb",
        alpha=0.75,
        label="Broad cloud comps",
    )
    axes[0].scatter(
        [row["start_revenue_m"] for row in ai],
        [row["growth_pct"] for row in ai],
        color="#dc2626",
        alpha=0.85,
        label="AI infra comp",
    )
    # Vertical marker: where Nebius sits on the size axis today.
    axes[0].axvline(HISTORICAL_REVENUE_M[2025], color="#111827", ls="--", lw=1.3, label="Nebius FY2025 revenue")
    axes[0].set_xscale("log")
    axes[0].set_xlabel("Starting revenue ($m, log scale)")
    axes[0].set_ylabel("Next-year revenue growth (%)")
    axes[0].set_title("Reference classes by revenue scale")
    axes[0].grid(alpha=0.25)
    axes[0].legend(frameon=False, fontsize=9)

    # Decompose the median growth path. The -0.18 sector decay and the 0.70
    # AI-uplift decay mirror the corresponding priors in simulate_paths()
    # (0.70 is that prior's mean; individual paths draw around it).
    median_growth_pct = []
    broad_component_pct = []
    ai_component_pct = []
    for year in YEARS:
        horizon = year - YEARS[0]
        broad_component = math.exp(diagnostics["broad_cloud_mean_log_growth"] * math.exp(-0.18 * horizon)) - 1.0
        ai_component = math.exp(diagnostics["ai_infra_shrunk_uplift_log"] * math.exp(-0.70 * horizon)) - 1.0
        # Year-over-year growth of the modeled median path, anchored on the
        # reported FY2025 revenue for the first forecast year.
        median_prev = HISTORICAL_REVENUE_M[2025] if year == 2026 else summary[year - 1]["median_m"]
        median_growth = summary[year]["median_m"] / median_prev - 1.0
        median_growth_pct.append(median_growth * 100.0)
        broad_component_pct.append(broad_component * 100.0)
        ai_component_pct.append(ai_component * 100.0)

    axes[1].plot(YEARS, median_growth_pct, color="#b45309", marker="o", lw=2.2, label="Median modeled YoY growth")
    axes[1].plot(YEARS, broad_component_pct, color="#2563eb", ls="--", lw=1.8, label="Broad-cloud contribution")
    axes[1].plot(YEARS, ai_component_pct, color="#dc2626", ls=":", lw=2.0, label="Shrunk AI-infra uplift")
    axes[1].axhline(LAST_FY_GROWTH_PCT, color="#111827", ls="--", lw=1.2, alpha=0.7, label="FY2025 reported YoY growth")
    axes[1].set_title("Growth-rate path after hierarchical shrinkage")
    axes[1].set_xlabel("Fiscal year")
    axes[1].set_ylabel("Growth (%)")
    axes[1].grid(alpha=0.25)
    axes[1].legend(frameon=False, fontsize=9)

    fig.tight_layout()
    fig.savefig(REFERENCE_PLOT_PATH, dpi=180)
    plt.close(fig)


def render_forecast_table(summary: dict[int, dict[str, float | list[int]]]) -> str:
    """Render the per-year median/P10/P90 summary as newline-joined HTML <tr> rows."""

    def row_html(year: int) -> str:
        # One table row: fiscal-year label plus three formatted revenue cells.
        item = summary[year]
        cells = (
            f"FY{year}",
            axis_label(float(item["median_m"])),
            axis_label(float(item["p10_m"])),
            axis_label(float(item["p90_m"])),
        )
        return "<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>"

    return "\n".join(row_html(year) for year in YEARS)


def write_background_html(summary: dict[int, dict[str, float | list[int]]], diagnostics: dict[str, float]) -> None:
    """Write background_info.html: methodology, charts, forecast table, sources.

    The body is a single HTML f-string; ``{{{{asset:...}}}}`` spans render as
    literal ``{{asset:...}}`` placeholders, presumably substituted by the
    upload pipeline (not visible here — confirm against the uploader).

    Args:
        summary: Per-year stats from summarize_samples().
        diagnostics: Reference-class stats from simulate_paths().
    """
    # Percent-growth diagnostics interpolated into the prose below.
    broad_pct = diagnostics["broad_cloud_mean_growth_pct"]
    ai_pct = diagnostics["ai_infra_mean_growth_pct"]
    html = f"""<h3>Calculation of starting probabilities</h3>
<p>
This market uses a <strong>three-level revenue model</strong> for Nebius's non-cumulative annual revenue over FY2026-FY2035.
The model combines: (1) Nebius-specific execution signals, (2) a comparable cloud-platform reference class, and
(3) a general firm-growth base rate from the academic literature so the forecast does not naively extrapolate one explosive year forever.
</p>

<p>
<strong>Resolved anchor from the last completed financial year:</strong> Nebius reported <strong>${HISTORICAL_REVENUE_M[2025]:,.1f}m</strong> of FY2025 revenue
versus a recast FY2024 continuing-operations revenue of <strong>${HISTORICAL_REVENUE_M[2024]:,.1f}m</strong>, which is
<strong>+{LAST_FY_GROWTH_PCT:.0f}% year over year</strong>. That value is included here as a calibration aid; the market itself stays in revenue units only.
</p>

<h4>Key charts</h4>
<p><img src="{{{{asset:forecast_path.png}}}}" alt="Nebius annual revenue forecast bands" width="960" /></p>
<p><img src="{{{{asset:reference_classes.png}}}}" alt="Nebius reference classes and growth-path decomposition" width="960" /></p>

<h4>Why these reference classes</h4>
<ul>
  <li><strong>Nebius-specific:</strong> FY2025 revenue, year-end ARR of about $1.25bn, FY2026 end-year ARR guidance of $7bn-$9bn, the Microsoft agreement announced in December 2025, and disclosed power-capacity targets (800MW-1GW active in 2026; more than 3GW contracted).</li>
  <li><strong>Broad cloud/platform cohort:</strong> Cloudflare, DigitalOcean, Datadog, and Snowflake. Their observed mean annual log-growth in the sample used here is about <strong>{broad_pct:.0f}%</strong>, which prevents the model from treating Nebius as a generic mature company too early.</li>
  <li><strong>AI-infrastructure uplift:</strong> CoreWeave is used as the direct AI-cloud analogue, but only after strong shrinkage. Its raw growth history is much hotter (mean about <strong>{ai_pct:.0f}%</strong>), so the model only takes a partial, decaying uplift from that reference class to reduce single-comparable bias.</li>
  <li><strong>Companies in general:</strong> the academic literature says firm growth shocks are heavy-tailed, high growth is rarely persistent forever, and volatility falls with size. That is why the simulation uses fat-tailed shocks and size-scaled volatility instead of a smooth deterministic curve.</li>
</ul>

<h4>Model structure</h4>
<ol>
  <li><strong>Global layer:</strong> long-run revenue growth mean-reverts toward a modest positive floor, with shock volatility shrinking roughly with firm size.</li>
  <li><strong>Sector layer:</strong> Nebius inherits a broad-cloud growth premium estimated from comparable public firms, plus a shrunk AI-infrastructure uplift that decays as the market matures.</li>
  <li><strong>Company layer:</strong> FY2026 is anchored to ARR guidance and deployment timing. The residual company-specific growth premium then decays faster than the sector premium.</li>
  <li><strong>Simulation:</strong> 80,000 Monte Carlo paths produce yearly revenue samples, and each year's market ladder is the survival curve <code>P(revenue &gt;= threshold)</code> at 50 friendly-rounded thresholds.</li>
</ol>

<h4>Forecast summary</h4>
<table>
  <thead>
    <tr>
      <th>Fiscal year</th>
      <th>Median</th>
      <th>P10</th>
      <th>P90</th>
    </tr>
  </thead>
  <tbody>
    {render_forecast_table(summary)}
  </tbody>
</table>

<h4>Source notes</h4>
<ul>
  <li><a href="https://group.nebius.com/newsroom/nebius-group-reports-fourth-quarter-and-full-year-2025-financial-results" target="_blank" rel="noopener">Nebius FY2025 results</a>: FY2025 revenue of $529.8m and recast FY2024 revenue of $91.5m.</li>
  <li><a href="https://assets.nebius.com/files/Q3-2025-shareholder-letter-Nebius.pdf" target="_blank" rel="noopener">Nebius Q3 2025 shareholder letter</a>: year-end ARR of about $1.25bn, FY2026 end-year ARR guidance of $7bn-$9bn, and power-capacity targets.</li>
  <li><a href="https://group.nebius.com/newsroom/nebius-group-signs-multi-billion-dollar-agreement-with-microsoft-to-support-global-expansion" target="_blank" rel="noopener">Nebius-Microsoft agreement</a>: direct evidence that the company has unusually strong near-term demand support.</li>
  <li><a href="https://www.nature.com/articles/379804a0" target="_blank" rel="noopener">Stanley et al. (1996)</a> and <a href="https://ideas.repec.org/a/taf/teurst/v40y2006i2p235-256.html" target="_blank" rel="noopener">Bottazzi &amp; Secchi (2006)</a>: motivation for fat-tailed growth shocks and size-sensitive volatility.</li>
  <li><a href="https://documents.worldbank.org/en/publication/documents-reports/documentdetail/661271468126682795/high-growth-firms-facts-fiction-and-policy-implications-for-developing-countries" target="_blank" rel="noopener">World Bank high-growth firms review</a>: motivation for mean reversion and caution about assuming repeated hypergrowth episodes.</li>
</ul>

<h4>Data and reproducibility assets</h4>
<ul>
  <li><a href="{{{{asset:model_summary.json}}}}" target="_blank" rel="noopener">Download the model summary JSON</a></li>
  <li><a href="{{{{asset:forecast_path.png}}}}" target="_blank" rel="noopener">Download the forecast chart</a></li>
  <li><a href="{{{{asset:reference_classes.png}}}}" target="_blank" rel="noopener">Download the reference-class chart</a></li>
</ul>

<p><a href="{{{{asset:generate_market.py}}}}" target="_blank" rel="noopener">Download the Python script used for this upload</a></p>
"""
    BACKGROUND_PATH.write_text(html, encoding="utf-8")


def main() -> int:
    """Generate every market artifact (CSV, JSON summary, HTML, two charts).

    Returns:
        Process exit code (0 on success).
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    # Simulate once; every artifact below is derived from the same samples.
    samples, diagnostics = simulate_paths()
    summary = summarize_samples(samples)
    write_market_csv(samples, summary, diagnostics)
    write_summary_json(summary, diagnostics)
    plot_forecast(summary)
    plot_reference_classes(summary, diagnostics)
    write_background_html(summary, diagnostics)

    print(f"Saved market CSV: {CSV_PATH}")
    print(f"Saved model summary: {SUMMARY_PATH}")
    print(f"Saved background HTML: {BACKGROUND_PATH}")
    print(f"Saved charts: {FORECAST_PLOT_PATH.name}, {REFERENCE_PLOT_PATH.name}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
