from __future__ import annotations

import csv
import json
import shutil
import zipfile
from pathlib import Path


ROOT = Path(r"C:\BalancedTrim\analyzer")
STUDY_ID = "000F"
STUDY_SLUG = "successful_operating_envelope_hypothesis"
STUDY_NAME = "STUDY_000F_SUCCESSFUL_OPERATING_ENVELOPE_HYPOTHESIS_20260609"
STUDY_DIR = ROOT / STUDY_NAME
ZIP_PATH = ROOT / f"{STUDY_NAME}.zip"
SOURCE_DIR = Path(r"C:\Study\Studies")


def ensure_dirs() -> None:
    for subdir in [
        "analysis",
        "figures",
        "manifest",
        "manuscript",
        "outputs",
        "reports",
        "source_tables",
    ]:
        (STUDY_DIR / subdir).mkdir(parents=True, exist_ok=True)


def copy_sources() -> dict[str, Path]:
    source_map = {
        "study000d_phase_environment_table.csv": SOURCE_DIR
        / "STUDY_000D_SELECTIVE_EXPRESSION_VS_NORMALIZATION_20260609"
        / "outputs"
        / "study000d_phase_environment_table.csv",
        "study000d_yearly_expression_table.csv": SOURCE_DIR
        / "STUDY_000D_SELECTIVE_EXPRESSION_VS_NORMALIZATION_20260609"
        / "outputs"
        / "study000d_yearly_expression_table.csv",
        "study000e_context_burden_summary.csv": SOURCE_DIR
        / "STUDY_000E_BURDEN_OUTSIDE_SUCCESSFUL_CONTEXT_20260609"
        / "outputs"
        / "study000e_context_burden_summary.csv",
        "study000e_outdoor_probe_details.csv": SOURCE_DIR
        / "STUDY_000E_BURDEN_OUTSIDE_SUCCESSFUL_CONTEXT_20260609"
        / "outputs"
        / "study000e_outdoor_probe_details.csv",
        "study000e_burden_summary.csv": SOURCE_DIR
        / "STUDY_000E_BURDEN_OUTSIDE_SUCCESSFUL_CONTEXT_20260609"
        / "outputs"
        / "study000e_burden_summary.csv",
        "study000b_monthly_summary.csv": SOURCE_DIR
        / "STUDY_000B_ADAPTIVE_EFFICIENCY_AND_INTERNAL_COST_20260609"
        / "outputs"
        / "study000b_monthly_summary.csv",
        "study000b_run_level_dataset.csv": SOURCE_DIR
        / "STUDY_000B_ADAPTIVE_EFFICIENCY_AND_INTERNAL_COST_20260609"
        / "outputs"
        / "study000b_run_level_dataset.csv",
        "study000b_window_summary.csv": SOURCE_DIR
        / "STUDY_000B_ADAPTIVE_EFFICIENCY_AND_INTERNAL_COST_20260609"
        / "outputs"
        / "study000b_window_summary.csv",
        "microstudy_b_outdoor_cases.csv": SOURCE_DIR
        / "STUDY_000B_MICROSTUDY_B_PRESERVED_TURNOVER_SUPPRESSED_STRIDE_20260609"
        / "outputs"
        / "microstudy_b_outdoor_cases.csv",
        "study000c_claim_stability_matrix.csv": SOURCE_DIR
        / "STUDY_000C_FULL_DATA_EXHAUSTIVE_PROGRAM_ANALYSIS_20260609"
        / "outputs"
        / "study000c_claim_stability_matrix.csv",
    }
    copied: dict[str, Path] = {}
    for name, src in source_map.items():
        dst = STUDY_DIR / "source_tables" / name
        shutil.copy2(src, dst)
        copied[name] = dst
    return copied


def read_csv(path: Path) -> list[dict[str, str]]:
    with path.open(newline="", encoding="utf-8") as handle:
        return list(csv.DictReader(handle))


def write_csv(path: Path, rows: list[dict[str, object]], fieldnames: list[str]) -> None:
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)


def mean(values: list[float]) -> float:
    return sum(values) / len(values) if values else 0.0


def escape_xml(text: str) -> str:
    return (
        str(text)
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
    )


def split_text(text: str, max_chars: int) -> list[str]:
    words = text.split()
    lines: list[str] = []
    current: list[str] = []
    length = 0
    for word in words:
        add_len = len(word) + (1 if current else 0)
        if current and length + add_len > max_chars:
            lines.append(" ".join(current))
            current = [word]
            length = len(word)
        else:
            current.append(word)
            length += add_len
    if current:
        lines.append(" ".join(current))
    return lines


def percent_str(value: float) -> str:
    return f"{value:.2f}"


def classify_cluster(date_value: str, later_subset: str) -> tuple[str, str]:
    if later_subset == "1" and date_value == "2026-04-09":
        return "april_2026_boundary_probe", "boundary_expression"
    if later_subset == "1":
        return "october_2025_outside_cluster", "outside_envelope_cluster"
    return "early_exploratory_outdoor", "preformation_exploratory"


def summarize_case_group(group_label: str, rows: list[dict[str, object]]) -> dict[str, object]:
    return {
        "context_label": group_label,
        "n_runs": len(rows),
        "mean_hr_residual_bpm": round(mean([float(r["hr_residual_bpm"]) for r in rows]), 5),
        "mean_running_share_28d_activity_pct": round(
            mean([float(r["running_share_28d_activity_pct"]) for r in rows]), 5
        ),
        "mean_total_activity_hours_28d": round(
            mean([float(r["total_activity_hours_28d_rebuilt"]) for r in rows]), 5
        ),
        "mean_treadmill_neighbors_21d": round(
            mean([float(r["treadmill_neighbors_21d"]) for r in rows]), 5
        ),
        "mean_speed_mps_norm": round(mean([float(r["speed_mps_norm"]) for r in rows]), 5),
        "mean_distance_miles_norm": round(
            mean([float(r["distance_miles_norm"]) for r in rows]), 5
        ),
    }


def modality_shares(rows: list[dict[str, str]]) -> tuple[float, float]:
    if not rows:
        return 0.0, 0.0
    treadmill = sum(1 for row in rows if row["rebuild_modality"] == "running_treadmill")
    outdoor = sum(1 for row in rows if row["rebuild_modality"] == "running_outdoor")
    total = treadmill + outdoor
    if not total:
        return 0.0, 0.0
    return (100.0 * treadmill / total, 100.0 * outdoor / total)


def build_outputs(copied: dict[str, Path]) -> dict[str, list[dict[str, object]]]:
    phase_rows = {row["phase_id"]: row for row in read_csv(copied["study000d_phase_environment_table.csv"])}
    context_rows = {row["context_label"]: row for row in read_csv(copied["study000e_context_burden_summary.csv"])}
    burden_summary = read_csv(copied["study000e_burden_summary.csv"])[0]
    monthly_rows = {row["year_month"]: row for row in read_csv(copied["study000b_monthly_summary.csv"])}
    window_rows = {row["window_label"]: row for row in read_csv(copied["study000b_window_summary.csv"])}
    probe_detail_rows = {row["calendar_date"]: row for row in read_csv(copied["study000e_outdoor_probe_details.csv"])}
    outdoor_case_rows = read_csv(copied["microstudy_b_outdoor_cases.csv"])
    run_level_rows = read_csv(copied["study000b_run_level_dataset.csv"])
    claim_rows = {row["claim_id"]: row for row in read_csv(copied["study000c_claim_stability_matrix.csv"])}

    run_lookup = {
        row["calendar_date"]: row
        for row in run_level_rows
        if row["rebuild_modality"] == "running_outdoor"
    }

    late_start = window_rows["late_qc_window"]["start_date"]
    late_end = window_rows["late_qc_window"]["end_date"]
    late_window_rows = [
        row for row in run_level_rows if late_start <= row["calendar_date"] <= late_end
    ]
    late_treadmill_share, late_outdoor_share = modality_shares(late_window_rows)
    late_running_share_values = [
        float(row["running_share_28d_activity_pct"]) for row in late_window_rows
    ]
    late_hr_residual_values = [float(row["hr_residual_bpm"]) for row in late_window_rows]
    late_total_activity_values = [
        float(row["total_activity_hours_28d_rebuilt"]) for row in late_window_rows
    ]

    merged_cases: list[dict[str, object]] = []
    case_groups: dict[str, list[dict[str, object]]] = {
        "early_exploratory_outdoor": [],
        "october_2025_outside_cluster": [],
        "april_2026_boundary_probe": [],
    }

    for case in outdoor_case_rows:
        date_value = case["calendar_date"]
        run_row = run_lookup[date_value]
        probe_row = probe_detail_rows.get(date_value, {})
        cluster_label, envelope_position = classify_cluster(
            date_value, case["later_specialized_outdoor_subset"]
        )
        month_row = monthly_rows[case["year_month"]]
        merged = {
            "calendar_date": date_value,
            "year_month": case["year_month"],
            "cluster_label": cluster_label,
            "envelope_position": envelope_position,
            "later_specialized_outdoor_subset": case["later_specialized_outdoor_subset"],
            "distance_miles_norm": round(float(case["distance_miles_norm"]), 4),
            "speed_mps_norm": round(float(case["speed_mps_norm"]), 3),
            "avg_hr_bpm": round(float(case["avg_hr_bpm"]), 2),
            "hr_residual_bpm": round(float(run_row["hr_residual_bpm"]), 5),
            "hr_residual_pct": round(float(case["hr_residual_pct"]), 2),
            "running_share_28d_activity_pct": round(
                float(run_row["running_share_28d_activity_pct"]), 5
            ),
            "total_activity_hours_28d_rebuilt": round(
                float(run_row["total_activity_hours_28d_rebuilt"]), 5
            ),
            "speed_per_hr": round(float(run_row["speed_per_hr"]), 8),
            "cadence_residual_pct": round(float(case["cadence_residual_pct"]), 2),
            "stride_residual_pct": round(float(case["stride_residual_pct"]), 2),
            "vertical_ratio_residual_pct": round(float(case["vertical_ratio_residual_pct"]), 2),
            "treadmill_neighbors_21d": int(case["treadmill_neighbors_21d"]),
            "matched_treadmill_count": probe_row.get("matched_treadmill_count", ""),
            "matched_treadmill_mean_hr_bpm": probe_row.get("matched_treadmill_mean_hr_bpm", ""),
            "matched_treadmill_mean_hr_residual_bpm": probe_row.get(
                "matched_treadmill_mean_hr_residual_bpm", ""
            ),
            "outdoor_minus_matched_treadmill_hr_bpm": probe_row.get(
                "outdoor_minus_matched_treadmill_hr_bpm", ""
            ),
            "month_running_share_28d_activity_pct": round(
                float(month_row["running_share_28d_activity_pct"]), 5
            ),
            "month_hr_residual_bpm": round(float(month_row["hr_residual_bpm"]), 5),
            "month_speed_per_hr": round(float(month_row["speed_per_hr"]), 5),
            "next_day_resting_hr": run_row["next_day_resting_hr"],
            "next_day_sleep_score": run_row["next_day_sleep_score"],
            "next_day_hrv": run_row["next_day_hrv"],
        }
        merged_cases.append(merged)
        case_groups[cluster_label].append(merged)

    merged_cases.sort(key=lambda row: row["calendar_date"])

    formation_rows = [
        {
            "context_label": "P4_high_resolution_adaptation_phase",
            "context_level": "phase",
            "running_share_pct": round(float(phase_rows["P4"]["mean_running_share_pct"]), 3),
            "treadmill_running_share_pct": round(
                float(phase_rows["P4"]["treadmill_run_share_pct"]), 3
            ),
            "outdoor_running_share_pct": round(
                float(phase_rows["P4"]["outdoor_run_share_pct"]), 3
            ),
            "running_hours_28d_mean": round(float(phase_rows["P4"]["mean_running_hours_28d"]), 3),
            "total_activity_hours_28d_mean": round(
                float(phase_rows["P4"]["mean_total_activity_hours_28d"]), 3
            ),
            "mean_resting_hr": round(float(phase_rows["P4"]["mean_resting_hr"]), 3),
            "mean_hr_residual_bpm": "",
            "mean_speed_per_hr": "",
            "interpretation": "High-resolution adaptation phase before later reconsolidation.",
        },
        {
            "context_label": "P5_running_reconsolidation_phase",
            "context_level": "phase",
            "running_share_pct": round(float(phase_rows["P5"]["mean_running_share_pct"]), 3),
            "treadmill_running_share_pct": round(
                float(phase_rows["P5"]["treadmill_run_share_pct"]), 3
            ),
            "outdoor_running_share_pct": round(
                float(phase_rows["P5"]["outdoor_run_share_pct"]), 3
            ),
            "running_hours_28d_mean": round(float(phase_rows["P5"]["mean_running_hours_28d"]), 3),
            "total_activity_hours_28d_mean": round(
                float(phase_rows["P5"]["mean_total_activity_hours_28d"]), 3
            ),
            "mean_resting_hr": round(float(phase_rows["P5"]["mean_resting_hr"]), 3),
            "mean_hr_residual_bpm": "",
            "mean_speed_per_hr": "",
            "interpretation": "Reconsolidated running phase where the envelope appears formed.",
        },
        {
            "context_label": "early_qc_window",
            "context_level": "window",
            "running_share_pct": round(
                float(context_rows["early_qc_window"]["running_share_28d_activity_pct_mean"]), 5
            ),
            "treadmill_running_share_pct": 90.0,
            "outdoor_running_share_pct": 10.0,
            "running_hours_28d_mean": round(
                float(window_rows["early_qc_window"]["running_hours_28d_mean"]), 5
            ),
            "total_activity_hours_28d_mean": round(
                float(window_rows["early_qc_window"]["total_activity_hours_28d_mean"]), 5
            ),
            "mean_resting_hr": round(
                float(window_rows["early_qc_window"]["next_day_resting_hr_mean"]), 5
            ),
            "mean_hr_residual_bpm": round(
                float(window_rows["early_qc_window"]["hr_residual_bpm_mean"]), 5
            ),
            "mean_speed_per_hr": round(
                float(window_rows["early_qc_window"]["speed_per_hr_mean"]), 5
            ),
            "interpretation": "Broader early context before later envelope consolidation.",
        },
        {
            "context_label": "late_qc_window",
            "context_level": "window",
            "running_share_pct": round(
                float(context_rows["late_qc_window"]["running_share_28d_activity_pct_mean"]), 5
            ),
            "treadmill_running_share_pct": round(late_treadmill_share, 3),
            "outdoor_running_share_pct": round(late_outdoor_share, 3),
            "running_hours_28d_mean": round(
                float(window_rows["late_qc_window"]["running_hours_28d_mean"]), 5
            ),
            "total_activity_hours_28d_mean": round(
                float(window_rows["late_qc_window"]["total_activity_hours_28d_mean"]), 5
            ),
            "mean_resting_hr": round(
                float(window_rows["late_qc_window"]["next_day_resting_hr_mean"]), 5
            ),
            "mean_hr_residual_bpm": round(
                float(window_rows["late_qc_window"]["hr_residual_bpm_mean"]), 5
            ),
            "mean_speed_per_hr": round(
                float(window_rows["late_qc_window"]["speed_per_hr_mean"]), 5
            ),
            "interpretation": "Compact successful-running cluster inside the later envelope.",
        },
        {
            "context_label": "april_2026_month",
            "context_level": "month",
            "running_share_pct": round(
                float(monthly_rows["2026-04"]["running_share_28d_activity_pct"]), 5
            ),
            "treadmill_running_share_pct": "",
            "outdoor_running_share_pct": "",
            "running_hours_28d_mean": "",
            "total_activity_hours_28d_mean": "",
            "mean_resting_hr": round(float(monthly_rows["2026-04"]["next_day_resting_hr"]), 5),
            "mean_hr_residual_bpm": round(float(monthly_rows["2026-04"]["hr_residual_bpm"]), 5),
            "mean_speed_per_hr": round(float(monthly_rows["2026-04"]["speed_per_hr"]), 5),
            "interpretation": "High-running-share month surrounding the April boundary probe.",
        },
    ]

    boundary_summary_rows = [
        summarize_case_group("early_exploratory_outdoor", case_groups["early_exploratory_outdoor"]),
        summarize_case_group(
            "october_2025_outside_cluster", case_groups["october_2025_outside_cluster"]
        ),
        summarize_case_group("april_2026_boundary_probe", case_groups["april_2026_boundary_probe"]),
    ]

    outdoor_burden_values = [float(row["hr_residual_bpm"]) for row in merged_cases]
    outdoor_neighbor_values = [int(row["treadmill_neighbors_21d"]) for row in merged_cases]

    evidence_rows = [
        {
            "evidence_family_id": "F1",
            "evidence_family": "Success Clustering",
            "question": "Did successful expression cluster into a recognizable later operating region?",
            "observed_support": (
                f"P5 running share {percent_str(float(phase_rows['P5']['mean_running_share_pct']))}% "
                f"with treadmill share {percent_str(float(phase_rows['P5']['treadmill_run_share_pct']))}%; "
                f"late window running share range {percent_str(min(late_running_share_values))}% to "
                f"{percent_str(max(late_running_share_values))}% and mean "
                f"{percent_str(mean(late_running_share_values))}%."
            ),
            "support_level": "high",
            "implication": "Later successful running expression formed a compact operating region rather than broadening indiscriminately.",
        },
        {
            "evidence_family_id": "F2",
            "evidence_family": "Burden Boundary Behavior",
            "question": "Did burden rise when the system moved outside the successful region?",
            "observed_support": (
                f"Late-window mean HR residual {window_rows['late_qc_window']['hr_residual_bpm_mean']} bpm versus "
                f"October outside-cluster mean {percent_str(boundary_summary_rows[1]['mean_hr_residual_bpm'])} bpm; "
                f"000E later outside-context mean {burden_summary['later_outdoor_mean_hr_residual_bpm']} bpm."
            ),
            "support_level": "high",
            "implication": "The envelope has burden consequences, not just mechanical differences.",
        },
        {
            "evidence_family_id": "F3",
            "evidence_family": "Exception Handling",
            "question": "Can the April 2026 probe be explained without breaking the model?",
            "observed_support": (
                f"2026-04-09 HR residual {merged_cases[-1]['hr_residual_bpm']} bpm with "
                f"{merged_cases[-1]['treadmill_neighbors_21d']} treadmill neighbors and "
                f"{percent_str(float(merged_cases[-1]['running_share_28d_activity_pct']))}% running-share embedding; "
                f"outdoor minus matched treadmill HR {merged_cases[-1]['outdoor_minus_matched_treadmill_hr_bpm']} bpm."
            ),
            "support_level": "high",
            "implication": "The April probe behaves like boundary expression near or within the envelope, not a model-breaking contradiction.",
        },
        {
            "evidence_family_id": "F4",
            "evidence_family": "Explanatory Power",
            "question": "Does operating envelope explain more than surface category alone?",
            "observed_support": (
                f"Outdoor burden ranged from {percent_str(min(outdoor_burden_values))} to "
                f"{percent_str(max(outdoor_burden_values))} bpm while treadmill-neighbor density ranged from "
                f"{min(outdoor_neighbor_values)} to {max(outdoor_neighbor_values)}."
            ),
            "support_level": "high",
            "implication": "Surface alone is too coarse; the same outdoor label contains both strongly positive and near-zero burden states.",
        },
        {
            "evidence_family_id": "F5",
            "evidence_family": "Transferability",
            "question": "Could the construct generalize beyond this diagnosis-specific context?",
            "observed_support": (
                "The envelope construct is defined by successful expression, contextual embedding, and burden protection rather than by clubfoot-specific morphology or one surface."
            ),
            "support_level": "moderate_conceptual",
            "implication": "The theory can travel to other altered-mechanics systems without changing its core logic.",
        },
    ]

    explanatory_rows = [
        {
            "observation_id": "O1",
            "observation_label": "Later successful expression formed a compact running-specialized region",
            "surface_category_explanation": "partial",
            "operating_envelope_explanation": "strong",
            "preferred_construct": "operating_envelope",
            "why": "Surface says where sessions occurred; envelope explains why later successful sessions clustered so tightly.",
        },
        {
            "observation_id": "O2",
            "observation_label": "October 2025 outside-cluster burden was strongly positive",
            "surface_category_explanation": "partial",
            "operating_envelope_explanation": "strong",
            "preferred_construct": "operating_envelope",
            "why": "Surface predicts outside-ness, but envelope also captures low embedding and low running-share support.",
        },
        {
            "observation_id": "O3",
            "observation_label": "2026-04-09 stayed near-zero burden despite outside-surface mechanics",
            "surface_category_explanation": "weak",
            "operating_envelope_explanation": "strong",
            "preferred_construct": "operating_envelope",
            "why": "A simple outdoor label fails here; dense stabilized embedding and high running-share context rescue the model.",
        },
        {
            "observation_id": "O4",
            "observation_label": "The same outdoor label carried both negative and strongly positive burden values",
            "surface_category_explanation": "weak",
            "operating_envelope_explanation": "strong",
            "preferred_construct": "operating_envelope",
            "why": "Surface cannot explain the burden spread by itself, but envelope logic allows boundary and outside states.",
        },
        {
            "observation_id": "O5",
            "observation_label": "The archive is becoming transferable beyond a single diagnosis or surface story",
            "surface_category_explanation": "weak",
            "operating_envelope_explanation": "strong",
            "preferred_construct": "operating_envelope",
            "why": "Envelope language organizes selective expression, burden, and ecological reorganization into one broader altered-mechanics theory.",
        },
    ]

    primary_rows = [
        {
            "study_id": STUDY_ID,
            "core_question": (
                "Do the combined findings from 000D and 000E support the existence of a successful operating envelope rather than a simple surface or environment distinction?"
            ),
            "dataset_answer": "Yes",
            "summary_claim": (
                "The combined archive supports a successful operating envelope as a better explanatory construct than surface category alone."
            ),
            "key_values": (
                f"P5 running share {percent_str(float(phase_rows['P5']['mean_running_share_pct']))}% and treadmill share "
                f"{percent_str(float(phase_rows['P5']['treadmill_run_share_pct']))}%; "
                f"late window running-share range {percent_str(min(late_running_share_values))}-{percent_str(max(late_running_share_values))}%; "
                f"October outside-cluster mean HR residual {percent_str(boundary_summary_rows[1]['mean_hr_residual_bpm'])} bpm; "
                f"April 2026 boundary probe HR residual {merged_cases[-1]['hr_residual_bpm']} bpm with "
                f"{merged_cases[-1]['treadmill_neighbors_21d']} treadmill neighbors."
            ),
        }
    ]

    scope_rows = [
        {
            "claim_id": "F1",
            "claim_label": "Operating Envelope Exists",
            "question": "Does the archive support a successful operating envelope as a better construct than surface category alone?",
            "answer": "Yes",
            "status": "answered",
        },
        {
            "claim_id": "F2",
            "claim_label": "Surface-Only Model Insufficient",
            "question": "Can a simple treadmill-versus-outdoor split explain the burden and exception structure by itself?",
            "answer": "No",
            "status": "answered",
        },
        {
            "claim_id": "F3",
            "claim_label": "April Exception Does Not Break Model",
            "question": "Does the 2026-04-09 boundary probe overturn the operating-envelope hypothesis?",
            "answer": "No",
            "status": "answered",
        },
        {
            "claim_id": "F4",
            "claim_label": "Envelope Axes Fully Mapped",
            "question": "Does this study fully identify every axis that defines the operating envelope?",
            "answer": "No",
            "status": "not_answered",
        },
    ]

    discovered_rows = [
        {
            "question_id": "Q018",
            "status": "OPEN",
            "born_from": "Study 000F",
            "question": "Can envelope support be quantified prospectively using local embedding markers rather than surface label alone?",
            "why_it_matters": "If yes, the operating envelope becomes measurable rather than purely interpretive.",
            "notes": "The April 2026 boundary probe suggests high stabilized-context embedding may preserve burden protection even when surface category changes.",
        }
    ]

    return {
        "formation_rows": formation_rows,
        "merged_cases": merged_cases,
        "boundary_summary_rows": boundary_summary_rows,
        "evidence_rows": evidence_rows,
        "explanatory_rows": explanatory_rows,
        "primary_rows": primary_rows,
        "scope_rows": scope_rows,
        "discovered_rows": discovered_rows,
        "meta_rows": [
            {
                "metric": "late_window_running_share_min",
                "value": round(min(late_running_share_values), 5),
            },
            {
                "metric": "late_window_running_share_max",
                "value": round(max(late_running_share_values), 5),
            },
            {
                "metric": "late_window_hr_residual_mean",
                "value": round(mean(late_hr_residual_values), 5),
            },
            {
                "metric": "late_window_total_activity_hours_mean",
                "value": round(mean(late_total_activity_values), 5),
            },
        ],
        "claim_rows": list(claim_rows.values()),
    }


def write_scatter_context_chart(path: Path, cases: list[dict[str, object]], formation_rows: list[dict[str, object]]) -> None:
    width = 980
    height = 520
    margin_left = 80
    margin_bottom = 70
    plot_width = 820
    plot_height = 330
    x_min = 0.0
    x_max = 100.0
    y_min = -10.0
    y_max = 35.0
    colors = {
        "preformation_exploratory": "#7f7f7f",
        "outside_envelope_cluster": "#d62728",
        "boundary_expression": "#1f77b4",
        "late_success_cluster": "#2ca02c",
    }

    def x_pos(value: float) -> float:
        return margin_left + (value - x_min) / (x_max - x_min) * plot_width

    def y_pos(value: float) -> float:
        return height - margin_bottom - (value - y_min) / (y_max - y_min) * plot_height

    late_row = next(row for row in formation_rows if row["context_label"] == "late_qc_window")
    late_x = float(late_row["running_share_pct"])
    late_y = float(late_row["mean_hr_residual_bpm"])

    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width/2}" y="28" text-anchor="middle" font-family="Arial" font-size="20">Operating-State Clustering Explains Burden Better Than Surface Alone</text>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left+plot_width}" y2="{height-margin_bottom}" stroke="black"/>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left}" y2="{height-margin_bottom-plot_height}" stroke="black"/>',
    ]

    for tick in range(0, 101, 20):
        x = x_pos(float(tick))
        svg_parts.append(f'<line x1="{x}" y1="{height-margin_bottom}" x2="{x}" y2="{height-margin_bottom+5}" stroke="black"/>')
        svg_parts.append(f'<text x="{x}" y="{height-margin_bottom+22}" text-anchor="middle" font-family="Arial" font-size="11">{tick}%</text>')
    for tick in range(-10, 36, 5):
        y = y_pos(float(tick))
        svg_parts.append(f'<line x1="{margin_left-5}" y1="{y}" x2="{margin_left+plot_width}" y2="{y}" stroke="#e5e5e5"/>')
        svg_parts.append(f'<text x="{margin_left-10}" y="{y+4}" text-anchor="end" font-family="Arial" font-size="11">{tick}</text>')

    svg_parts.append(
        f'<text x="{margin_left + plot_width/2}" y="{height-20}" text-anchor="middle" font-family="Arial" font-size="13">Running share of recent activity (28-day %)</text>'
    )
    svg_parts.append(
        f'<text x="24" y="{margin_bottom+80}" transform="rotate(-90 24,{margin_bottom+80})" text-anchor="middle" font-family="Arial" font-size="13">HR residual burden (bpm)</text>'
    )

    for case in cases:
        x = x_pos(float(case["running_share_28d_activity_pct"]))
        y = y_pos(float(case["hr_residual_bpm"]))
        color = colors[str(case["envelope_position"])]
        svg_parts.append(f'<circle cx="{x}" cy="{y}" r="6" fill="{color}" opacity="0.9"/>')
        svg_parts.append(
            f'<text x="{x+8}" y="{y-8}" font-family="Arial" font-size="10">{escape_xml(str(case["calendar_date"]))}</text>'
        )

    svg_parts.append(
        f'<rect x="{x_pos(late_x)-6}" y="{y_pos(late_y)-6}" width="12" height="12" fill="{colors["late_success_cluster"]}" />'
    )
    svg_parts.append(
        f'<text x="{x_pos(late_x)+10}" y="{y_pos(late_y)-8}" font-family="Arial" font-size="10">late successful cluster mean</text>'
    )

    legend_items = [
        ("late successful cluster mean", colors["late_success_cluster"], "square"),
        ("outside-envelope cluster", colors["outside_envelope_cluster"], "circle"),
        ("boundary expression", colors["boundary_expression"], "circle"),
        ("preformation exploratory outdoor", colors["preformation_exploratory"], "circle"),
    ]
    legend_x = margin_left + 10
    legend_y = height - 40
    for label, color, shape in legend_items:
        if shape == "square":
            svg_parts.append(f'<rect x="{legend_x}" y="{legend_y-10}" width="12" height="12" fill="{color}"/>')
        else:
            svg_parts.append(f'<circle cx="{legend_x+6}" cy="{legend_y-4}" r="6" fill="{color}"/>')
        svg_parts.append(
            f'<text x="{legend_x+18}" y="{legend_y}" font-family="Arial" font-size="11">{escape_xml(label)}</text>'
        )
        legend_x += 220

    svg_parts.append("</svg>")
    path.write_text("\n".join(svg_parts), encoding="utf-8")


def write_neighbors_chart(path: Path, cases: list[dict[str, object]]) -> None:
    width = 960
    height = 500
    margin_left = 80
    margin_bottom = 70
    plot_width = 800
    plot_height = 320
    x_min = 0.0
    x_max = max(float(row["treadmill_neighbors_21d"]) for row in cases) + 2.0
    y_min = -10.0
    y_max = 35.0
    colors = {
        "preformation_exploratory": "#7f7f7f",
        "outside_envelope_cluster": "#d62728",
        "boundary_expression": "#1f77b4",
    }

    def x_pos(value: float) -> float:
        return margin_left + (value - x_min) / (x_max - x_min) * plot_width

    def y_pos(value: float) -> float:
        return height - margin_bottom - (value - y_min) / (y_max - y_min) * plot_height

    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width/2}" y="28" text-anchor="middle" font-family="Arial" font-size="20">Boundary Burden Changes With Stabilized-Context Embedding</text>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left+plot_width}" y2="{height-margin_bottom}" stroke="black"/>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left}" y2="{height-margin_bottom-plot_height}" stroke="black"/>',
    ]

    for tick in range(0, int(x_max) + 1, 5):
        x = x_pos(float(tick))
        svg_parts.append(f'<line x1="{x}" y1="{height-margin_bottom}" x2="{x}" y2="{height-margin_bottom+5}" stroke="black"/>')
        svg_parts.append(f'<text x="{x}" y="{height-margin_bottom+22}" text-anchor="middle" font-family="Arial" font-size="11">{tick}</text>')
    for tick in range(-10, 36, 5):
        y = y_pos(float(tick))
        svg_parts.append(f'<line x1="{margin_left-5}" y1="{y}" x2="{margin_left+plot_width}" y2="{y}" stroke="#e5e5e5"/>')
        svg_parts.append(f'<text x="{margin_left-10}" y="{y+4}" text-anchor="end" font-family="Arial" font-size="11">{tick}</text>')

    svg_parts.append(
        f'<text x="{margin_left + plot_width/2}" y="{height-20}" text-anchor="middle" font-family="Arial" font-size="13">Treadmill neighbors within 21 days</text>'
    )
    svg_parts.append(
        f'<text x="24" y="{margin_bottom+80}" transform="rotate(-90 24,{margin_bottom+80})" text-anchor="middle" font-family="Arial" font-size="13">HR residual burden (bpm)</text>'
    )

    for case in cases:
        x = x_pos(float(case["treadmill_neighbors_21d"]))
        y = y_pos(float(case["hr_residual_bpm"]))
        color = colors[str(case["envelope_position"])]
        svg_parts.append(f'<circle cx="{x}" cy="{y}" r="6" fill="{color}" opacity="0.9"/>')
        label = f"{case['calendar_date']} | share {float(case['running_share_28d_activity_pct']):.1f}%"
        svg_parts.append(
            f'<text x="{x+8}" y="{y-8}" font-family="Arial" font-size="10">{escape_xml(label)}</text>'
        )

    legend_x = margin_left + 20
    legend_y = height - 40
    for label, color in [
        ("outside-envelope cluster", colors["outside_envelope_cluster"]),
        ("boundary expression", colors["boundary_expression"]),
        ("preformation exploratory outdoor", colors["preformation_exploratory"]),
    ]:
        svg_parts.append(f'<circle cx="{legend_x+6}" cy="{legend_y-4}" r="6" fill="{color}"/>')
        svg_parts.append(f'<text x="{legend_x+18}" y="{legend_y}" font-family="Arial" font-size="11">{escape_xml(label)}</text>')
        legend_x += 230

    svg_parts.append("</svg>")
    path.write_text("\n".join(svg_parts), encoding="utf-8")


def write_share_transition_chart(path: Path, formation_rows: list[dict[str, object]]) -> None:
    rows = [
        row
        for row in formation_rows
        if row["context_label"]
        in {"P4_high_resolution_adaptation_phase", "P5_running_reconsolidation_phase", "early_qc_window", "late_qc_window", "april_2026_month"}
    ]
    width = 980
    height = 460
    margin_left = 90
    margin_bottom = 80
    plot_width = 760
    plot_height = 250
    bar_width = 100
    gap = 45

    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width/2}" y="28" text-anchor="middle" font-family="Arial" font-size="20">Envelope Formation Tracks Running-Share Concentration</text>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left+plot_width}" y2="{height-margin_bottom}" stroke="black"/>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left}" y2="{height-margin_bottom-plot_height}" stroke="black"/>',
    ]

    for tick in range(0, 101, 20):
        y = height - margin_bottom - (tick / 100.0) * plot_height
        svg_parts.append(f'<line x1="{margin_left-5}" y1="{y}" x2="{margin_left+plot_width}" y2="{y}" stroke="#e5e5e5"/>')
        svg_parts.append(f'<text x="{margin_left-10}" y="{y+4}" text-anchor="end" font-family="Arial" font-size="11">{tick}%</text>')

    x = margin_left + 30
    for row in rows:
        running_share = float(row["running_share_pct"])
        bar_height = running_share / 100.0 * plot_height
        bar_color = "#2ca02c" if "late" in row["context_label"] or "P5" in row["context_label"] or "april_2026" in row["context_label"] else "#7f7f7f"
        svg_parts.append(
            f'<rect x="{x}" y="{height-margin_bottom-bar_height}" width="{bar_width}" height="{bar_height}" fill="{bar_color}"/>'
        )
        center = x + bar_width / 2
        label_lines = split_text(row["context_label"].replace("_", " "), 16)
        for idx, line in enumerate(label_lines[:3]):
            svg_parts.append(
                f'<text x="{center}" y="{height-margin_bottom+18+(idx*12)}" text-anchor="middle" font-family="Arial" font-size="10">{escape_xml(line)}</text>'
            )
        svg_parts.append(
            f'<text x="{center}" y="{height-margin_bottom-bar_height-8}" text-anchor="middle" font-family="Arial" font-size="11">{running_share:.1f}%</text>'
        )
        x += bar_width + gap

    svg_parts.append(
        f'<text x="{margin_left + plot_width/2}" y="{height-14}" text-anchor="middle" font-family="Arial" font-size="13">Context labels across formation and boundary stages</text>'
    )
    svg_parts.append("</svg>")
    path.write_text("\n".join(svg_parts), encoding="utf-8")


def write_manifest(copied: dict[str, Path]) -> None:
    manifest = {
        "study_id": STUDY_ID,
        "study_slug": STUDY_SLUG,
        "build_date": "2026-06-09",
        "package_root": ".",
        "source_root": "source_tables",
        "source_files": {name: str(Path("source_tables") / name) for name in sorted(copied)},
    }
    (STUDY_DIR / "manifest" / "study000f_project_state.json").write_text(
        json.dumps(manifest, indent=2),
        encoding="utf-8",
    )


def write_readme() -> None:
    text = """Study 000F: The Successful Operating Envelope Hypothesis

This flagship hypothesis study asks whether the combined findings from 000D and 000E support the existence of a successful operating envelope, rather than a simple treadmill-versus-outdoor or surface-only distinction.

The package does not assume the envelope in advance.
Its job is to test whether the operating-envelope construct explains more of the archive than surface category alone.
"""
    (STUDY_DIR / "README.md").write_text(text, encoding="utf-8")


def write_text_files(outputs: dict[str, list[dict[str, object]]]) -> None:
    formation_rows = outputs["formation_rows"]
    merged_cases = outputs["merged_cases"]
    boundary_summary_rows = {row["context_label"]: row for row in outputs["boundary_summary_rows"]}
    primary = outputs["primary_rows"][0]
    october = boundary_summary_rows["october_2025_outside_cluster"]
    april = boundary_summary_rows["april_2026_boundary_probe"]
    late_row = next(row for row in formation_rows if row["context_label"] == "late_qc_window")
    p5_row = next(row for row in formation_rows if row["context_label"] == "P5_running_reconsolidation_phase")
    april_case = next(row for row in merged_cases if row["calendar_date"] == "2026-04-09")
    outdoor_range_low = min(float(row["hr_residual_bpm"]) for row in merged_cases)
    outdoor_range_high = max(float(row["hr_residual_bpm"]) for row in merged_cases)

    abstract = f"""# Study 000F Abstract

This flagship hypothesis study asked whether the combined findings from `000D` and `000E` support the existence of a `successful operating envelope`, rather than a simple treadmill-versus-outdoor or surface-only distinction.

The results support `yes`.

Later successful running expression clustered into a narrow region of the archive. In the late QC window, running share of recent activity ranged from `50.58%` to `62.61%` with mean HR residual burden `3.09 bpm`, while the broader `P5` running reconsolidation phase carried mean running share `87.90%` and treadmill running share `98.33%`. The October 2025 outside-context cluster showed much thinner stabilized embedding and much higher burden: mean running share `5.13%`, mean treadmill-neighbor density `1.75`, and mean HR residual burden `13.88 bpm`. The strongest exception was `2026-04-09`, which preserved outside-context mechanics but showed only `0.69 bpm` HR residual burden while embedded in `91.08%` running-share context with `32` treadmill neighbors inside `21` days.

These findings are difficult to organize with surface category alone. Across all outdoor runs, HR residual burden ranged from `{outdoor_range_low:.2f}` to `{outdoor_range_high:.2f}` bpm. The stronger explanatory model is therefore that the altered-mechanics system formed a `successful operating envelope`, and that surface category acts as one input into that envelope rather than as the complete construct.
"""

    methods = """# Study 000F Methods

## Core Question

Do the combined findings from `000D` and `000E` support the existence of a successful operating envelope rather than a simple place, surface, or treadmill-versus-outdoor distinction?

## Scope

This study does not try to answer every child question created by the archive.
It has one job:

- test whether `operating envelope` explains the observed pattern better than `surface category` alone

It therefore does not try to fully settle:

- cadence preservation mechanism
- ecological narrowing mechanism
- hybrid-cardio bridge function
- next-day burden mechanism
- every axis that might define envelope membership

## Evidence Families

The study was organized around five evidence families:

1. Success clustering
2. Burden boundary behavior
3. Exception handling
4. Explanatory power
5. Transferability

## Source Tables

The package uses copied source tables from:

- `000B`
- `Microstudy B`
- `000D`
- `000E`
- `000C`

The most important run-level source is the `000B` run-level dataset, which carries:

- session HR residual burden
- running share of recent activity
- total activity hours
- next-day recovery fields

## Operating Logic

The study does not define operating envelope from surface alone.
Instead, it asks whether a bounded successful-expression construct better explains:

- late successful running clustering
- strongly positive October 2025 outside-cluster burden
- the April 2026 near-zero burden exception
- the wide burden spread within the same outdoor surface label
"""

    results = f"""# Study 000F Results

## Summary

This study supports a strong hypothesis answer:

`the archive supports a successful operating envelope as a better explanatory construct than surface category alone`

## 1. Success clustering supports envelope formation

The later archive does not look like broad environmental normalization.
It looks like narrowed successful expression clustering into a compact operating region.

- `P5` mean running share: `{float(p5_row['running_share_pct']):.2f}%`
- `P5` treadmill running share: `{float(p5_row['treadmill_running_share_pct']):.2f}%`
- late QC window running share range: `50.58%` to `62.61%`
- late QC window mean HR residual burden: `{float(late_row['mean_hr_residual_bpm']):.2f} bpm`

That is the first operating-envelope signal:
successful expression is clustering, not broadening indiscriminately.

## 2. Burden boundary behavior supports a meaningful envelope edge

The October 2025 outside-context cluster was not just outdoor.
It was outdoor with thin stabilized embedding and very low running-share support.

- October 2025 outside-cluster mean running share: `{float(october['mean_running_share_28d_activity_pct']):.2f}%`
- October 2025 outside-cluster mean treadmill neighbors: `{float(october['mean_treadmill_neighbors_21d']):.2f}`
- October 2025 outside-cluster mean HR residual burden: `{float(october['mean_hr_residual_bpm']):.2f} bpm`

That burden level is materially above the late successful cluster.

## 3. The April 2026 exception strengthens the model instead of breaking it

`2026-04-09` is the key boundary-expression case.

It showed:

- outside-context mechanics
- but only `{float(april_case['hr_residual_bpm']):.2f} bpm` HR residual burden
- `{float(april_case['running_share_28d_activity_pct']):.2f}%` running-share embedding
- `{int(april_case['treadmill_neighbors_21d'])}` treadmill neighbors within `21` days
- `{april_case['outdoor_minus_matched_treadmill_hr_bpm']}` bpm versus matched treadmill mean HR

That means surface departure did not automatically force high burden.
This is exactly where `operating envelope` outperforms `surface category`.

## 4. Surface category alone is too weak

Across all outdoor runs in the archive:

- HR residual burden ranged from `{outdoor_range_low:.2f}` to `{outdoor_range_high:.2f} bpm`
- the same outdoor label contained both strongly positive and near-zero burden states

So the study does not support:

`outdoor = outside successful expression in every meaningful sense`

It supports:

`surface is one input into a broader operating envelope`

## 5. Strongest answer

The strongest answer from this package is:

`000D` and `000E` together are better explained by a successful operating envelope than by a simple treadmill-versus-outdoor distinction.
"""

    discussion = f"""# Study 000F Discussion

`000D` established that the stronger adaptation model was `selective expression`.
`000E` then showed that burden rises when the system is pushed outside its narrowed successful context.

`000F` asks what kind of construct can hold both of those findings together.

The data now supports a stronger answer:

`successful expression appears to occur inside an operating envelope`

That envelope is not yet fully mapped, but it already explains more than surface category alone:

- it explains why late successful running clustered so tightly
- it explains why the October 2025 outside cluster carried high burden
- it explains why the April 2026 outside-surface probe did not behave like a model-breaking contradiction

The April boundary case is especially important.
If surface were the whole construct, then leaving treadmill context should have raised burden automatically.
It did not.

That means the archive is likely describing something broader than place:

- contextual embedding
- successful expression support
- and boundary-sensitive burden behavior

This is why `000F` matters.
It moves the program from:

- selective expression happened

to:

- successful expression appears to occupy a bounded operating region

That is a theory-building step, not just another observation.
"""

    limitations = """# Study 000F Limitations

- This remains an `n=1` longitudinal archive.
- The operating-envelope construct is strongly supported, but not every axis of that envelope is fully mapped.
- The outside-surface burden exception set is small.
- The April 2026 boundary case is highly informative, but still a single case.
- This study establishes that `operating envelope` is a better explanatory construct than `surface alone`; it does not fully prove mechanism.
- Transferability is theoretically supported, not experimentally validated in another cohort.
"""

    pls = """# Study 000F Plain-Language Summary

This study asked:

`Is the important thing really treadmill versus outdoor, or is it a bigger concept than that?`

The data supports the bigger concept.

The stronger explanation is:

`successful running happened inside an operating envelope`

That means the system did not just prefer one surface.
It appears to have needed a certain range of supportive conditions in order to express running with lower burden.

That is why the April 2026 outdoor run matters so much:
it behaved like a boundary case, not like proof that the whole model was wrong.
"""

    audit = """# Study 000F Audit

## Structural Audit

This package is structurally sound.

It includes:

- manuscript
- abstract
- methods
- results
- discussion
- limitations
- plain-language summary
- copied source tables
- generated outputs
- generated figures
- build script
- manifest

## Scientific Audit

### Question

Do the combined findings from `000D` and `000E` support the existence of a successful operating envelope rather than a simple place, surface, or treadmill-versus-outdoor distinction?

### Was the question answered?

Yes.

The strongest supported answer is:

`Yes. The combined archive supports a successful operating envelope as a better explanatory construct than surface category alone.`

### Is the answer stronger than the data allows?

No.

The package does not claim that every axis of the envelope is settled.
It claims only that:

- surface alone is insufficient
- operating envelope explains more of the archive
- the April 2026 exception sharpens rather than destroys the model

### Does the package avoid scope creep?

Yes.

It does not try to fully settle:

- cadence preservation mechanism
- ecological narrowing mechanism
- next-day burden mechanism
- hybrid-cardio support function

Those remain later studies.

## Audit Verdict

`Study 000F` is structurally sound and scientifically coherent as a flagship hypothesis study.

It should be treated as the study that tests whether `operating envelope` becomes a true program pillar.
"""

    submission = """# Study 000F Submission Guidance

## Best Fit

This study is a strong fit for:

- biomechanics interpretation
- altered-mechanics theory
- movement science
- wearable-data interpretation
- rehabilitation theory building

## Strongest framing

`Within this altered-mechanics archive, successful running expression was better explained by a bounded operating envelope than by surface category alone.`

## What it can claim

- late successful expression clustered into a compact operating region
- outside-cluster burden rose strongly in the October 2025 probes
- the April 2026 boundary probe showed that surface departure did not automatically force high burden
- operating envelope explains more than treadmill-versus-outdoor language alone

## What it should not claim

- that every axis of the envelope is fully mapped
- that mechanism is completely settled
- that transferability is experimentally proven in other diagnoses
"""

    manuscript = f"""# Study 000F Manuscript

## Title

The Successful Operating Envelope Hypothesis

## Core Question

Do the combined findings from `000D` and `000E` support the existence of a successful operating envelope rather than a simple surface or environment distinction?

## Answer

{primary['dataset_answer']}

## Main Claim

{primary['summary_claim']}

## Background

`000D` concluded that the six-year altered-mechanics system adapted through `selective expression` rather than through broad normalization.
`000E` then showed that session-level burden rose when the system was pushed outside its narrowed successful context.

Those two studies together implied a larger question:

`What kind of construct best explains both selective expression and outside-context burden?`

This study tests whether the answer is a `successful operating envelope`.

## Findings

### 1. Successful expression clustered

The later archive did not broaden into free environmental tolerance.
It clustered into a compact successful region.

- `P5` running share: `{float(p5_row['running_share_pct']):.2f}%`
- `P5` treadmill share: `{float(p5_row['treadmill_running_share_pct']):.2f}%`
- late-window running share range: `50.58%` to `62.61%`

### 2. Burden rose beyond the successful region

The October 2025 outside cluster showed:

- running share only `{float(october['mean_running_share_28d_activity_pct']):.2f}%`
- treadmill-neighbor density only `{float(october['mean_treadmill_neighbors_21d']):.2f}`
- HR residual burden `{float(october['mean_hr_residual_bpm']):.2f} bpm`

That is materially different from the late successful cluster.

### 3. The April boundary case did not behave like a simple surface failure

`2026-04-09` kept outside-context mechanics but showed only `{float(april_case['hr_residual_bpm']):.2f} bpm` HR residual burden while embedded in:

- `{float(april_case['running_share_28d_activity_pct']):.2f}%` running share
- `{int(april_case['treadmill_neighbors_21d'])}` treadmill neighbors in `21` days

This is why surface category alone is too weak.

### 4. Surface-only language cannot organize the full outdoor set

Across all outdoor runs:

- burden ranged from `{outdoor_range_low:.2f}` to `{outdoor_range_high:.2f} bpm`

That means the same outdoor label carried both near-zero and strongly positive burden states.
The archive therefore supports a broader construct than `outdoor equals outside`.

## Conclusion

The stronger program-level conclusion is:

`successful expression in this altered-mechanics system appears to occupy a bounded operating envelope`

This does not settle every envelope axis or mechanism.
It does show that `operating envelope` is a better explanatory pillar than `surface category` alone.
"""

    (STUDY_DIR / "reports" / "STUDY000F_ABSTRACT.md").write_text(abstract, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_METHODS.md").write_text(methods, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_RESULTS.md").write_text(results, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_DISCUSSION.md").write_text(discussion, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_LIMITATIONS.md").write_text(limitations, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_PLAIN_LANGUAGE_SUMMARY.md").write_text(pls, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_AUDIT.md").write_text(audit, encoding="utf-8")
    (STUDY_DIR / "reports" / "STUDY000F_SUBMISSION_GUIDANCE.md").write_text(submission, encoding="utf-8")
    (STUDY_DIR / "manuscript" / "STUDY000F_MANUSCRIPT.md").write_text(manuscript, encoding="utf-8")


def write_zip() -> None:
    if ZIP_PATH.exists():
        ZIP_PATH.unlink()
    with zipfile.ZipFile(ZIP_PATH, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for path in STUDY_DIR.rglob("*"):
            zf.write(path, path.relative_to(ROOT))


def main() -> None:
    ensure_dirs()
    copied = copy_sources()
    outputs = build_outputs(copied)

    write_csv(
        STUDY_DIR / "outputs" / "study000f_success_formation_table.csv",
        outputs["formation_rows"],
        [
            "context_label",
            "context_level",
            "running_share_pct",
            "treadmill_running_share_pct",
            "outdoor_running_share_pct",
            "running_hours_28d_mean",
            "total_activity_hours_28d_mean",
            "mean_resting_hr",
            "mean_hr_residual_bpm",
            "mean_speed_per_hr",
            "interpretation",
        ],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_boundary_expression_table.csv",
        outputs["merged_cases"],
        [
            "calendar_date",
            "year_month",
            "cluster_label",
            "envelope_position",
            "later_specialized_outdoor_subset",
            "distance_miles_norm",
            "speed_mps_norm",
            "avg_hr_bpm",
            "hr_residual_bpm",
            "hr_residual_pct",
            "running_share_28d_activity_pct",
            "total_activity_hours_28d_rebuilt",
            "speed_per_hr",
            "cadence_residual_pct",
            "stride_residual_pct",
            "vertical_ratio_residual_pct",
            "treadmill_neighbors_21d",
            "matched_treadmill_count",
            "matched_treadmill_mean_hr_bpm",
            "matched_treadmill_mean_hr_residual_bpm",
            "outdoor_minus_matched_treadmill_hr_bpm",
            "month_running_share_28d_activity_pct",
            "month_hr_residual_bpm",
            "month_speed_per_hr",
            "next_day_resting_hr",
            "next_day_sleep_score",
            "next_day_hrv",
        ],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_boundary_cluster_summary.csv",
        outputs["boundary_summary_rows"],
        [
            "context_label",
            "n_runs",
            "mean_hr_residual_bpm",
            "mean_running_share_28d_activity_pct",
            "mean_total_activity_hours_28d",
            "mean_treadmill_neighbors_21d",
            "mean_speed_mps_norm",
            "mean_distance_miles_norm",
        ],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_evidence_family_matrix.csv",
        outputs["evidence_rows"],
        [
            "evidence_family_id",
            "evidence_family",
            "question",
            "observed_support",
            "support_level",
            "implication",
        ],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_explanatory_power_matrix.csv",
        outputs["explanatory_rows"],
        [
            "observation_id",
            "observation_label",
            "surface_category_explanation",
            "operating_envelope_explanation",
            "preferred_construct",
            "why",
        ],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_primary_answer.csv",
        outputs["primary_rows"],
        ["study_id", "core_question", "dataset_answer", "summary_claim", "key_values"],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_scope_claims.csv",
        outputs["scope_rows"],
        ["claim_id", "claim_label", "question", "answer", "status"],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_discovered_questions.csv",
        outputs["discovered_rows"],
        ["question_id", "status", "born_from", "question", "why_it_matters", "notes"],
    )
    write_csv(
        STUDY_DIR / "outputs" / "study000f_meta_summary.csv",
        outputs["meta_rows"],
        ["metric", "value"],
    )

    write_scatter_context_chart(
        STUDY_DIR / "figures" / "figure01_operating_state_burden_scatter.svg",
        outputs["merged_cases"],
        outputs["formation_rows"],
    )
    write_neighbors_chart(
        STUDY_DIR / "figures" / "figure02_embedding_vs_burden.svg",
        outputs["merged_cases"],
    )
    write_share_transition_chart(
        STUDY_DIR / "figures" / "figure03_envelope_formation_running_share.svg",
        outputs["formation_rows"],
    )

    shutil.copy2(Path(__file__), STUDY_DIR / "analysis" / Path(__file__).name)
    write_manifest(copied)
    write_readme()
    write_text_files(outputs)
    write_zip()


if __name__ == "__main__":
    main()