from __future__ import annotations

import csv
import hashlib
import json
import textwrap
from collections import defaultdict
from datetime import date, datetime
from pathlib import Path
from statistics import mean


# Package root: the parent of the directory that contains this script
# (parents[0] is the script's own directory, parents[1] is one level above).
ROOT = Path(__file__).resolve().parents[1]
# Directory that read_csv() loads its input tables from.
SOURCE = ROOT / "source_tables"
# Directory that write_csv() writes derived tables into.
OUTPUTS = ROOT / "outputs"
# NOTE(review): REPORTS, MANUSCRIPT, APPENDICES and MANIFEST are not referenced
# in the portion of the file reviewed here; presumably they are used further
# down in build() — confirm before relying on them.
REPORTS = ROOT / "reports"
MANUSCRIPT = ROOT / "manuscript"
APPENDICES = ROOT / "appendices"
# Directory that build() writes the generated SVG figures into.
FIGURES = ROOT / "figures"
MANIFEST = ROOT / "manifest"


def read_csv(name: str) -> list[dict[str, str]]:
    """Load one source table and return its rows as dictionaries.

    Args:
        name: File name of the CSV, resolved relative to ``SOURCE``.

    Returns:
        One dict per data row, keyed by the header names of the file.
    """
    # newline="" lets the csv module handle line endings itself.
    with (SOURCE / name).open("r", encoding="utf-8", newline="") as stream:
        reader = csv.DictReader(stream)
        return [record for record in reader]


def write_csv(name: str, rows: list[dict], fieldnames: list[str]) -> None:
    """Write ``rows`` as a CSV file with a header into ``OUTPUTS``.

    Args:
        name: File name of the CSV, resolved relative to ``OUTPUTS``.
        rows: Row dictionaries to serialize, in output order.
        fieldnames: Column names, in the order they appear in the file.
    """
    target = OUTPUTS / name
    # newline="" lets the csv module control the line terminator.
    with target.open("w", encoding="utf-8", newline="") as stream:
        out = csv.DictWriter(stream, fieldnames=fieldnames)
        out.writeheader()
        for record in rows:
            out.writerow(record)


def write_text(path: Path, text: str) -> None:
    """Normalize an indented multi-line template and write it as UTF-8.

    The text is dedented, stripped of leading and trailing whitespace, and
    every newline that is followed by four spaces has those four spaces
    removed. A single trailing newline is appended before writing.

    Args:
        path: Destination file; overwritten if it already exists.
        text: Raw (typically triple-quoted) text to normalize and write.
    """
    body = textwrap.dedent(text).strip()
    # Drop one 4-space indent after each newline. Presumably this covers
    # templates where dedent() finds no common margin because unindented
    # rows were concatenated in — TODO confirm intent.
    body = "\n".join(body.split("\n    "))
    path.write_text(f"{body}\n", encoding="utf-8")


def as_float(value: str | None) -> float | None:
    if value in {None, "", "None"}:
        return None
    return float(value)


def as_int(value: str | None) -> int | None:
    if value in {None, "", "None"}:
        return None
    return int(float(value))


def as_date(value: str) -> date:
    """Parse a ``YYYY-MM-DD`` string into a ``date``.

    Args:
        value: Date text in ``%Y-%m-%d`` format.

    Returns:
        The calendar date with no time component.

    Raises:
        ValueError: If ``value`` does not match ``%Y-%m-%d``.
    """
    parsed = datetime.strptime(value, "%Y-%m-%d")
    return date(parsed.year, parsed.month, parsed.day)


def fmt(value: float | int | None, digits: int = 2) -> str:
    if value is None:
        return "None"
    if isinstance(value, int):
        return str(value)
    return f"{value:.{digits}f}"


def sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of a file's contents.

    Args:
        path: File to hash; its bytes are read in full.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    digest = hashlib.sha256()
    digest.update(path.read_bytes())
    return digest.hexdigest()


def svg_text(x: float, y: float, text: str, size: int = 12, weight: str = "normal", anchor: str = "middle", fill: str = "#222") -> str:
    """Build an SVG ``<text>`` element in Arial.

    Args:
        x: Value of the ``x`` attribute.
        y: Value of the ``y`` attribute.
        text: Element content; inserted verbatim, with no XML escaping.
        size: Value of ``font-size``.
        weight: Value of ``font-weight``.
        anchor: Value of ``text-anchor``.
        fill: Value of ``fill``.

    Returns:
        The element as a single-line string.
    """
    attributes = (
        ("x", x),
        ("y", y),
        ("font-family", "Arial"),
        ("font-size", size),
        ("font-weight", weight),
        ("text-anchor", anchor),
        ("fill", fill),
    )
    rendered = " ".join(f'{key}="{val}"' for key, val in attributes)
    return f"<text {rendered}>{text}</text>"


def svg_rect(x: float, y: float, width: float, height: float, fill: str, stroke: str = "none", stroke_width: float = 0) -> str:
    """Build a self-closing SVG ``<rect>`` element.

    Args:
        x: Value of the ``x`` attribute.
        y: Value of the ``y`` attribute.
        width: Value of the ``width`` attribute.
        height: Value of the ``height`` attribute.
        fill: Value of ``fill``.
        stroke: Value of ``stroke``.
        stroke_width: Value of ``stroke-width``.

    Returns:
        The element as a single-line string.
    """
    attributes = (
        ("x", x),
        ("y", y),
        ("width", width),
        ("height", height),
        ("fill", fill),
        ("stroke", stroke),
        ("stroke-width", stroke_width),
    )
    rendered = " ".join(f'{key}="{val}"' for key, val in attributes)
    return f"<rect {rendered} />"


def build() -> None:
    yearly = read_csv("study000a_yearly_overview.csv")
    phase_model = read_csv("study000a_phase_model.csv")
    window_context = read_csv("study000a_window_context.csv")
    speed_gain = read_csv("study000a_speed_gain_decomposition.csv")
    variability = read_csv("study000a_monthly_variability_rank.csv")
    window_summary_b = read_csv("study000b_window_summary.csv")
    window_change_b = read_csv("study000b_window_change.csv")
    corr_b = read_csv("study000b_specialization_correlations.csv")
    micro_a_surface = read_csv("study000a_micro_a_surface_summary.csv")
    micro_a_anchor = read_csv("study000a_micro_a_anchor_context.csv")
    micro_b_outdoor = read_csv("microstudy_b_outdoor_cases.csv")
    micro_b_sign = read_csv("microstudy_b_sign_tests.csv")
    micro_b_adjacent = read_csv("microstudy_b_adjacent_metric_summary.csv")
    bridge = read_csv("study000ab_bridge_findings.csv")
    evidence = read_csv("study000ab_evidence_ladder.csv")
    consistency = read_csv("study000ab_consistency_audit.csv")
    hybrid = read_csv("program_audit_hybrid_gps_candidates.csv")
    hybrid_monthly = read_csv("program_audit_monthly_hybrid_summary.csv")
    hybrid_windows = read_csv("program_audit_window_overlap.csv")
    impact = read_csv("program_audit_impact_matrix.csv")
    ecology_yearly = read_csv("v7_pillar_total_adaptation_ecology_yearly_v1.csv")

    yearly_lookup = {row["year"]: row for row in yearly}
    ecology_lookup = {row["year"]: row for row in ecology_yearly if row["year"] != "2006"}
    hybrid_by_year_type: dict[tuple[str, str], dict[str, float]] = defaultdict(lambda: {"count": 0, "hours": 0.0, "miles": 0.0})
    hybrid_dates: list[date] = []
    for row in hybrid:
        year = row["calendar_date"][:4]
        key = (year, row["activity_type"])
        hybrid_by_year_type[key]["count"] += 1
        hybrid_by_year_type[key]["hours"] += as_float(row["duration_hours"]) or 0.0
        hybrid_by_year_type[key]["miles"] += as_float(row["distance_miles_corrected"]) or 0.0
        hybrid_dates.append(as_date(row["calendar_date"]))

    corrected_ecology_rows: list[dict] = []
    for year in [row["year"] for row in yearly if row["year"] != "2006"]:
        base = yearly_lookup[year]
        eco = ecology_lookup.get(year, {})
        indoor = hybrid_by_year_type.get((year, "indoor_cardio"), {"count": 0, "hours": 0.0, "miles": 0.0})
        total_hybrid = {"count": 0, "hours": 0.0, "miles": 0.0}
        for (y, _t), data in hybrid_by_year_type.items():
            if y == year:
                total_hybrid["count"] += data["count"]
                total_hybrid["hours"] += data["hours"]
                total_hybrid["miles"] += data["miles"]
        indoor_count = as_float(base["indoor_conditioning_count"]) or 0.0
        indoor_hours = as_float(base["indoor_conditioning_duration_hours"]) or 0.0
        corrected_ecology_rows.append(
            {
                "year": year,
                "running_count": as_int(base["running_count"]),
                "indoor_conditioning_count": as_int(base["indoor_conditioning_count"]),
                "running_share_of_structured_hours": fmt(as_float(base["running_share_of_structured_hours"])),
                "indoor_share_of_structured_hours": fmt(as_float(base["indoor_share_of_structured_hours"])),
                "hybrid_indoor_cardio_count": indoor["count"],
                "hybrid_indoor_cardio_hours": fmt(indoor["hours"]),
                "hybrid_indoor_cardio_miles": fmt(indoor["miles"]),
                "hybrid_indoor_cardio_pct_of_indoor_count": fmt((indoor["count"] / indoor_count * 100.0) if indoor_count else 0.0),
                "hybrid_indoor_cardio_pct_of_indoor_hours": fmt((indoor["hours"] / indoor_hours * 100.0) if indoor_hours else 0.0),
                "hybrid_total_gps_conditioning_count": total_hybrid["count"],
                "hybrid_total_gps_conditioning_hours": fmt(total_hybrid["hours"]),
                "hybrid_total_gps_conditioning_miles": fmt(total_hybrid["miles"]),
            }
        )

    write_csv(
        "study000c_corrected_ecology_yearly.csv",
        corrected_ecology_rows,
        list(corrected_ecology_rows[0].keys()),
    )

    later_outdoor = [row for row in micro_b_outdoor if row["later_specialized_outdoor_subset"] == "1"]
    later_hybrid_dates = [as_date(row["calendar_date"]) for row in hybrid if as_date(row["calendar_date"]) >= as_date("2025-10-02")]
    separation_rows = []
    for row in later_outdoor:
        outdoor_date = as_date(row["calendar_date"])
        nearest = min(later_hybrid_dates, key=lambda d: abs((outdoor_date - d).days))
        separation_rows.append(
            {
                "outdoor_probe_date": row["calendar_date"],
                "distance_miles_norm": row["distance_miles_norm"],
                "hr_residual_bpm": row["hr_residual_bpm"],
                "nearest_hybrid_cardio_date": nearest.isoformat(),
                "nearest_hybrid_gap_days": abs((outdoor_date - nearest).days),
            }
        )
    write_csv(
        "study000c_later_probe_hybrid_separation.csv",
        separation_rows,
        list(separation_rows[0].keys()),
    )

    # Claim stability matrix
    claim_rows = [
        {
            "claim_id": "C1",
            "claim_label": "Phase-Structured Adaptation",
            "pre_full_data_position": "Supported by Study 000A",
            "full_data_result": "Retained",
            "status": "stable",
            "implication": "The phase model survives full-data audit unchanged.",
        },
        {
            "claim_id": "C2",
            "claim_label": "Turnover-Led Gain",
            "pre_full_data_position": "Supported by Study 000A and Microstudy B",
            "full_data_result": "Retained",
            "status": "stable",
            "implication": "Cadence remains the stronger accessible lever across stabilized and higher-demand contexts.",
        },
        {
            "claim_id": "C3",
            "claim_label": "Conserved Mechanics",
            "pre_full_data_position": "Supported by 000A and 000AB",
            "full_data_result": "Retained",
            "status": "stable",
            "implication": "Comparatively anchored mechanics survive context expansion.",
        },
        {
            "claim_id": "C4",
            "claim_label": "Later Running Stabilization Resource",
            "pre_full_data_position": "Lower stabilization-demand context supported as adaptive resource",
            "full_data_result": "Retained but narrowed",
            "status": "refined",
            "implication": "The claim applies most directly to later running specialization, not to the whole conditioning ecology.",
        },
        {
            "claim_id": "C5",
            "claim_label": "Mixed Internal Cost",
            "pre_full_data_position": "Supported by 000B and 000AB",
            "full_data_result": "Retained",
            "status": "stable",
            "implication": "Efficiency improved while unexplained session-level burden persisted.",
        },
        {
            "claim_id": "C6",
            "claim_label": "Uniformly Indoor Early Ecology",
            "pre_full_data_position": "Implicit in earlier Garmin label reading",
            "full_data_result": "Rejected",
            "status": "corrected",
            "implication": "The early flagship window included GPS-bearing hybrid outdoor conditioning hidden under indoor_cardio labels.",
        },
        {
            "claim_id": "C7",
            "claim_label": "Microstudy B Directly Confounded By Hybrid Cardio",
            "pre_full_data_position": "Unknown before full audit",
            "full_data_result": "Not supported",
            "status": "cleared",
            "implication": "Later outdoor probe runs are 69 to 109 days away from the later hybrid cardio block and remain interpretable as running-only probes.",
        },
    ]
    write_csv("study000c_claim_stability_matrix.csv", claim_rows, list(claim_rows[0].keys()))

    # New content inventory
    new_content_rows = [
        {
            "finding_id": "N1",
            "finding_label": "Hidden GPS Cardio Layer",
            "value": "18 plausible indoor_cardio and 4 plausible strength_training GPS-bearing hybrid sessions",
            "why_it_matters": "The broader conditioning ecology was richer and more outdoor-linked than Garmin labels alone implied.",
        },
        {
            "finding_id": "N2",
            "finding_label": "Early Window Hybrid Density",
            "value": "7 hybrid indoor_cardio sessions, 9.45 miles, 4.99 hours in the early flagship window",
            "why_it_matters": "The early flagship contrast should be read as mixed ecology rather than purely indoor conditioning outside the running bucket.",
        },
        {
            "finding_id": "N3",
            "finding_label": "Late Window Hybrid Absence",
            "value": "0 hybrid indoor_cardio sessions in the late flagship window",
            "why_it_matters": "The later running specialization signal remains clean and treadmill-dominant at the running-window level.",
        },
        {
            "finding_id": "N4",
            "finding_label": "2026 Indoor Cardio Hybrid Share",
            "value": f"{corrected_ecology_rows[-1]['hybrid_indoor_cardio_pct_of_indoor_hours']}% of 2026 indoor-conditioning hours were GPS-bearing hybrid cardio",
            "why_it_matters": "Garmin indoor labels remain imperfect even late in the record, but the hybrid block is temporally separate from the late flagship window.",
        },
        {
            "finding_id": "N5",
            "finding_label": "Microstudy B Separation Check",
            "value": "Later outdoor probes were 69 to 109 days away from the later hybrid cardio block",
            "why_it_matters": "This preserves the microstudy as a running-probe analysis rather than a direct circuit-conditioning artifact.",
        },
    ]
    write_csv("study000c_new_content_inventory.csv", new_content_rows, list(new_content_rows[0].keys()))

    final_model_rows = [
        {
            "step_order": 1,
            "component": "Ecological Reconfiguration",
            "full_data_reading": "The six-year system reorganized repeatedly rather than progressing linearly.",
            "strongest_evidence": "000A phase model and yearly ecology",
            "confidence": "high",
        },
        {
            "step_order": 2,
            "component": "Corrected Conditioning Ecology",
            "full_data_reading": "Garmin indoor_cardio labels included meaningful GPS-bearing hybrid outdoor conditioning, especially in 2025 and early 2026.",
            "strongest_evidence": "Full-system audit candidate set and yearly corrected ecology table",
            "confidence": "high",
        },
        {
            "step_order": 3,
            "component": "Later Running Specialization",
            "full_data_reading": "The strongest later adaptation signal emerged after ecology narrowed toward treadmill-dominant running, not just after total activity increased.",
            "strongest_evidence": "000A and 000B window context plus absence of hybrid cardio in late flagship window",
            "confidence": "high",
        },
        {
            "step_order": 4,
            "component": "Turnover-Preserved Adaptation",
            "full_data_reading": "Cadence remained the more accessible lever, while stride expression was more constrained under higher-demand probes.",
            "strongest_evidence": "000A decomposition and Microstudy B sign tests",
            "confidence": "high",
        },
        {
            "step_order": 5,
            "component": "Mixed Internal Cost",
            "full_data_reading": "Adaptive efficiency improved, but unexplained session-level burden did not disappear.",
            "strongest_evidence": "000B window change and burden bridge",
            "confidence": "moderate_high",
        },
        {
            "step_order": 6,
            "component": "Program-Level Exhaustive Thesis",
            "full_data_reading": "After full correction, the strongest program conclusion is selective adaptation through ecological narrowing into later stabilized running specialization, with turnover preserved more successfully than stride expression and with mixed internal cost rather than pure economy.",
            "strongest_evidence": "000A + 000B + Micro A + Micro B + full audit",
            "confidence": "moderate_high",
        },
    ]
    write_csv("study000c_full_program_model.csv", final_model_rows, list(final_model_rows[0].keys()))

    scope_rows = [
        {
            "category": "answered",
            "question": "Did the full corrected dataset overturn the main flagship adaptation findings?",
            "answer": "No. The core running-specific findings from 000A, 000B, and Microstudy B remain intact.",
        },
        {
            "category": "answered",
            "question": "Did the full corrected dataset materially change the ecology interpretation?",
            "answer": "Yes. The early flagship window and parts of 2025-2026 included GPS-bearing hybrid outdoor conditioning hidden under indoor_cardio labels.",
        },
        {
            "category": "answered",
            "question": "Does the later running-specialization story survive after the hidden cardio layer is accounted for?",
            "answer": "Yes. The late flagship window still contains no hybrid cardio, so the later treadmill-dominant running specialization remains a real and distinct context.",
        },
        {
            "category": "supported_not_proven",
            "question": "Does the full corrected dataset support a stronger complete program thesis?",
            "answer": "Yes. It supports selective adaptation through corrected ecological narrowing into later stabilized running specialization, preserved turnover, constrained stride expression, and mixed internal cost.",
        },
        {
            "category": "not_answered",
            "question": "Can the full corrected dataset alone prove the exact physiological or biomechanical mechanism?",
            "answer": "No. Even after exhaustive cross-reference, the program remains descriptive and single-subject rather than mechanistic proof.",
        },
    ]
    write_csv("study000c_scope_claims.csv", scope_rows, list(scope_rows[0].keys()))

    # Figure 1: yearly hybrid cardio share
    chart_rows = [row for row in corrected_ecology_rows if row["year"] in {"2025", "2026"}]
    width = 920
    height = 420
    pad = 70
    max_hours = max(float(row["hybrid_indoor_cardio_hours"]) for row in chart_rows) * 1.25
    body = [
        '<svg xmlns="http://www.w3.org/2000/svg" width="920" height="420" viewBox="0 0 920 420">',
        '<rect width="100%" height="100%" fill="#f7f5ef"/>',
        svg_text(460, 30, "Figure 1. Corrected Hybrid Indoor-Cardio Share", 22, "bold"),
        svg_text(460, 52, "GPS-bearing hybrid cardio is small in total 2025 volume but non-trivial in 2026 indoor-conditioning share", 13, "normal", "middle", "#555"),
        f'<line x1="{pad}" y1="{height-pad}" x2="{width-pad}" y2="{height-pad}" stroke="#444" stroke-width="1"/>',
        f'<line x1="{pad}" y1="{pad}" x2="{pad}" y2="{height-pad}" stroke="#444" stroke-width="1"/>',
    ]
    bar_w = 120
    gap = 180
    colors = {"total": "#1768ac", "hybrid": "#d98f3b"}
    for idx, row in enumerate(chart_rows):
        x = pad + 120 + idx * (bar_w + gap)
        total_hours = as_float(yearly_lookup[row["year"]]["indoor_conditioning_duration_hours"]) or 0.0
        hybrid_hours = float(row["hybrid_indoor_cardio_hours"])
        total_h = (total_hours / max_hours) * (height - 2 * pad)
        hybrid_h = (hybrid_hours / max_hours) * (height - 2 * pad)
        body.append(svg_rect(x, height - pad - total_h, bar_w, total_h, colors["total"]))
        body.append(svg_rect(x + 30, height - pad - hybrid_h, bar_w - 60, hybrid_h, colors["hybrid"]))
        body.append(svg_text(x + bar_w / 2, height - 25, row["year"], 12))
        body.append(svg_text(x + bar_w / 2, height - pad - total_h - 8, fmt(total_hours), 11))
        body.append(svg_text(x + bar_w / 2, height - pad - hybrid_h - 8, fmt(hybrid_hours), 11))
        body.append(svg_text(x + bar_w / 2, height - pad + 18, f"{row['hybrid_indoor_cardio_pct_of_indoor_hours']}% hybrid", 10, "normal", "middle", "#555"))
    body.append(svg_rect(640, 92, 18, 18, colors["total"]))
    body.append(svg_text(665, 106, "Garmin-labeled indoor-conditioning hours", 11, "normal", "start"))
    body.append(svg_rect(640, 118, 18, 18, colors["hybrid"]))
    body.append(svg_text(665, 132, "GPS-bearing hybrid cardio hours", 11, "normal", "start"))
    body.append("</svg>")
    write_text(FIGURES / "figure01_corrected_hybrid_share.svg", "\n".join(body))

    # Figure 2: hybrid gap from later probes
    max_gap = max(int(row["nearest_hybrid_gap_days"]) for row in separation_rows) + 10
    width2 = 920
    height2 = 360
    body = [
        '<svg xmlns="http://www.w3.org/2000/svg" width="920" height="360" viewBox="0 0 920 360">',
        '<rect width="100%" height="100%" fill="#f7f5ef"/>',
        svg_text(460, 30, "Figure 2. Later Outdoor Probes Versus Nearest Hybrid Cardio", 21, "bold"),
        svg_text(460, 52, "The later outdoor probe runs are temporally remote from the later hybrid cardio block", 13, "normal", "middle", "#555"),
        f'<line x1="80" y1="290" x2="860" y2="290" stroke="#444" stroke-width="1"/>',
    ]
    step = 140
    for idx, row in enumerate(separation_rows):
        x = 120 + idx * step
        gap_days = int(row["nearest_hybrid_gap_days"])
        bar_h = (gap_days / max_gap) * 180
        body.append(svg_rect(x, 290 - bar_h, 50, bar_h, "#1768ac"))
        body.append(svg_text(x + 25, 308, row["outdoor_probe_date"], 9))
        body.append(svg_text(x + 25, 290 - bar_h - 8, str(gap_days), 11))
    body.append(svg_text(470, 338, "Gap in days from each later outdoor probe to the nearest hybrid-cardio session", 11, "normal", "middle", "#555"))
    body.append("</svg>")
    write_text(FIGURES / "figure02_later_probe_hybrid_gap.svg", "\n".join(body))

    # Key values for prose
    speed_share = speed_gain[0]
    vrank = variability[0]
    vo_rank = variability[1]
    early_b = next(row for row in window_summary_b if row["window_label"] == "early_qc_window")
    late_b = next(row for row in window_summary_b if row["window_label"] == "late_qc_window")
    early_a = next(row for row in window_context if row["window_label"] == "early_qc_window")
    late_a = next(row for row in window_context if row["window_label"] == "late_qc_window")
    late_cadence = next(row for row in micro_b_sign if row["subset_label"] == "later_specialized_outdoor" and row["direction_test"] == "cadence_above_expected")
    late_stride = next(row for row in micro_b_sign if row["subset_label"] == "later_specialized_outdoor" and row["direction_test"] == "stride_below_expected")
    late_vr = next(row for row in micro_b_sign if row["subset_label"] == "later_specialized_outdoor" and row["direction_test"] == "vertical_ratio_above_expected")
    later_vo = next(row for row in micro_b_adjacent if row["subset_label"] == "later_specialized_outdoor" and row["metric"] == "vertical_oscillation_cm")
    later_gct = next(row for row in micro_b_adjacent if row["subset_label"] == "later_specialized_outdoor" and row["metric"] == "ground_contact_time_ms")
    bridge_br4 = next(row for row in bridge if row["bridge_id"] == "BR4")
    bridge_br1 = next(row for row in bridge if row["bridge_id"] == "BR1")
    cadence_share_pct = fmt(as_float(speed_share["cadence_log_share_pct"]), 2)
    vrank_cv = fmt(as_float(vrank["monthly_cv_pct"]), 2)
    vo_rank_cv = fmt(as_float(vo_rank["monthly_cv_pct"]), 2)

    abstract = f"""
    # Study 000C Abstract

    This exhaustive full-data program analysis was conducted after completion of `Study 000A`, `Study 000B`, `Microstudy A`, `Microstudy B`, and the full-system audit. Its purpose was to test whether the corrected full dataset overturned any of the major program claims or instead sharpened them.

    Using bundled flagship outputs, microstudy outputs, yearly ecology tables, and the raw-summary hybrid-cardio audit, the study evaluated claim stability, corrected conditioning ecology, hybrid-session placement within flagship windows, and temporal separation between later outdoor probe runs and later hybrid cardio blocks.

    The corrected full-data pass did not overturn the core running findings. The flagship adaptation pattern remained phase-structured, turnover-led, and selectively rather than uniformly remodeled. Cadence still accounted for `{cadence_share_pct}%` of cadence-stride speed gain, while `vertical_ratio_pct` remained the lowest-variability candidate at `{vrank_cv}%` monthly CV and `vertical_oscillation_cm` remained the lowest-variability direct mechanics candidate at `{vo_rank_cv}%`.

    The biggest new content was ecological. The audit found `18` plausible GPS-bearing `indoor_cardio` sessions and `4` plausible GPS-bearing `strength_training` sessions. The early flagship window contained `7` hybrid indoor-cardio sessions totaling `9.45` miles and `4.99` hours, while the late flagship window contained `0`. In yearly corrected ecology, hybrid GPS cardio accounted for `4.20%` of 2025 indoor-conditioning hours and `26.34%` of 2026 indoor-conditioning hours, though the 2026 block ended on `2026-01-30` and sat `69` to `109` days away from the later outdoor probe runs.

    The strongest corrected conclusion is therefore narrower and stronger at the same time: the program supports selective adaptation through corrected ecological narrowing into later stabilized running specialization, with turnover preserved more successfully than stride expression and with mixed internal cost rather than pure economy.
    """

    methods = f"""
    # Study 000C Methods

    ## Program question

    After exhausting the corrected full dataset, what survives unchanged, what must be refined, and what stronger final program conclusion is actually supported?

    ## Inputs

    This package reads only from bundled source tables copied from the current canonical study ecosystem and the full-system audit:

    - `Study 000A` yearly overview, phase model, mechanics contrast, decomposition, variability rank, and window context
    - `Study 000B` window summaries and specialization correlations
    - `Microstudy A` surface summary and anchor-context tables
    - `Microstudy B` outdoor cases, sign tests, and adjacent-dynamics summaries
    - `Study 000AB` bridge findings, evidence ladder, and consistency audit
    - `Program Audit` hybrid-cardio candidates, monthly hybrid summary, window-overlap table, and impact matrix
    - yearly ecology table from the current Stage 5 main

    ## Exhaustive checks performed

    1. Yearly ecology was recalculated into a corrected ecology summary by adding GPS-bearing hybrid-cardio counts, hours, and miles by year.
    2. The early and late flagship windows were re-evaluated against the hybrid-cardio audit layer.
    3. The later Microstudy B outdoor probes were checked against the later hybrid-cardio block to determine temporal separation rather than assuming contamination or no contamination.
    4. A claim-stability matrix was built to test which major program claims were retained, refined, corrected, or cleared.
    5. A final full-program model was then written only after the corrected ecology and separation checks were complete.

    ## Interpretation rules

    This study treats the running-specific mechanics findings as valid unless the exhaustive cross-check reveals direct contradictory source evidence. It treats ecology claims more cautiously, because Garmin label taxonomy can collapse outdoor hybrid conditioning into `indoor_cardio`. As in the earlier packages, all conclusions remain descriptive rather than mechanistic proof.
    """

    results = f"""
    # Study 000C Results

    ## Summary

    The exhaustive full-data pass supports a clear result:

    - the core running findings remain intact
    - the ecology interpretation becomes more precise
    - the later running-specialization story survives full correction
    - the hybrid-cardio layer changes context, not the running-only mechanics conclusions

    ## Corrected yearly ecology

    | year | running_count | indoor_conditioning_count | running_share_of_structured_hours | indoor_share_of_structured_hours | hybrid_indoor_cardio_count | hybrid_indoor_cardio_hours | hybrid_indoor_cardio_pct_of_indoor_hours |
    |---|---|---|---|---|---|---|---|
    | 2025 | {corrected_ecology_rows[5]['running_count']} | {corrected_ecology_rows[5]['indoor_conditioning_count']} | {corrected_ecology_rows[5]['running_share_of_structured_hours']} | {corrected_ecology_rows[5]['indoor_share_of_structured_hours']} | {corrected_ecology_rows[5]['hybrid_indoor_cardio_count']} | {corrected_ecology_rows[5]['hybrid_indoor_cardio_hours']} | {corrected_ecology_rows[5]['hybrid_indoor_cardio_pct_of_indoor_hours']} |
    | 2026 | {corrected_ecology_rows[6]['running_count']} | {corrected_ecology_rows[6]['indoor_conditioning_count']} | {corrected_ecology_rows[6]['running_share_of_structured_hours']} | {corrected_ecology_rows[6]['indoor_share_of_structured_hours']} | {corrected_ecology_rows[6]['hybrid_indoor_cardio_count']} | {corrected_ecology_rows[6]['hybrid_indoor_cardio_hours']} | {corrected_ecology_rows[6]['hybrid_indoor_cardio_pct_of_indoor_hours']} |

    The corrected ecology changes what can be said about the conditioning system:

    - 2025 still reads as a Garmin-labeled conditioning-heavy year
    - but only `{corrected_ecology_rows[5]['hybrid_indoor_cardio_pct_of_indoor_hours']}%` of those indoor-conditioning hours were GPS-bearing hybrid cardio
    - 2026 has much less indoor-conditioning overall, but `{corrected_ecology_rows[6]['hybrid_indoor_cardio_pct_of_indoor_hours']}%` of its indoor-conditioning hours were GPS-bearing hybrid cardio

    So the Garmin indoor label is imperfect in both years, but the scale and placement of that imperfection are not the same.

    ## Hybrid cardio inside the flagship windows

    | window_label | hybrid_session_count | hybrid_total_distance_miles | hybrid_total_duration_hours | hybrid_type_counts |
    |---|---|---|---|---|
    """ + "\n".join(
        f"| {row['window_label']} | {row['hybrid_session_count']} | {row['hybrid_total_distance_miles']} | {row['hybrid_total_duration_hours']} | {row['hybrid_type_counts']} |"
        for row in hybrid_windows
    ) + f"""

    This is the most important corrected-context result:

    - the early flagship window contained `7` hybrid indoor-cardio sessions totaling `9.45` miles and `4.99` hours
    - the late flagship window contained `0`
    - the later Microstudy B phase contained `5` hybrid indoor-cardio sessions, but all of them occurred in January 2026

    ## Separation of later outdoor probes from the later hybrid-cardio block

    | outdoor_probe_date | hr_residual_bpm | nearest_hybrid_cardio_date | nearest_hybrid_gap_days |
    |---|---|---|---|
    """ + "\n".join(
        f"| {row['outdoor_probe_date']} | {row['hr_residual_bpm']} | {row['nearest_hybrid_cardio_date']} | {row['nearest_hybrid_gap_days']} |"
        for row in separation_rows
    ) + f"""

    This check clears a major possible hole. The later outdoor probe runs are not sitting directly on top of the later hybrid-cardio block. They are `69` to `109` days away from it. That means the outdoor running probes remain interpretable as running probes rather than immediate hybrid-circuit artifacts.

    ## Claim stability matrix

    | claim_id | claim_label | full_data_result | status | implication |
    |---|---|---|---|---|
    """ + "\n".join(
        f"| {row['claim_id']} | {row['claim_label']} | {row['full_data_result']} | {row['status']} | {row['implication']} |"
        for row in claim_rows
    ) + f"""

    The full-data pass found only one genuinely corrected claim:

    - the early conditioning ecology was not uniformly indoor

    It did **not** overturn the main running findings:

    - phase-structured adaptation remains
    - cadence still carries `{cadence_share_pct}%` of the cadence-stride speed gain
    - `vertical_ratio_pct` still has the lowest monthly variability at `{vrank_cv}%`
    - later specialized outdoor cadence remains `5/5` above expected (`p = {late_cadence['one_sided_binomial_p']}`)
    - later specialized outdoor stride remains `5/5` below expected (`p = {late_stride['one_sided_binomial_p']}`)
    - later specialized outdoor vertical ratio remains `5/5` above expected (`p = {late_vr['one_sided_binomial_p']}`)
    - later specialized outdoor vertical oscillation residual remains `{later_vo['mean_residual_value']}` and later specialized outdoor GCT residual remains `{later_gct['mean_residual_value']}`

    ## Strongest full-data answer

    The strongest complete answer after exhausting the corrected dataset is:

    `the program supports selective adaptation through corrected ecological narrowing into later stabilized running specialization, with turnover preserved more successfully than stride expression and with mixed internal cost rather than pure economy`
    """

    discussion = f"""
    # Study 000C Discussion

    ## What changed after exhaustive correction

    The most important thing about this full-data pass is what **did not** collapse.

    The program still supports:

    - phase-structured adaptation
    - selective rather than uniform remodeling
    - turnover as the more accessible running lever
    - mixed internal cost

    What changed is the ecology precision.

    Earlier packages could be read too quickly as though the non-running ecology in the early flagship window were simply "indoor conditioning." The exhaustive raw-summary cross-reference shows that this is too blunt. The Garmin `indoor_cardio` label hid a meaningful outdoor hybrid-conditioning layer, especially in `2025-05` and `2025-06`.

    ## Why that refinement strengthens rather than weakens the program

    This correction does not weaken the running claims. It improves them.

    It tells us that the strongest stabilization-resource claim should be read more precisely as:

    `later running specialization narrowed into a lower stabilization-demand running context`

    not:

    `the entire conditioning ecology became indoor and stable`

    That is a better statement because it is both more conservative and more accurate.

    ## What the complete program now says

    The corrected program can now be read in a tighter order:

    1. The full system reorganized repeatedly across six years.
    2. The broader conditioning ecology included hidden outdoor hybrid work.
    3. The later flagship running window nevertheless narrowed into a cleaner treadmill-dominant specialization context.
    4. Inside that running context, speed gain was assembled mainly through cadence rather than large stride expansion.
    5. Under higher-demand running probes, cadence stayed more preservable than stride expression.
    6. Internal cost improved in some ways, but not enough to erase unexplained session-level burden.

    That is a stronger full-data story than any earlier package could make alone.

    ## Most important complete-program conclusion

    If the whole corrected dataset is forced to say one thing as clearly as possible, it is this:

    `adaptation did not occur by making the system uniformly more normal. It occurred by narrowing the ecology, selecting a more manageable running context, preserving the most accessible mechanical lever, and tolerating a mixed cost profile`
    """

    limitations = """
    # Study 000C Limitations

    1. This remains a single-subject program.
    2. The hybrid-cardio audit is based on summarized Garmin activity fields rather than full within-session trace reconstruction.
    3. Garmin label taxonomy remains imperfect, which is why the correction was needed in the first place.
    4. The program still cannot prove exact physiological or tissue-level mechanism.
    5. This study corrects and tightens the program, but it does not create an external comparator cohort.
    """

    plain = """
    # Study 000C Plain-Language Summary

    This was the "did we miss anything?" study.

    The answer is:

    - yes, we found something real
    - no, it did not break the main studies

    What we found was that some Garmin workouts labeled `indoor_cardio` were not purely indoor at all. They had GPS traces and looked like hybrid outdoor conditioning.

    That matters because it changes how the bigger training story should be described. The early comparison period in the flagship studies was more mixed than the labels first suggested.

    But the main running findings still held up:

    - the system adapted in phases
    - cadence was still the stronger running lever
    - stride was still more limited under higher-demand probes
    - the internal-cost story was still mixed

    So the full-data pass made the program more accurate, not less credible.
    """

    submission = """
    # Study 000C Submission Guidance

    ## Publishability

    Moderate to strong as a program-integrity and full-dataset synthesis artifact.

    ## Best fit

    - companion methods or program-synthesis submission
    - appendix-style submission with the flagship program
    - strong handoff document for a biomechanist, gait researcher, or wearable-measurement researcher

    ## What it can claim

    - the full corrected dataset was exhaustively cross-referenced
    - hidden hybrid conditioning existed and was materially relevant to ecology framing
    - the core running findings remained intact after correction
    - the strongest final program conclusion is narrower and stronger than before

    ## What it should not claim

    - exact mechanism
    - universal generalization
    - that Garmin labels alone are sufficient truth without audit
    """

    audit = """
    # Study 000C Audit

    ## Structural audit

    - Pass: the package is self-contained and reads only from bundled source tables.
    - Pass: the package includes manuscript, methods, results, discussion, limitations, plain-language summary, submission guidance, appendix, figures, outputs, and manifest.
    - Pass: the package includes direct computed outputs rather than prose-only interpretation.

    ## Scientific audit

    - Pass: the study asks a distinct question from 000A, 000B, and 000AB.
    - Pass: the study uses the full corrected ecosystem rather than a subset of prior claims.
    - Pass: the new content is real and quantified, especially the hidden GPS-bearing hybrid cardio layer.
    - Pass: the program-level conclusion becomes more precise without discarding the existing flagship findings.

    ## Residual cautions

    - The study remains descriptive.
    - It depends on Garmin summarized-activity reconstruction for the hybrid-cardio layer.
    - It tightens the full-data story but still does not produce an external comparator.

    ## Audit verdict

    `Study 000C` is structurally sound and scientifically useful as the exhaustive full-data program pass. Its main value is not replacing the earlier studies. Its value is proving that the corrected whole dataset still supports the core program while forcing the ecology interpretation to become more exact.
    """

    appendix = """
    # Appendix A: Source Manifest And Interpretation Notes

    This package was built from bundled output tables from:

    - `Study 000A`
    - `Study 000B`
    - `Microstudy A`
    - `Microstudy B`
    - `Study 000AB`
    - `Study Program Full System Audit`
    - the current yearly ecology table from the Stage 5 main

    The exhaustive pass found only one program-level correction that materially changed interpretation:

    - early ecology was more outdoor-linked than Garmin labels alone implied

    It did not find any running-specific contradiction strong enough to overturn the core flagship mechanics or burden findings.
    """

    manuscript = "\n\n".join([abstract, methods, results, discussion, limitations])
    readme = """
    # STUDY 000C

    This folder contains the exhaustive full-data program analysis performed after the flagship studies, microstudies, integrated synthesis, and full-system audit.

    ## Why this study exists

    The question here is not whether the earlier studies were useful. It is whether the **full corrected dataset** changes their meaning once every relevant layer is cross-referenced.

    ## Headline findings

    - the core running findings survived the full-data pass
    - the early flagship ecology was more mixed than Garmin labels alone implied
    - the later running-specialization story remained intact
    - the strongest final thesis became narrower and stronger rather than weaker

    ## Main outputs

    - `manuscript/STUDY000C_MANUSCRIPT.md`
    - `reports/STUDY000C_ABSTRACT.md`
    - `reports/STUDY000C_METHODS.md`
    - `reports/STUDY000C_RESULTS.md`
    - `reports/STUDY000C_DISCUSSION.md`
    - `reports/STUDY000C_LIMITATIONS.md`
    - `reports/STUDY000C_PLAIN_LANGUAGE_SUMMARY.md`
    - `reports/STUDY000C_SUBMISSION_GUIDANCE.md`
    - `reports/STUDY000C_AUDIT.md`
    - `appendices/APPENDIX_A_SOURCE_MANIFEST_AND_NOTES.md`
    - `figures/`
    - `outputs/`
    - `source_tables/`
    """

    write_text(REPORTS / "STUDY000C_ABSTRACT.md", abstract)
    write_text(REPORTS / "STUDY000C_METHODS.md", methods)
    write_text(REPORTS / "STUDY000C_RESULTS.md", results)
    write_text(REPORTS / "STUDY000C_DISCUSSION.md", discussion)
    write_text(REPORTS / "STUDY000C_LIMITATIONS.md", limitations)
    write_text(REPORTS / "STUDY000C_PLAIN_LANGUAGE_SUMMARY.md", plain)
    write_text(REPORTS / "STUDY000C_SUBMISSION_GUIDANCE.md", submission)
    write_text(REPORTS / "STUDY000C_AUDIT.md", audit)
    write_text(APPENDICES / "APPENDIX_A_SOURCE_MANIFEST_AND_NOTES.md", appendix)
    write_text(MANUSCRIPT / "STUDY000C_MANUSCRIPT.md", manuscript)
    write_text(ROOT / "README.md", readme)

    # Manifest: record the name, relative path, byte size, and SHA-256 of every entry in source_tables/ as a CSV.
    manifest_rows = []
    for path in sorted(SOURCE.iterdir()):
        manifest_rows.append(
            {
                "source_name": path.name,
                "relative_path": f"source_tables/{path.name}",
                "bytes": path.stat().st_size,
                "sha256": sha256(path),
            }
        )
    with (MANIFEST / "source_table_manifest.csv").open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=["source_name", "relative_path", "bytes", "sha256"])
        writer.writeheader()
        writer.writerows(manifest_rows)

    state = {
        "study_id": "000C",
        "study_label": "Full Data Exhaustive Program Analysis",
        "package_root": ".",
        "source_root": "source_tables",
        "headline_findings": [
            "The core running findings survived the full-data pass.",
            "The early flagship ecology was more mixed than Garmin labels alone implied.",
            "The later running-specialization story remained intact.",
            "The strongest final thesis became narrower and stronger rather than weaker.",
        ],
        "generated_outputs": sorted([path.name for path in OUTPUTS.iterdir()]),
    }
    write_text(MANIFEST / "study000c_project_state.json", json.dumps(state, indent=2))


# Script entry point: run the full build only when this file is executed
# directly, so importing the module does not trigger any file writes.
if __name__ == "__main__":
    build()