from __future__ import annotations

import csv
import datetime as dt
import json
import shutil
from collections import defaultdict
from pathlib import Path


# Folder under which the study package directory and its zip archive are built.
ROOT = Path(r"C:\BalancedTrim\analyzer")
# Name shared by the package directory and (with a .zip suffix) the archive.
PACKAGE_NAME = "STUDY_000G_CADENCE_PROTECTION_HYPOTHESIS_20260610"
PACKAGE_DIR = ROOT / PACKAGE_NAME
ZIP_PATH = ROOT / f"{PACKAGE_NAME}.zip"

# Output folders of earlier studies; build_outputs() reads its source CSV
# tables from these three locations.
MICRO_B_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000B_MICROSTUDY_B_PRESERVED_TURNOVER_SUPPRESSED_STRIDE_20260609\outputs"
)
STUDY_E_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000E_BURDEN_OUTSIDE_SUCCESSFUL_CONTEXT_20260609\outputs"
)
STUDY_F_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000F_SUCCESSFUL_OPERATING_ENVELOPE_HYPOTHESIS_20260609\outputs"
)


def ensure_dirs() -> None:
    """Create the package directory together with its fixed set of sub-folders.

    Missing parent folders are created as needed and folders that already
    exist are left alone.
    """
    subdir_names = (
        "analysis",
        "appendices",
        "figures",
        "manifest",
        "manuscript",
        "outputs",
        "reports",
        "source_tables",
    )
    for name in subdir_names:
        target = PACKAGE_DIR / name
        target.mkdir(parents=True, exist_ok=True)


def read_csv(path: Path) -> list[dict[str, str]]:
    """Load a UTF-8 CSV file into a list of header-keyed row dictionaries.

    Args:
        path: Location of the CSV file; its first row supplies the keys.

    Returns:
        One dictionary per data row, in file order.
    """
    with path.open(newline="", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        return [record for record in reader]


def write_csv(path: Path, rows: list[dict[str, object]], fieldnames: list[str]) -> None:
    """Write ``rows`` to ``path`` as a UTF-8 CSV with a header line.

    Args:
        path: Destination file; it is created or overwritten.
        rows: Row dictionaries keyed by column name.
        fieldnames: Column names, in the order they appear in the file.
    """
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def mean(values: list[float]) -> float:
    """Return the arithmetic mean of ``values``, or 0.0 for an empty list."""
    if not values:
        return 0.0
    total = sum(values)
    return total / len(values)


def round_or_blank(value: float | None, digits: int = 4) -> float | str:
    """Round ``value`` to ``digits`` decimals; a ``None`` value becomes ``""``."""
    return "" if value is None else round(value, digits)


def escape_xml(text: str) -> str:
    """Return ``str(text)`` with ``&``, ``<``, ``>`` and ``"`` replaced by XML entities."""
    # One pass over the characters: an ampersand produced by an entity is
    # never escaped a second time.
    entity_table = str.maketrans(
        {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
        }
    )
    return str(text).translate(entity_table)


def build_outputs() -> dict[str, list[dict[str, object]]]:
    """Assemble the Study 000G output tables from the upstream study CSVs.

    Reads eight CSV files from the Microstudy B, Study 000E and Study 000F
    output folders and builds seven tables: primary answer, evidence family
    matrix, mechanism bridge, hypothesis tests, summary table, scope claims
    and discovered questions.

    Returns:
        A mapping from output file stem to that table's list of row
        dictionaries.

    Raises:
        KeyError: If an expected column, subset label or context label is
            absent from a source table.
        IndexError: If a sign-test table has fewer rows than the positional
            lookups below require.
        ValueError: If a numeric column cannot be parsed.
    """
    subset_summary = read_csv(MICRO_B_OUTPUTS / "microstudy_b_subset_summary.csv")
    sign_tests = read_csv(MICRO_B_OUTPUTS / "microstudy_b_sign_tests.csv")
    nearest_speed = read_csv(MICRO_B_OUTPUTS / "microstudy_b_nearest_speed_summary.csv")
    speed_band = read_csv(MICRO_B_OUTPUTS / "microstudy_b_speed_band_summary.csv")
    burden_sign = read_csv(STUDY_E_OUTPUTS / "study000e_burden_sign_tests.csv")
    burden_context = read_csv(STUDY_E_OUTPUTS / "study000e_context_burden_summary.csv")
    # NOTE(review): burden_details is loaded but never used below; the read
    # still fails the build if the file is missing.
    burden_details = read_csv(STUDY_E_OUTPUTS / "study000e_outdoor_probe_details.csv")
    boundary_table = read_csv(STUDY_F_OUTPUTS / "study000f_boundary_expression_table.csv")

    # Index the summary tables by label; a repeated label keeps the last row.
    subset_map = {row["subset_label"]: row for row in subset_summary}
    nearest_map = {row["subset_label"]: row for row in nearest_speed}
    speed_band_map = {row["subset_label"]: row for row in speed_band}
    burden_map = {row["context_label"]: row for row in burden_context}

    # Split the boundary expression table into its three clusters.
    october_rows = [row for row in boundary_table if row["cluster_label"] == "october_2025_outside_cluster"]
    april_rows = [row for row in boundary_table if row["cluster_label"] == "april_2026_boundary_probe"]
    early_rows = [row for row in boundary_table if row["cluster_label"] == "early_exploratory_outdoor"]

    october_cadence = [float(row["cadence_residual_pct"]) for row in october_rows]
    october_stride = [float(row["stride_residual_pct"]) for row in october_rows]
    october_hr = [float(row["hr_residual_bpm"]) for row in october_rows]
    april_cadence = [float(row["cadence_residual_pct"]) for row in april_rows]
    april_stride = [float(row["stride_residual_pct"]) for row in april_rows]
    april_hr = [float(row["hr_residual_bpm"]) for row in april_rows]

    primary_answer = [
        {
            "question_id": "Q007_Q017",
            "study_id": "000G",
            "core_question": "Is cadence the preserved control variable, and can preserved cadence fail to fully protect internal burden?",
            "answer": "yes_supported",
            "answer_plain": "Yes. Cadence behaves like the more preservable running control variable, stride behaves like the more sacrificial expression variable, and cadence preservation alone does not guarantee low internal burden.",
        }
    ]

    # NOTE(review): the "key_values" strings below are fixed text, not derived
    # from the tables loaded above; they will not follow changes in the
    # source data.
    evidence_families = [
        {
            "family_id": "F1",
            "family_label": "later_specialized_directionality",
            "observation": "Later specialized outdoor probes kept cadence above treadmill-expected values in every case while stride stayed below expected values in every case.",
            "key_values": "cadence 5/5 positive, stride 5/5 negative, p=0.03125 for both",
            "supports_hypothesis": "strong",
        },
        {
            "family_id": "F2",
            "family_label": "paired_and_speed_band_agreement",
            "observation": "Nearest-speed and speed-band comparisons agree that cadence stays positive while stride stays negative under higher stabilization-demand probes.",
            "key_values": "nearest-speed cadence +16.0 spm, stride -0.1042 m; speed-band cadence +15.79 spm, stride -0.0976 m",
            "supports_hypothesis": "strong",
        },
        {
            "family_id": "F3",
            "family_label": "burden_persists_despite_cadence_preservation",
            "observation": "Later outside-context probes still showed positive HR residual burden even while cadence remained preserved.",
            "key_values": "5/5 positive HR residuals, mean +12.67 bpm, p=0.03125",
            "supports_hypothesis": "strong",
        },
        {
            "family_id": "F4",
            "family_label": "boundary_exception_reframes_secondary_question",
            "observation": "The April 2026 boundary probe preserved the same cadence-up / stride-down pattern but stayed near-zero burden.",
            "key_values": "cadence +10.22%, stride -9.28%, HR residual +0.69 bpm, running-share context 91.08%, 32 treadmill neighbors within 21 days",
            "supports_hypothesis": "strong",
        },
        {
            "family_id": "F5",
            "family_label": "envelope_modulation",
            "observation": "The same protected cadence pattern can coexist with either high or low burden depending on envelope support.",
            "key_values": "October cluster mean HR residual +13.88 bpm versus April boundary +0.69 bpm despite same directional mechanics",
            "supports_hypothesis": "strong",
        },
    ]

    # One bridge row per October and April boundary-table row, October first.
    mechanism_bridge = []
    for row in october_rows + april_rows:
        mechanism_bridge.append(
            {
                "calendar_date": row["calendar_date"],
                "cluster_label": row["cluster_label"],
                "envelope_position": row["envelope_position"],
                "cadence_residual_pct": round(float(row["cadence_residual_pct"]), 4),
                "stride_residual_pct": round(float(row["stride_residual_pct"]), 4),
                "vertical_ratio_residual_pct": round(float(row["vertical_ratio_residual_pct"]), 4),
                "hr_residual_bpm": round(float(row["hr_residual_bpm"]), 4),
                "running_share_28d_activity_pct": round(float(row["running_share_28d_activity_pct"]), 4),
                # A blank neighbor count is treated as 0.
                "treadmill_neighbors_21d": int(float(row["treadmill_neighbors_21d"] or 0)),
                # Cadence above expected together with stride below expected.
                "mechanical_pattern": "cadence_preserved_stride_suppressed"
                if float(row["cadence_residual_pct"]) > 0 and float(row["stride_residual_pct"]) < 0
                else "other",
                # HR residuals above 3 bpm are labelled as positive burden.
                "burden_state": "positive_burden" if float(row["hr_residual_bpm"]) > 3 else "low_burden",
            }
        )

    # NOTE(review): the sign-test rows are selected by position (rows 6 and 7
    # of the Microstudy B table, row 0 of the Study 000E table). Presumably
    # these are the later-specialized cadence, stride and burden tests;
    # confirm against the source files, since a reordered table would
    # silently change these results.
    hypothesis_tests = [
        {
            "test_label": "cadence_preservable_later_specialized",
            "successes": int(sign_tests[6]["successes"]),
            "trials": int(sign_tests[6]["trials"]),
            "one_sided_binomial_p": float(sign_tests[6]["one_sided_binomial_p"]),
            "interpretation": "Cadence stayed above treadmill-expected values in all later specialized outside-context probes.",
        },
        {
            "test_label": "stride_sacrificial_later_specialized",
            "successes": int(sign_tests[7]["successes"]),
            "trials": int(sign_tests[7]["trials"]),
            "one_sided_binomial_p": float(sign_tests[7]["one_sided_binomial_p"]),
            "interpretation": "Stride stayed below treadmill-expected values in all later specialized outside-context probes.",
        },
        {
            "test_label": "positive_burden_later_specialized",
            "successes": int(burden_sign[0]["successes"]),
            "trials": int(burden_sign[0]["trials"]),
            "one_sided_binomial_p": float(burden_sign[0]["one_sided_binomial_p"]),
            "interpretation": "Later outside-context probes stayed positive in session-level HR residual burden.",
        },
    ]

    # Four context rows. Cells with no value for a context are written as "".
    summary_table = [
        {
            "context_label": "later_specialized_outdoor",
            "n_runs": int(subset_map["later_specialized_outdoor"]["n_runs"]),
            "cadence_residual_pct": float(subset_map["later_specialized_outdoor"]["mean_cadence_residual_pct"]),
            "stride_residual_pct": float(subset_map["later_specialized_outdoor"]["mean_stride_residual_pct"]),
            "vertical_ratio_residual_pct": float(subset_map["later_specialized_outdoor"]["mean_vertical_ratio_residual_pct"]),
            "nearest_speed_cadence_diff_spm": float(nearest_map["later_specialized_outdoor"]["mean_cadence_diff_spm"]),
            "nearest_speed_stride_diff_m": float(nearest_map["later_specialized_outdoor"]["mean_stride_diff_m"]),
            "speed_band_cadence_diff_spm": float(speed_band_map["later_specialized_outdoor"]["mean_cadence_diff_spm"]),
            "speed_band_stride_diff_m": float(speed_band_map["later_specialized_outdoor"]["mean_stride_diff_m"]),
            "mean_hr_residual_bpm": float(burden_map["later_specialized_outdoor_probes"]["mean_hr_residual_bpm"]),
            "running_share_28d_activity_pct_mean": "",
        },
        {
            "context_label": "october_2025_outside_cluster",
            "n_runs": len(october_rows),
            "cadence_residual_pct": round(mean(october_cadence), 4),
            "stride_residual_pct": round(mean(october_stride), 4),
            "vertical_ratio_residual_pct": round(mean([float(row["vertical_ratio_residual_pct"]) for row in october_rows]), 4),
            "nearest_speed_cadence_diff_spm": "",
            "nearest_speed_stride_diff_m": "",
            "speed_band_cadence_diff_spm": "",
            "speed_band_stride_diff_m": "",
            "mean_hr_residual_bpm": round(mean(october_hr), 4),
            "running_share_28d_activity_pct_mean": round(mean([float(row["running_share_28d_activity_pct"]) for row in october_rows]), 4),
        },
        {
            "context_label": "april_2026_boundary_probe",
            "n_runs": len(april_rows),
            "cadence_residual_pct": round(mean(april_cadence), 4),
            "stride_residual_pct": round(mean(april_stride), 4),
            "vertical_ratio_residual_pct": round(mean([float(row["vertical_ratio_residual_pct"]) for row in april_rows]), 4),
            "nearest_speed_cadence_diff_spm": "",
            "nearest_speed_stride_diff_m": "",
            "speed_band_cadence_diff_spm": "",
            "speed_band_stride_diff_m": "",
            "mean_hr_residual_bpm": round(mean(april_hr), 4),
            "running_share_28d_activity_pct_mean": round(mean([float(row["running_share_28d_activity_pct"]) for row in april_rows]), 4),
        },
        {
            # NOTE(review): the Microstudy B subset label is spelled
            # "early_outdoor_exploratory" while the Study 000F cluster label
            # is "early_exploratory_outdoor"; presumably both spellings match
            # their upstream tables - confirm.
            "context_label": "early_exploratory_outdoor",
            "n_runs": int(subset_map["early_outdoor_exploratory"]["n_runs"]),
            "cadence_residual_pct": float(subset_map["early_outdoor_exploratory"]["mean_cadence_residual_pct"]),
            "stride_residual_pct": float(subset_map["early_outdoor_exploratory"]["mean_stride_residual_pct"]),
            "vertical_ratio_residual_pct": float(subset_map["early_outdoor_exploratory"]["mean_vertical_ratio_residual_pct"]),
            "nearest_speed_cadence_diff_spm": float(nearest_map["early_outdoor_exploratory"]["mean_cadence_diff_spm"]),
            "nearest_speed_stride_diff_m": float(nearest_map["early_outdoor_exploratory"]["mean_stride_diff_m"]),
            "speed_band_cadence_diff_spm": float(speed_band_map["early_outdoor_exploratory"]["mean_cadence_diff_spm"]),
            "speed_band_stride_diff_m": float(speed_band_map["early_outdoor_exploratory"]["mean_stride_diff_m"]),
            # mean() yields 0.0 when the boundary table has no early rows.
            "mean_hr_residual_bpm": round(mean([float(row["hr_residual_bpm"]) for row in early_rows]), 4),
            "running_share_28d_activity_pct_mean": round(mean([float(row["running_share_28d_activity_pct"]) for row in early_rows]), 4),
        },
    ]

    scope_claims = [
        {
            "claim_type": "supported",
            "claim": "Cadence behaves like the more preservable running control variable under the current higher-demand proxy.",
        },
        {
            "claim_type": "supported",
            "claim": "Stride behaves like the more sacrificial expression variable under the same probes.",
        },
        {
            "claim_type": "supported",
            "claim": "Cadence preservation alone does not guarantee low internal burden.",
        },
        {
            "claim_type": "supported",
            "claim": "Envelope support appears to modulate whether the same protected cadence pattern remains tolerable or becomes costly.",
        },
        {
            "claim_type": "not_supported",
            "claim": "Cadence alone explains all burden outcomes.",
        },
        {
            "claim_type": "not_supported",
            "claim": "Stride suppression itself is proven to be the causal burden source.",
        },
    ]

    discovered_questions = [
        {
            "question_id": "Q007",
            "status": "ADDRESSED",
            "answered_in": "Study 000G",
            "question": "Is cadence the preserved control variable while stride becomes the sacrificed expression variable?",
        },
        {
            "question_id": "Q017",
            "status": "ADDRESSED",
            "answered_in": "Study 000G",
            "question": "Can cadence remain mechanically preservable while still failing to protect internal burden outside the successful context?",
        },
        {
            "question_id": "Q023",
            "status": "OPEN",
            "answered_in": "",
            "question": "Which envelope-support factors convert a cadence-preserved outside-surface run from high-burden to low-burden expression?",
        },
    ]

    # Keys are the output file stems; write_outputs() appends ".csv".
    return {
        "study000g_primary_answer": primary_answer,
        "study000g_evidence_family_matrix": evidence_families,
        "study000g_mechanism_bridge": mechanism_bridge,
        "study000g_hypothesis_tests": hypothesis_tests,
        "study000g_summary_table": summary_table,
        "study000g_scope_claims": scope_claims,
        "study000g_discovered_questions": discovered_questions,
    }


def write_outputs(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write every table in ``outputs`` to ``<stem>.csv`` in the package outputs folder.

    The column order of each file comes from the keys of the table's first
    row; an empty table gets an empty column list.
    """
    out_dir = PACKAGE_DIR / "outputs"
    for stem, rows in outputs.items():
        header = list(rows[0]) if rows else []
        destination = out_dir / f"{stem}.csv"
        write_csv(destination, rows, header)


def build_figures(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Render the three study figures as standalone SVG files.

    Writes ``figure01_cadence_stride_residuals.svg``,
    ``figure02_cadence_vs_burden.svg`` and
    ``figure03_envelope_support_neighbors.svg`` into the package ``figures``
    folder.

    Args:
        outputs: Table mapping; the ``study000g_summary_table`` and
            ``study000g_mechanism_bridge`` entries are read.

    Raises:
        KeyError: If either table, or one of the four summary context
            labels drawn in figure 1, is missing.
    """
    fig_dir = PACKAGE_DIR / "figures"

    summary_rows = outputs["study000g_summary_table"]
    (fig_dir / "figure01_cadence_stride_residuals.svg").write_text(
        _render_residual_bars(summary_rows), encoding="utf-8"
    )

    bridge_rows = outputs["study000g_mechanism_bridge"]
    (fig_dir / "figure02_cadence_vs_burden.svg").write_text(
        _render_cadence_vs_burden(bridge_rows), encoding="utf-8"
    )
    (fig_dir / "figure03_envelope_support_neighbors.svg").write_text(
        _render_neighbor_bars(bridge_rows), encoding="utf-8"
    )


def _render_residual_bars(summary_rows: list[dict[str, object]]) -> str:
    """Return figure 1: paired cadence/stride residual bars for each context."""
    width, height = 820, 360
    left, top, bottom = 70, 50, 280
    bar_w, gap = 28, 34
    chart_height = bottom - top
    zero_y = (top + bottom) / 2
    categories = [
        "later_specialized_outdoor",
        "october_2025_outside_cluster",
        "april_2026_boundary_probe",
        "early_exploratory_outdoor",
    ]
    row_map = {row["context_label"]: row for row in summary_rows}
    # Scale to the largest absolute residual, with a floor of 15 so that
    # small residuals do not stretch to the full chart height.
    max_abs = max(
        [15.0]
        + [abs(float(row["cadence_residual_pct"])) for row in summary_rows]
        + [abs(float(row["stride_residual_pct"])) for row in summary_rows]
    )
    scale = (chart_height / 2 - 10) / max_abs
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fbfaf6"/>',
        '<text x="28" y="28" font-size="16" font-family="Georgia" fill="#222">Cadence Preservation vs Stride Sacrifice</text>',
        f'<line x1="{left}" y1="{zero_y}" x2="{width-30}" y2="{zero_y}" stroke="#666" stroke-width="1"/>',
    ]
    for idx, label in enumerate(categories):
        row = row_map[label]
        x = left + idx * (bar_w * 2 + gap)
        cad = float(row["cadence_residual_pct"])
        stride = float(row["stride_residual_pct"])
        for offset, value, color in [(0, cad, "#4c78a8"), (bar_w + 6, stride, "#d62728")]:
            h = abs(value) * scale
            # Positive bars grow upward from the zero line, negative ones downward.
            y = zero_y - h if value >= 0 else zero_y
            svg.append(f'<rect x="{x+offset}" y="{y:.2f}" width="{bar_w}" height="{h:.2f}" fill="{color}" rx="4"/>')
        svg.append(f'<text x="{x+bar_w}" y="312" font-size="10" font-family="Arial" text-anchor="middle">{escape_xml(label)}</text>')
    svg.extend([
        '<rect x="28" y="42" width="12" height="12" fill="#4c78a8"/><text x="46" y="52" font-size="11" font-family="Arial">Cadence residual</text>',
        '<rect x="28" y="60" width="12" height="12" fill="#d62728"/><text x="46" y="70" font-size="11" font-family="Arial">Stride residual</text>',
        '</svg>',
    ])
    return "\n".join(svg)


def _render_cadence_vs_burden(bridge_rows: list[dict[str, object]]) -> str:
    """Return figure 2: cadence residual against HR residual, one dot per bridge row."""
    width, height = 820, 360
    left, top, bottom = 70, 40, 300
    chart_width = width - left - 40
    chart_height = bottom - top
    # default= keeps an empty bridge table from raising; "or 1.0" avoids a
    # division by zero when the largest value plus the 2-unit margin is 0.
    max_x = (max((float(row["cadence_residual_pct"]) for row in bridge_rows), default=0.0) + 2) or 1.0
    max_y = (max((float(row["hr_residual_bpm"]) for row in bridge_rows), default=0.0) + 2) or 1.0
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fffdf8"/>',
        '<text x="28" y="28" font-size="16" font-family="Georgia" fill="#222">Cadence Preservation vs HR Burden</text>',
    ]
    for row in bridge_rows:
        x = left + (float(row["cadence_residual_pct"]) / max_x) * chart_width
        y = bottom - (float(row["hr_residual_bpm"]) / max_y) * chart_height
        color = "#d62828" if row["cluster_label"] == "october_2025_outside_cluster" else "#2a9d8f"
        svg.append(f'<circle cx="{x:.2f}" cy="{y:.2f}" r="6" fill="{color}"/>')
        # The date comes from a source CSV, so escape it like the figure 1 labels.
        svg.append(f'<text x="{x+8:.2f}" y="{y-4:.2f}" font-size="10" font-family="Arial">{escape_xml(row["calendar_date"])}</text>')
    svg.extend([
        '<rect x="28" y="42" width="12" height="12" fill="#d62828"/><text x="46" y="52" font-size="11" font-family="Arial">October outside-envelope cluster</text>',
        '<rect x="28" y="60" width="12" height="12" fill="#2a9d8f"/><text x="46" y="70" font-size="11" font-family="Arial">April boundary probe</text>',
        '</svg>',
    ])
    return "\n".join(svg)


def _render_neighbor_bars(bridge_rows: list[dict[str, object]]) -> str:
    """Return figure 3: treadmill-neighbor count per bridge row, colored by burden state."""
    width, height = 860, 340
    left, top, bottom = 70, 50, 285
    bar_w, gap = 85, 24
    chart_height = bottom - top
    # default= keeps an empty bridge table from raising.
    max_neighbors = max((int(row["treadmill_neighbors_21d"]) for row in bridge_rows), default=0)
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fbfcff"/>',
        '<text x="28" y="28" font-size="16" font-family="Georgia" fill="#222">Envelope Support and Burden Outcome</text>',
    ]
    # Bars are drawn in table order (October rows first, then April).
    for idx, row in enumerate(bridge_rows):
        x = left + idx * (bar_w + gap)
        h = 0 if max_neighbors == 0 else (int(row["treadmill_neighbors_21d"]) / max_neighbors) * chart_height
        y = bottom - h
        color = "#e76f51" if row["burden_state"] == "positive_burden" else "#2a9d8f"
        svg.append(f'<rect x="{x}" y="{y:.2f}" width="{bar_w}" height="{h:.2f}" fill="{color}" rx="4"/>')
        svg.append(f'<text x="{x+bar_w/2:.2f}" y="305" font-size="10" font-family="Arial" text-anchor="middle">{escape_xml(row["calendar_date"])}</text>')
    svg.extend([
        '<rect x="28" y="42" width="12" height="12" fill="#e76f51"/><text x="46" y="52" font-size="11" font-family="Arial">Positive burden state</text>',
        '<rect x="28" y="60" width="12" height="12" fill="#2a9d8f"/><text x="46" y="70" font-size="11" font-family="Arial">Low-burden state</text>',
        '</svg>',
    ])
    return "\n".join(svg)


def build_reports(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write the markdown report set and the manuscript for the study package.

    Five files go to the package ``reports`` folder (plain-language summary,
    results, methods, discussion, audit) and one to ``manuscript``. Only the
    results report interpolates values from ``outputs``; the other texts are
    fixed.
    """
    reports_dir = PACKAGE_DIR / "reports"
    # Looked up as in the original even though the rows are not used below;
    # a missing table therefore still raises KeyError here.
    summary_rows = outputs["study000g_summary_table"]
    hypothesis_rows = outputs["study000g_hypothesis_tests"]
    bridge_rows = outputs["study000g_mechanism_bridge"]

    def cluster(label: str) -> list[dict[str, object]]:
        # Bridge rows that belong to one cluster.
        return [entry for entry in bridge_rows if entry["cluster_label"] == label]

    def mean_2dp(rows: list[dict[str, object]], column: str) -> float:
        # Column mean rounded to two decimals for display.
        return round(mean([float(entry[column]) for entry in rows]), 2)

    def emit(filename: str, body: str) -> None:
        (reports_dir / filename).write_text(body, encoding="utf-8")

    october = cluster("october_2025_outside_cluster")
    april = cluster("april_2026_boundary_probe")

    emit(
        "STUDY000G_PLAIN_LANGUAGE_SUMMARY.md",
        """# Study 000G Plain-Language Summary

Study `000G` tests whether cadence behaves like the more protected running control variable in this altered-mechanics archive, while stride behaves like the more sacrificial expression variable.

The data supports that model. In the later specialized outside-context probes, cadence stayed above treadmill-expected values in every case while stride stayed below expected values in every case. But cadence preservation did not automatically keep burden low. The October 2025 outside-envelope cluster kept the same protected-cadence pattern while still showing strongly positive HR residual burden. The April 2026 boundary probe showed the same mechanical pattern with much lower burden, which means cadence protection alone is not the whole story. Envelope support appears to matter.
""",
    )

    # NOTE(review): the four nearest-speed / speed-band figures in the text
    # below are fixed strings, not values read from ``outputs``.
    emit(
        "STUDY000G_RESULTS.md",
        f"""# Study 000G Results

## Primary Answer

`Yes, supported.`

Cadence behaves like the more preservable running control variable, stride behaves like the more sacrificial expression variable, and cadence preservation alone does not fully protect internal burden.

## Key Evidence

- later specialized cadence above expected: `{hypothesis_rows[0]["successes"]}/{hypothesis_rows[0]["trials"]}`, `p = {hypothesis_rows[0]["one_sided_binomial_p"]}`
- later specialized stride below expected: `{hypothesis_rows[1]["successes"]}/{hypothesis_rows[1]["trials"]}`, `p = {hypothesis_rows[1]["one_sided_binomial_p"]}`
- later specialized positive HR residual burden: `{hypothesis_rows[2]["successes"]}/{hypothesis_rows[2]["trials"]}`, `p = {hypothesis_rows[2]["one_sided_binomial_p"]}`

Nearest-speed pairing and speed-band pairing agreed with the directional result:

- nearest-speed later cadence difference: `+16.0 spm`
- nearest-speed later stride difference: `-0.1042 m`
- speed-band later cadence difference: `+15.79 spm`
- speed-band later stride difference: `-0.0976 m`

## Dissociation Between Mechanical And Physiological Protection

The strongest mechanism-level observation is that the same protected cadence pattern did not guarantee the same burden outcome.

- October 2025 outside-envelope cluster:
  - mean cadence residual: `{mean_2dp(october, "cadence_residual_pct")}%`
  - mean stride residual: `{mean_2dp(october, "stride_residual_pct")}%`
  - mean HR residual burden: `{mean_2dp(october, "hr_residual_bpm")} bpm`

- April 2026 boundary probe:
  - cadence residual: `{mean_2dp(april, "cadence_residual_pct")}%`
  - stride residual: `{mean_2dp(april, "stride_residual_pct")}%`
  - HR residual burden: `{mean_2dp(april, "hr_residual_bpm")} bpm`

That means cadence preservation appears sufficient for continued running expression, but not sufficient by itself for complete burden protection.
""",
    )

    emit(
        "STUDY000G_METHODS.md",
        """# Study 000G Methods

Study `000G` was built as a flagship hypothesis test using already-audited source outputs from:

- `Microstudy B` for preserved-turnover and suppressed-stride evidence
- `Study 000E` for outside-context burden evidence
- `Study 000F` for operating-envelope boundary expression evidence

The study did not merely restate those packages. It integrated them into a mechanism test with five evidence families:

1. directional cadence-versus-stride preservation
2. matched treadmill and speed-band agreement
3. burden persistence despite cadence preservation
4. boundary-exception handling
5. envelope-modulated burden outcome

The strongest direct mechanism rows came from the later specialized outside-context probes and the April 2026 boundary probe.
""",
    )

    emit(
        "STUDY000G_DISCUSSION.md",
        """# Study 000G Discussion

The archive now supports a stronger mechanism model than before:

`the system appears to protect rhythm first`

and

`stride becomes the more sacrificial expression variable when stabilization demand rises`

But `000G` also shows why cadence protection is not the final answer. The October 2025 outside-envelope cluster and the April 2026 boundary probe both preserved the same basic cadence-up / stride-down pattern. What changed was burden. That means cadence preservation is mechanically meaningful, but physiological protection depends on something else as well, most likely envelope support or local embedding state.

So the study does not argue that cadence is everything. It argues that cadence is one of the most preservable variables in the archive, and that its preservation helps maintain successful expression without fully determining internal cost.
""",
    )

    emit(
        "STUDY000G_AUDIT.md",
        """# Study 000G Audit

## Structural Audit

This package is structurally sound.

It includes:

- hypothesis-focused outputs
- figures
- manuscript/report set
- build script
- manifest

## Scientific Audit

Study `000G` asks:

`Is cadence the preserved control variable, and can preserved cadence fail to fully protect internal burden?`

The answer is:

`Yes, supported.`

Why:

- later specialized outside-context probes were directionally unanimous for cadence-up and stride-down
- paired and speed-band methods agreed
- burden remained positive in the later outside-context set
- the April 2026 boundary probe showed the same preserved-cadence pattern without the same burden magnitude

## Boundaries

- This is still an observational mechanism study.
- It does not prove cadence preservation is consciously strategic or biologically causal.
- It does not prove stride suppression is itself the direct burden source.
- It does support cadence protection as one mechanism helping maintain expression inside the broader operating-envelope framework.
""",
    )

    manuscript_path = PACKAGE_DIR / "manuscript" / "STUDY000G_MANUSCRIPT.md"
    manuscript_path.write_text(
        """# Study 000G: The Cadence Protection Hypothesis

## Core Question

Is cadence the preserved control variable in this altered-mechanics running archive, and does stride become the more sacrificial expression variable when stabilization demand rises? If so, why does cadence preservation fail to fully protect internal burden?

## Answer

The archive supports that model. Cadence behaved like the more preservable running control variable, stride behaved like the more sacrificial expression variable, and cadence preservation alone did not guarantee low burden.

## Thesis

`Cadence protection appears to be one mechanism that helps maintain successful running expression within the operating envelope, but envelope support still modulates whether that mechanically preserved pattern remains physiologically tolerable.`
""",
        encoding="utf-8",
    )


def build_readme() -> None:
    """Write the package-level ``README.md`` (a title plus one description paragraph)."""
    readme_lines = [
        "# STUDY_000G_CADENCE_PROTECTION_HYPOTHESIS_20260610",
        "",
        "Flagship mechanism study testing whether cadence behaves like the preserved control variable in the altered-mechanics archive, while stride becomes the more sacrificial expression variable and burden remains incompletely protected.",
        "",  # trailing empty entry yields the final newline
    ]
    target = PACKAGE_DIR / "README.md"
    target.write_text("\n".join(readme_lines), encoding="utf-8")


def build_manifest(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write ``manifest/study000g_state.json`` describing this build.

    The manifest holds the package name, a UTC generation timestamp, the
    primary answer code, and the row counts of the hypothesis-test and
    mechanism-bridge tables.
    """
    timestamp = dt.datetime.now(dt.timezone.utc).isoformat()
    first_answer = outputs["study000g_primary_answer"][0]
    state = {
        "package_name": PACKAGE_NAME,
        "generated_at": timestamp,
        "primary_answer": first_answer["answer"],
        "hypothesis_tests": len(outputs["study000g_hypothesis_tests"]),
        "mechanism_rows": len(outputs["study000g_mechanism_bridge"]),
    }
    target = PACKAGE_DIR / "manifest" / "study000g_state.json"
    target.write_text(json.dumps(state, indent=2), encoding="utf-8")


def update_question_log(
    outputs: dict[str, list[dict[str, object]]],
    log_path: Path | None = None,
) -> None:
    """Mark Q007 and Q017 as addressed in the program question log and add Q023.

    For each of the two questions, the block that starts at the line equal
    to the question id and runs to the next blank line gets its ``Status:``
    line rewritten to ``Status: ADDRESSED`` and an
    ``Answered In: / Study 000G / Answer:`` group appended. A block that
    already carries the ``Answered In:`` / ``Study 000G`` pair is left as it
    is, so repeated builds do not stack duplicate answers. A ``Q023`` entry
    is appended when the log does not mention that id yet.

    Args:
        outputs: Unused; kept so existing callers keep working.
        log_path: Log file to update. Defaults to the program-support log
            under the study tree.

    Returns:
        None. Nothing is written when the log file does not exist.
    """
    if log_path is None:
        log_path = Path(r"C:\Study\Program Support\DISCOVERED_PROGRAM_QUESTIONS_20260609.txt")
    if not log_path.exists():
        return
    text = log_path.read_text(encoding="utf-8")

    def replace_block(source: str, qid: str, answer_line: str) -> str:
        # Rewrite the block headed by ``qid``; all other lines pass through.
        if qid not in source:
            return source
        lines = source.splitlines()
        out: list[str] = []
        i = 0
        while i < len(lines):
            line = lines[i]
            out.append(line)
            i += 1
            if line.strip() != qid:
                continue
            # The block runs from the line after the id to the next blank line.
            end = i
            while end < len(lines) and lines[end].strip():
                end += 1
            block = lines[i:end]
            stripped = [entry.strip() for entry in block]
            already_answered = any(
                first == "Answered In:" and second == "Study 000G"
                for first, second in zip(stripped, stripped[1:])
            )
            if already_answered:
                out.extend(block)
            else:
                out.extend(
                    "Status: ADDRESSED" if entry.startswith("Status:") else entry
                    for entry in block
                )
                out.extend(["Answered In:", "Study 000G", "Answer:", answer_line])
            i = end
        rebuilt = "\n".join(out)
        # splitlines() drops the final line break; put it back so the file
        # keeps its trailing newline.
        if source.endswith(("\n", "\r")):
            rebuilt += "\n"
        return rebuilt

    text = replace_block(
        text,
        "Q007",
        "Yes. Cadence behaved like the more preservable control variable while stride behaved like the more sacrificial expression variable.",
    )
    text = replace_block(
        text,
        "Q017",
        "Yes. Cadence could remain mechanically preserved while burden still rose, which means cadence protection alone did not guarantee low internal cost.",
    )
    if "Q023" not in text:
        text = text.rstrip() + "\n\nQ023\nStatus: OPEN\nBorn From: Study 000G\nQuestion:\nWhich envelope-support factors convert a cadence-preserved outside-surface run from high-burden to low-burden expression?\nWhy It Matters:\nThis is the next strongest child question created by the cadence-protection mechanism test.\nNotes:\nThe October 2025 outside cluster and April 2026 boundary probe shared the same cadence-up / stride-down pattern but differed sharply in burden.\n"
    log_path.write_text(text, encoding="utf-8")


def copy_builder() -> None:
    """Copy this script, under its own file name, into the package ``analysis`` folder."""
    script = Path(__file__)
    destination = PACKAGE_DIR / "analysis" / script.name
    shutil.copy2(script, destination)


def build_zip() -> None:
    """Replace any existing package zip with a fresh archive of the package folder."""
    stale_archive_present = ZIP_PATH.exists()
    if stale_archive_present:
        ZIP_PATH.unlink()
    # The base name is the package directory path; make_archive appends ".zip".
    archive_base = str(PACKAGE_DIR)
    shutil.make_archive(archive_base, "zip", root_dir=PACKAGE_DIR)


def main() -> None:
    """Rebuild the whole study package from scratch and print its locations.

    Any existing package folder is deleted first. The tables are then built
    and written, followed by the figures, reports, README, manifest,
    question-log update, builder copy and zip archive.
    """
    if PACKAGE_DIR.exists():
        shutil.rmtree(PACKAGE_DIR)
    ensure_dirs()

    outputs = build_outputs()
    for stage in (write_outputs, build_figures, build_reports):
        stage(outputs)
    build_readme()
    for stage in (build_manifest, update_question_log):
        stage(outputs)

    copy_builder()
    build_zip()
    for produced in (PACKAGE_DIR, ZIP_PATH):
        print(produced)


# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
