from __future__ import annotations

import csv
import json
import math
import shutil
from pathlib import Path


# Base directory under which the Study 000D package directory is placed.
ROOT = Path(r"C:\BalancedTrim\analyzer")
# Package directory for this study; every output of this script is written below it.
STUDY_DIR = ROOT / "STUDY_000D_SELECTIVE_EXPRESSION_VS_NORMALIZATION_20260609"
# Directory containing the upstream study packages whose CSV tables are copied in.
SOURCE_DIR = Path(r"C:\Study\Studies")


def ensure_dirs() -> None:
    """Create the study package directory and its standard subdirectories.

    Directories that already exist are left untouched. Missing parents of
    ``STUDY_DIR`` are created as well, so the build also works when the
    directory named by ``ROOT`` does not exist yet.
    """
    subdirs = (
        "analysis",
        "figures",
        "manifest",
        "manuscript",
        "outputs",
        "reports",
        "source_tables",
    )
    # parents=True: without it the very first mkdir fails with
    # FileNotFoundError whenever ROOT itself is missing.
    STUDY_DIR.mkdir(parents=True, exist_ok=True)
    for subdir in subdirs:
        (STUDY_DIR / subdir).mkdir(parents=True, exist_ok=True)


def copy_sources() -> dict[str, Path]:
    """Copy the upstream study tables into ``STUDY_DIR / "source_tables"``.

    Each file keeps its original name. File metadata is preserved because the
    copy is done with ``shutil.copy2``.

    Returns:
        Mapping of file name to the path of the copy inside the package, in
        the order the sources are listed in ``layout``.

    Raises:
        FileNotFoundError: if one or more source tables do not exist. All
            sources are checked before the first copy so that a failed run
            does not leave a partially refreshed ``source_tables`` directory.
    """
    study_000a = SOURCE_DIR / "STUDY_000A_LONGITUDINAL_ADAPTATION_SYNTHESIS_20260608"
    study_000b = SOURCE_DIR / "STUDY_000B_ADAPTIVE_EFFICIENCY_AND_INTERNAL_COST_20260609"
    microstudy_b = (
        SOURCE_DIR
        / "STUDY_000B_MICROSTUDY_B_PRESERVED_TURNOVER_SUPPRESSED_STRIDE_20260609"
    )
    study_000c = SOURCE_DIR / "STUDY_000C_FULL_DATA_EXHAUSTIVE_PROGRAM_ANALYSIS_20260609"

    # (upstream package directory, subdirectory, file name)
    layout = [
        (study_000a, "outputs", "study000a_phase_model.csv"),
        (study_000a, "outputs", "study000a_speed_gain_decomposition.csv"),
        (study_000a, "outputs", "study000a_window_change_summary.csv"),
        (study_000a, "outputs", "study000a_yearly_overview.csv"),
        (study_000a, "source_tables", "v7_pillar_total_adaptation_ecology_yearly_v1.csv"),
        (study_000b, "outputs", "study000b_window_summary.csv"),
        (microstudy_b, "outputs", "microstudy_b_subset_summary.csv"),
        (microstudy_b, "outputs", "microstudy_b_sign_tests.csv"),
        (microstudy_b, "outputs", "microstudy_b_adjacent_metric_summary.csv"),
        (study_000c, "outputs", "study000c_corrected_ecology_yearly.csv"),
        (study_000c, "outputs", "study000c_claim_stability_matrix.csv"),
    ]
    source_map = {name: package / subdir / name for package, subdir, name in layout}

    # Report every missing input at once instead of failing on the first one
    # after some files have already been copied.
    missing = [str(src) for src in source_map.values() if not src.exists()]
    if missing:
        raise FileNotFoundError("Missing source table(s): " + "; ".join(missing))

    target_dir = STUDY_DIR / "source_tables"
    target_dir.mkdir(parents=True, exist_ok=True)

    copied: dict[str, Path] = {}
    for name, src in source_map.items():
        dst = target_dir / name
        shutil.copy2(src, dst)
        copied[name] = dst
    return copied


def read_csv(path: Path) -> list[dict[str, str]]:
    """Read a CSV file with a header row into a list of row dictionaries.

    Args:
        path: CSV file to read.

    Returns:
        One dict per data row, keyed by the header names.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but also strips a leading BOM.
    # With plain "utf-8" a BOM would become part of the first column name and
    # every later lookup of that column would raise KeyError.
    with path.open(newline="", encoding="utf-8-sig") as handle:
        return list(csv.DictReader(handle))


def write_csv(path: Path, rows: list[dict[str, object]], fieldnames: list[str]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 CSV with a header line.

    Args:
        path: Destination file; overwritten if it already exists.
        rows: Row dictionaries keyed by column name.
        fieldnames: Column names, in the order they are written.
    """
    with path.open("w", newline="", encoding="utf-8") as sink:
        table = csv.DictWriter(sink, fieldnames=fieldnames)
        table.writeheader()
        table.writerows(rows)


def pct(num: float) -> str:
    """Format ``num`` with exactly two decimal places (no percent sign)."""
    return format(num, ".2f")


def load_key_rows(sources: dict[str, Path]) -> dict[str, object]:
    """Read the copied source tables and index them for later lookups.

    Args:
        sources: Mapping of source file name to its path, as returned by
            ``copy_sources``.

    Returns:
        A dict with these entries:

        * ``phase_rows``: all rows of the phase model table (a list).
        * ``speed_row``: the first row of the speed-gain decomposition table.
        * ``window_rows``: window-change rows keyed by ``metric``.
        * ``burden_rows``: window summary rows keyed by ``window_label``.
        * ``micro_subset_rows``: subset summary rows keyed by ``subset_label``.
        * ``sign_rows``: sign-test rows keyed by
          ``(subset_label, direction_test)``.
        * ``adjacent_rows``: adjacent-metric rows keyed by
          ``(subset_label, metric)``.
        * ``ecology_rows``: corrected ecology rows keyed by ``year``.
        * ``claim_rows``: claim stability rows keyed by ``claim_id``.

        When a key occurs more than once in a table, the last row wins.

    Raises:
        KeyError: if a required file name is absent from ``sources`` or a key
            column is absent from a table.
        IndexError: if the speed-gain decomposition table has no data rows.
    """
    phase_rows = read_csv(sources["study000a_phase_model.csv"])
    speed_rows = read_csv(sources["study000a_speed_gain_decomposition.csv"])
    window_rows = read_csv(sources["study000a_window_change_summary.csv"])
    burden_rows = read_csv(sources["study000b_window_summary.csv"])
    micro_subset_rows = read_csv(sources["microstudy_b_subset_summary.csv"])
    sign_rows = read_csv(sources["microstudy_b_sign_tests.csv"])
    adjacent_rows = read_csv(sources["microstudy_b_adjacent_metric_summary.csv"])
    ecology_rows = read_csv(sources["study000c_corrected_ecology_yearly.csv"])
    claim_rows = read_csv(sources["study000c_claim_stability_matrix.csv"])

    # Same exception type as the bare speed_rows[0] would raise, but with a
    # message that names the offending table.
    if not speed_rows:
        raise IndexError(
            "study000a_speed_gain_decomposition.csv contains no data rows"
        )

    return {
        "phase_rows": phase_rows,
        "speed_row": speed_rows[0],
        "window_rows": {row["metric"]: row for row in window_rows},
        "burden_rows": {row["window_label"]: row for row in burden_rows},
        "micro_subset_rows": {row["subset_label"]: row for row in micro_subset_rows},
        "sign_rows": {
            (row["subset_label"], row["direction_test"]): row for row in sign_rows
        },
        "adjacent_rows": {
            (row["subset_label"], row["metric"]): row for row in adjacent_rows
        },
        "ecology_rows": {row["year"]: row for row in ecology_rows},
        "claim_rows": {row["claim_id"]: row for row in claim_rows},
    }


def build_phase_environment_table(phase_rows: list[dict[str, str]]) -> list[dict[str, object]]:
    """Build the per-phase environment table from the phase model rows.

    Identification columns are passed through unchanged. The treadmill and
    outdoor run shares are converted to ``float`` without rounding; their
    difference and the remaining numeric columns are rounded to three
    decimals.

    Args:
        phase_rows: Rows of the phase model table (string values).

    Returns:
        One output dict per input row, in input order.
    """

    def _rounded(record: dict[str, str], column: str) -> float:
        return round(float(record[column]), 3)

    table: list[dict[str, object]] = []
    for record in phase_rows:
        treadmill_share = float(record["treadmill_run_share_pct"])
        outdoor_share = float(record["outdoor_run_share_pct"])

        entry: dict[str, object] = {}
        entry["phase_id"] = record["phase_id"]
        entry["phase_label"] = record["phase_label"]
        entry["dominant_modality"] = record["dominant_modality"]
        entry["mean_running_share_pct"] = _rounded(record, "mean_running_share_pct")
        entry["mean_indoor_share_pct"] = _rounded(record, "mean_indoor_share_pct")
        entry["treadmill_run_share_pct"] = treadmill_share
        entry["outdoor_run_share_pct"] = outdoor_share
        entry["stabilized_minus_outdoor_gap_pct_points"] = round(
            treadmill_share - outdoor_share, 3
        )
        entry["mean_running_hours_28d"] = _rounded(record, "mean_running_hours_28d")
        entry["mean_total_activity_hours_28d"] = _rounded(
            record, "mean_total_activity_hours_28d"
        )
        entry["mean_resting_hr"] = _rounded(record, "mean_resting_hr")
        table.append(entry)
    return table


def build_yearly_expression_table(ecology_rows: dict[str, dict[str, str]], yearly_rows: list[dict[str, str]]) -> list[dict[str, object]]:
    """Combine yearly running distances with the corrected ecology shares.

    Rows are produced for the years 2023 through 2026, in that order. The
    outdoor and treadmill shares are percentages of the year's running
    distance and are 0.0 when that distance is zero.

    Args:
        ecology_rows: Corrected ecology rows keyed by year string.
        yearly_rows: Yearly overview rows, each with a ``year`` column.

    Returns:
        One dict per year with distances, surface shares and ecology columns.

    Raises:
        KeyError: if one of the four years is missing from either input.
    """
    by_year = {record["year"]: record for record in yearly_rows}

    def _share_pct(part: float, whole: float) -> float:
        return (part / whole * 100.0) if whole else 0.0

    table: list[dict[str, object]] = []
    for year in ("2023", "2024", "2025", "2026"):
        totals = by_year[year]
        ecology = ecology_rows[year]
        total_miles = float(totals["running_distance_miles"])
        miles_outdoor = float(totals["running_outdoor_distance_miles"])
        miles_treadmill = float(totals["running_treadmill_distance_miles"])
        pct_outdoor = _share_pct(miles_outdoor, total_miles)
        pct_treadmill = _share_pct(miles_treadmill, total_miles)

        entry: dict[str, object] = {
            "year": year,
            "running_distance_miles": round(total_miles, 3),
            "running_outdoor_distance_miles": round(miles_outdoor, 3),
            "running_treadmill_distance_miles": round(miles_treadmill, 3),
            "outdoor_running_share_pct": round(pct_outdoor, 3),
            "treadmill_running_share_pct": round(pct_treadmill, 3),
        }
        for column in (
            "running_share_of_structured_hours",
            "indoor_share_of_structured_hours",
        ):
            entry[column] = round(float(ecology[column]), 2)
        entry["hybrid_indoor_cardio_count"] = int(ecology["hybrid_indoor_cardio_count"])
        entry["hybrid_indoor_cardio_pct_of_indoor_hours"] = round(
            float(ecology["hybrid_indoor_cardio_pct_of_indoor_hours"]), 2
        )
        table.append(entry)
    return table


def build_signature_test_tables(data: dict[str, object]) -> tuple[list[dict[str, object]], list[dict[str, object]]]:
    """Build the signature evidence matrix and the decision table.

    Args:
        data: Indexed source rows as returned by ``load_key_rows``. This
            function reads the ``speed_row``, ``burden_rows``,
            ``micro_subset_rows``, ``adjacent_rows`` and ``ecology_rows``
            entries.

    Returns:
        ``(evidence_rows, decision_rows)``. Evidence rows carry the columns
        ``evidence_id``, ``evidence_family``, ``normalization_signature``,
        ``selective_expression_signature``, ``observed_result``,
        ``aligned_with`` and ``confidence``. Decision rows carry
        ``question_axis``, ``answer``, ``key_evidence`` and ``supports``.

    Raises:
        KeyError: if an expected window, subset, metric, year or column is
            missing from ``data``.
    """
    speed_row = data["speed_row"]
    burden_rows = data["burden_rows"]
    micro_subset_rows = data["micro_subset_rows"]
    adjacent_rows = data["adjacent_rows"]
    ecology_rows = data["ecology_rows"]

    early_burden = burden_rows["early_qc_window"]
    late_burden = burden_rows["late_qc_window"]
    later_micro = micro_subset_rows["later_specialized_outdoor"]

    # Formatted once so the evidence matrix and the decision table always
    # quote the same cadence share.
    cadence_share = pct(float(speed_row["cadence_log_share_pct"]))

    # NOTE(review): several figures below (yearly shares in SE1, the "5/5" and
    # "all 8" run counts, "7 hybrid sessions") are literal text rather than
    # values read from the source tables; confirm them whenever the upstream
    # tables are regenerated.
    evidence_rows = [
        {
            "evidence_id": "SE1",
            "evidence_family": "Ecological Breadth",
            "normalization_signature": "Usable running expression broadens across environments over time.",
            "selective_expression_signature": "Successful running expression becomes more concentrated in selected conditions.",
            "observed_result": "2025 running share of structured hours 21.00% vs 2026 84.18%; 2025 treadmill running share 92.78% vs 2026 98.74%.",
            "aligned_with": "selective_expression",
            "confidence": "high",
        },
        {
            "evidence_id": "SE2",
            "evidence_family": "Mechanical Gain Pathway",
            "normalization_signature": "Speed gain is distributed more evenly across multiple mechanical degrees of freedom.",
            "selective_expression_signature": "Speed gain is built mainly through the most accessible lever.",
            "observed_result": f"Cadence change {pct(float(speed_row['cadence_pct_change']))}% vs stride change {pct(float(speed_row['stride_pct_change']))}%; cadence share of speed gain {cadence_share}%.",
            "aligned_with": "selective_expression",
            "confidence": "high",
        },
        {
            "evidence_id": "SE3",
            "evidence_family": "Higher-Demand Probe Behavior",
            "normalization_signature": "Later higher-demand probes preserve stride expression near stabilized-context expectations.",
            "selective_expression_signature": "Later higher-demand probes preserve turnover more successfully than stride expression.",
            "observed_result": f"Later outdoor cadence residual {later_micro['mean_cadence_residual_pct']}%; stride residual {later_micro['mean_stride_residual_pct']}%; cadence above expected 5/5; stride below expected 5/5.",
            "aligned_with": "selective_expression",
            "confidence": "high",
        },
        {
            "evidence_id": "SE4",
            "evidence_family": "Constraint Interpretation",
            "normalization_signature": "Constraint-like signals should dissolve as adaptation matures.",
            "selective_expression_signature": "Constraint-like signals persist and reappear across context shifts.",
            "observed_result": "Vertical ratio remained the lowest-variability mechanics signal in 000A and was above expected in all 8 outdoor QC-pass runs and 5/5 later specialized outdoor runs.",
            "aligned_with": "selective_expression",
            "confidence": "moderate_high",
        },
        {
            "evidence_id": "SE5",
            "evidence_family": "Internal Cost",
            "normalization_signature": "As specialization matures, unexplained session burden disappears.",
            "selective_expression_signature": "Efficiency improves, but burden does not disappear and can persist under higher-demand probes.",
            "observed_result": f"Speed-per-HR rose from {early_burden['speed_per_hr_mean']} to {late_burden['speed_per_hr_mean']}; HR residual shifted from {early_burden['hr_residual_bpm_mean']} bpm to {late_burden['hr_residual_bpm_mean']} bpm; later outdoor mean HR residual {later_micro['mean_hr_residual_pct']}%.",
            "aligned_with": "selective_expression",
            "confidence": "moderate_high",
        },
        {
            "evidence_id": "SE6",
            "evidence_family": "Ecology Correction",
            "normalization_signature": "Correction of hidden context should weaken the selective-expression model.",
            "selective_expression_signature": "Correction may refine ecology wording without overturning the core model.",
            "observed_result": f"2025 hybrid indoor-cardio hours were {ecology_rows['2025']['hybrid_indoor_cardio_hours']} and the early flagship window contained 7 hybrid sessions, but 000C retained phase structure, turnover-led gain, conserved mechanics, and mixed burden.",
            "aligned_with": "selective_expression",
            "confidence": "moderate_high",
        },
    ]

    later_gct = adjacent_rows[("later_specialized_outdoor", "ground_contact_time_ms")]
    later_vo = adjacent_rows[("later_specialized_outdoor", "vertical_oscillation_cm")]

    decision_rows = [
        {
            "question_axis": "Did later adaptation broaden environmental expression?",
            "answer": "No",
            "key_evidence": "Running specialization intensified while treadmill running share rose and outdoor running share fell.",
            "supports": "selective_expression",
        },
        {
            "question_axis": "Did later speed gain distribute evenly across cadence and stride?",
            "answer": "No",
            # Derived from speed_row (was a hard-coded "62.42%") so it cannot
            # drift away from the value reported in evidence row SE2.
            "key_evidence": f"Cadence carried {cadence_share}% of the cadence-stride speed gain.",
            "supports": "selective_expression",
        },
        {
            "question_axis": "Did later higher-demand probes preserve stride expression?",
            "answer": "No",
            "key_evidence": "Later outdoor cadence remained above expected while stride remained below expected in 5/5 runs.",
            "supports": "selective_expression",
        },
        {
            "question_axis": "Did later adaptation erase session-level burden?",
            "answer": "No",
            "key_evidence": "Late window HR residuals shifted upward and later outdoor probes remained positive.",
            "supports": "selective_expression",
        },
        {
            "question_axis": "Did full-data correction overturn the main model?",
            "answer": "No",
            "key_evidence": "000C refined ecology wording but retained the main running conclusions.",
            "supports": "selective_expression",
        },
        {
            "question_axis": "Did vertical-ratio rise reflect simple extra bounce?",
            "answer": "No",
            "key_evidence": f"Later outdoor GCT residual {later_gct['mean_residual_value']} and vertical-oscillation residual {later_vo['mean_residual_value']} were both below expected in 5/5 runs.",
            "supports": "selective_expression",
        },
    ]

    return evidence_rows, decision_rows


def build_primary_conclusion(data: dict[str, object]) -> list[dict[str, object]]:
    """Build the single-row primary conclusion table for Study 000D.

    Args:
        data: Indexed source rows as returned by ``load_key_rows``. This
            function reads the ``speed_row``, ``burden_rows``,
            ``micro_subset_rows`` and ``ecology_rows`` entries.

    Returns:
        A list containing one dict with the study id, the core question, the
        answer, and four anchor sentences (ecology, mechanics, burden and
        full-data).

    Raises:
        KeyError: if an expected window, subset, year or column is missing.
    """
    speed_row = data["speed_row"]
    burden_rows = data["burden_rows"]
    micro_subset_rows = data["micro_subset_rows"]
    ecology_rows = data["ecology_rows"]
    late_burden = burden_rows["late_qc_window"]
    later_micro = micro_subset_rows["later_specialized_outdoor"]

    # NOTE(review): the treadmill running shares in ecology_anchor are literal
    # text, not read from a source table; confirm them when inputs change.
    return [
        {
            "study_id": "000D",
            "core_question": "Did the six-year system adapt through broad normalization or through selective expression?",
            "dataset_answer": "Selective expression",
            "summary_claim": "The full program aligns more strongly with selective expression than with broad normalization.",
            "ecology_anchor": f"2025 running share {ecology_rows['2025']['running_share_of_structured_hours']}% -> 2026 {ecology_rows['2026']['running_share_of_structured_hours']}%; treadmill running share 92.78% -> 98.74%.",
            "mechanics_anchor": f"Cadence carried {pct(float(speed_row['cadence_log_share_pct']))}% of cadence-stride speed gain; later outdoor cadence residual {later_micro['mean_cadence_residual_pct']}% and stride residual {later_micro['mean_stride_residual_pct']}%.",
            "burden_anchor": f"Late speed-per-HR {late_burden['speed_per_hr_mean']}; late HR residual {late_burden['hr_residual_bpm_mean']} bpm; later outdoor mean HR residual {later_micro['mean_hr_residual_pct']}%.",
            "full_data_anchor": "000C retained the main model after correcting hidden hybrid-cardio context.",
        }
    ]


def build_scope_claims() -> list[dict[str, object]]:
    """Return the fixed list of scope claims D1 through D6.

    Each claim is a dict with the columns ``claim_id``, ``claim_label``,
    ``question``, ``answer`` and ``status``.
    """
    columns = ("claim_id", "claim_label", "question", "answer", "status")
    records = (
        (
            "D1",
            "Selective Expression Preferred",
            "Does the corrected full program align more strongly with selective expression than with broad normalization?",
            "Yes",
            "answered",
        ),
        (
            "D2",
            "Environmental Narrowing",
            "Did later successful running expression become more concentrated in stabilized context rather than broader in outdoor tolerance?",
            "Yes",
            "answered",
        ),
        (
            "D3",
            "Accessible Lever",
            "Did later running gain rely more on cadence than on stride expansion?",
            "Yes",
            "answered",
        ),
        (
            "D4",
            "Persistent Constraint Under Higher Demand",
            "Did higher-demand probes preserve turnover more readily than stride expression?",
            "Yes",
            "answered",
        ),
        (
            "D5",
            "Burden Not Erased",
            "Did later adaptation erase unexplained session-level burden?",
            "No",
            "answered",
        ),
        (
            "D6",
            "Normative Comparison",
            "Can this study prove how a normative comparison system would adapt under the same conditions?",
            "No",
            "not_answered",
        ),
    )
    return [dict(zip(columns, record)) for record in records]


def write_svg_bar_chart(path: Path, rows: list[dict[str, object]]) -> None:
    """Write a grouped bar chart of per-phase running and surface shares.

    Each row produces one group of three bars (running share, treadmill run
    share, outdoor run share) on a 0-100 scale, with the row's ``phase_id``
    as the label centred under the group.

    Args:
        path: Destination SVG file; overwritten if it already exists.
        rows: Phase rows providing ``phase_id``, ``mean_running_share_pct``,
            ``treadmill_run_share_pct`` and ``outdoor_run_share_pct``.
    """
    width = 920
    height = 420
    margin_left = 70
    margin_bottom = 60
    plot_width = 780
    plot_height = 280
    max_pct = 100.0
    bar_width = 28
    gap = 18
    phase_gap = 36
    x = margin_left
    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width/2}" y="28" text-anchor="middle" font-family="Arial" font-size="18">Phase Running Share and Running Surface Concentration</text>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left+plot_width}" y2="{height-margin_bottom}" stroke="black"/>',
        f'<line x1="{margin_left}" y1="{height-margin_bottom}" x2="{margin_left}" y2="{height-margin_bottom-plot_height}" stroke="black"/>',
    ]
    for tick in range(0, 101, 20):
        y = height - margin_bottom - (tick / max_pct) * plot_height
        svg_parts.append(f'<line x1="{margin_left-5}" y1="{y}" x2="{margin_left+plot_width}" y2="{y}" stroke="#dddddd"/>')
        svg_parts.append(f'<text x="{margin_left-10}" y="{y+4}" text-anchor="end" font-family="Arial" font-size="11">{tick}</text>')

    colors = {
        "running": "#1f77b4",
        "treadmill": "#2ca02c",
        "outdoor": "#d62728",
    }
    series = [
        ("running", "mean_running_share_pct"),
        ("treadmill", "treadmill_run_share_pct"),
        ("outdoor", "outdoor_run_share_pct"),
    ]
    # Visible extent of one group: its bars plus the gaps between them. The
    # gap that follows the last bar is not part of the group.
    group_width = len(series) * bar_width + (len(series) - 1) * gap
    for row in rows:
        group_start = x
        for label, key in series:
            value = float(row[key])
            bar_height = (value / max_pct) * plot_height
            y = height - margin_bottom - bar_height
            svg_parts.append(
                f'<rect x="{x}" y="{y}" width="{bar_width}" height="{bar_height}" fill="{colors[label]}"/>'
            )
            x += bar_width + gap
        # Centre the label on the group itself. The earlier formula removed
        # only half of the trailing gap, which shifted every label gap/2
        # pixels to the right of its bars.
        center = group_start + group_width / 2
        svg_parts.append(
            f'<text x="{center}" y="{height-margin_bottom+20}" text-anchor="middle" font-family="Arial" font-size="11">{row["phase_id"]}</text>'
        )
        x += phase_gap
    legend_y = height - 18
    legend_x = margin_left + 120
    for label, color in [("Running share", colors["running"]), ("Treadmill share", colors["treadmill"]), ("Outdoor share", colors["outdoor"])]:
        svg_parts.append(f'<rect x="{legend_x}" y="{legend_y-10}" width="12" height="12" fill="{color}"/>')
        svg_parts.append(f'<text x="{legend_x+18}" y="{legend_y}" font-family="Arial" font-size="12">{label}</text>')
        legend_x += 180
    svg_parts.append("</svg>")
    path.write_text("\n".join(svg_parts), encoding="utf-8")


def write_svg_signature_chart(path: Path, rows: list[dict[str, object]]) -> None:
    """Write the evidence matrix as an SVG table.

    Each row shows the evidence family, the observed result wrapped at 70
    characters, and a badge naming the aligned model. The badge is green for
    ``selective_expression`` and red for anything else.

    Args:
        path: Destination SVG file; overwritten if it already exists.
        rows: Evidence rows providing ``evidence_family``,
            ``observed_result`` and ``aligned_with``.
    """
    width = 980
    min_row_height = 44
    line_height = 13
    header_height = 50

    # Wrap first so each row can be made tall enough for its own text. A
    # fixed 44 px row fits three wrapped lines; longer results used to run
    # into the following row.
    wrapped = [split_text(row["observed_result"], 70) for row in rows]
    row_heights = [
        max(min_row_height, line_height * len(lines) + 5) for lines in wrapped
    ]
    height = header_height + sum(row_heights) + 30

    svg_parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        '<text x="490" y="28" text-anchor="middle" font-family="Arial" font-size="18">Selective Expression Evidence Matrix</text>',
    ]
    headers = [("Evidence Family", 20), ("Observed Result", 230), ("Aligned With", 820)]
    for text, x in headers:
        svg_parts.append(f'<text x="{x}" y="52" font-family="Arial" font-size="12" font-weight="bold">{text}</text>')

    y = 70
    for row, observed_lines, row_height in zip(rows, wrapped, row_heights):
        # Every value placed in text content is escaped, matching the
        # handling of the observed-result lines.
        family = escape_xml(str(row["evidence_family"]))
        aligned = escape_xml(str(row["aligned_with"]))
        svg_parts.append(f'<line x1="20" y1="{y-14}" x2="960" y2="{y-14}" stroke="#dddddd"/>')
        svg_parts.append(f'<text x="20" y="{y}" font-family="Arial" font-size="12">{family}</text>')
        oy = y
        for line in observed_lines:
            svg_parts.append(f'<text x="230" y="{oy}" font-family="Arial" font-size="11">{escape_xml(line)}</text>')
            oy += line_height
        fill = "#d9ead3" if row["aligned_with"] == "selective_expression" else "#f4cccc"
        svg_parts.append(f'<rect x="820" y="{y-12}" width="120" height="18" rx="4" fill="{fill}" stroke="#999999"/>')
        svg_parts.append(f'<text x="880" y="{y+1}" text-anchor="middle" font-family="Arial" font-size="11">{aligned}</text>')
        y += row_height
    svg_parts.append("</svg>")
    path.write_text("\n".join(svg_parts), encoding="utf-8")


def write_svg_yearly_surface_chart(path: Path, rows: list[dict[str, object]]) -> None:
    """Write a stacked bar chart of yearly treadmill and outdoor running share.

    Each row produces one bar: the treadmill share at the bottom with the
    outdoor share stacked on top, both on a 0-100% scale. The outdoor value
    is printed inside its segment when the segment is taller than 18 px and
    above the bar otherwise.

    Args:
        path: Destination SVG file; overwritten if it already exists.
        rows: Yearly rows providing ``year``, ``treadmill_running_share_pct``
            and ``outdoor_running_share_pct``.
    """
    width, height = 920, 420
    margin_left, margin_bottom = 90, 70
    plot_width, plot_height = 720, 240
    bar_width, gap = 110, 55
    treadmill_color = "#2ca02c"
    outdoor_color = "#d62728"

    base_y = height - margin_bottom
    right_edge = margin_left + plot_width
    label_font = 'text-anchor="middle" font-family="Arial"'

    parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">',
        '<rect width="100%" height="100%" fill="white"/>',
        f'<text x="{width/2}" y="28" text-anchor="middle" font-family="Arial" font-size="18">Yearly Running Surface Share Shows Narrowed Expression</text>',
        f'<line x1="{margin_left}" y1="{base_y}" x2="{right_edge}" y2="{base_y}" stroke="black"/>',
        f'<line x1="{margin_left}" y1="{base_y}" x2="{margin_left}" y2="{base_y-plot_height}" stroke="black"/>',
    ]

    # Horizontal grid lines with percentage labels every 20 points.
    for tick in range(0, 101, 20):
        tick_y = base_y - (tick / 100.0) * plot_height
        parts.extend(
            [
                f'<line x1="{margin_left-5}" y1="{tick_y}" x2="{right_edge}" y2="{tick_y}" stroke="#dddddd"/>',
                f'<text x="{margin_left-10}" y="{tick_y+4}" text-anchor="end" font-family="Arial" font-size="11">{tick}%</text>',
            ]
        )

    for index, record in enumerate(rows):
        left = margin_left + 40 + index * (bar_width + gap)
        share_treadmill = float(record["treadmill_running_share_pct"])
        share_outdoor = float(record["outdoor_running_share_pct"])
        treadmill_h = (share_treadmill / 100.0) * plot_height
        outdoor_h = (share_outdoor / 100.0) * plot_height
        treadmill_top = base_y - treadmill_h
        middle = left + bar_width / 2

        parts.append(
            f'<rect x="{left}" y="{treadmill_top}" width="{bar_width}" height="{treadmill_h}" fill="{treadmill_color}"/>'
        )
        parts.append(
            f'<rect x="{left}" y="{treadmill_top - outdoor_h}" width="{bar_width}" height="{outdoor_h}" fill="{outdoor_color}"/>'
        )
        parts.append(
            f'<text x="{middle}" y="{base_y+20}" {label_font} font-size="11">{record["year"]}</text>'
        )
        parts.append(
            f'<text x="{middle}" y="{treadmill_top + 16}" {label_font} font-size="11" fill="white">{share_treadmill:.1f}%</text>'
        )
        if outdoor_h <= 18:
            # Segment too short to hold text: print the value above the bar.
            parts.append(
                f'<text x="{middle}" y="{treadmill_top - outdoor_h - 6}" {label_font} font-size="10">{share_outdoor:.1f}%</text>'
            )
        else:
            parts.append(
                f'<text x="{middle}" y="{treadmill_top - 4}" {label_font} font-size="10" fill="white">{share_outdoor:.1f}%</text>'
            )

    legend_y = height - 20
    legend_entries = (
        ("Treadmill running share", treadmill_color),
        ("Outdoor running share", outdoor_color),
    )
    for position, (caption, swatch) in enumerate(legend_entries):
        legend_x = margin_left + 180 + position * 240
        parts.append(f'<rect x="{legend_x}" y="{legend_y-10}" width="12" height="12" fill="{swatch}"/>')
        parts.append(f'<text x="{legend_x+18}" y="{legend_y}" font-family="Arial" font-size="12">{caption}</text>')

    parts.append("</svg>")
    path.write_text("\n".join(parts), encoding="utf-8")


def split_text(text: str, max_chars: int) -> list[str]:
    """Greedily wrap ``text`` into lines of at most ``max_chars`` characters.

    Words are separated on whitespace and rejoined with single spaces. A word
    longer than ``max_chars`` is never broken; it is placed on a line of its
    own.

    Args:
        text: Text to wrap.
        max_chars: Maximum line length, counting the joining spaces.

    Returns:
        The wrapped lines; an empty list when ``text`` contains no words.
    """
    lines: list[str] = []
    current: list[str] = []
    length = 0
    for word in text.split():
        add_len = len(word) + (1 if current else 0)
        # Flush only when there is something to flush. Without the ``current``
        # check an over-long first word emitted a spurious empty line.
        if current and length + add_len > max_chars:
            lines.append(" ".join(current))
            current = [word]
            length = len(word)
        else:
            current.append(word)
            length += add_len
    if current:
        lines.append(" ".join(current))
    return lines


def escape_xml(text: str) -> str:
    """Replace ``&``, ``<``, ``>`` and ``"`` in ``text`` with XML entities."""
    # A single translate pass never revisits the ampersands it introduces,
    # which gives the same result as replacing "&" first and the rest after.
    entities = str.maketrans(
        {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
        }
    )
    return text.translate(entities)


def write_manifest(copied: dict[str, Path]) -> None:
    """Write the project-state manifest JSON for the study package.

    The manifest is written to
    ``STUDY_DIR / "manifest" / "study000d_project_state.json"`` and lists
    every copied source table by its path relative to the package root.

    Args:
        copied: Mapping of source file name to copied path; only the names
            are used, in sorted order.
    """
    manifest = {
        "study_id": "000D",
        "study_slug": "selective_expression_vs_normalization",
        "build_date": "2026-06-09",
        "package_root": ".",
        "source_root": "source_tables",
        # as_posix() keeps these relative paths identical on every platform;
        # str(Path(...)) would write backslashes when run on Windows.
        "source_files": {
            name: (Path("source_tables") / name).as_posix() for name in sorted(copied)
        },
    }
    (STUDY_DIR / "manifest" / "study000d_project_state.json").write_text(
        json.dumps(manifest, indent=2), encoding="utf-8"
    )


def write_readme() -> None:
    """Write the package README to ``STUDY_DIR / "README.md"`` as UTF-8."""
    paragraphs = (
        "Study 000D: Selective Expression Versus Normalization",
        "This package tests a single integrated program question:",
        "Did the six-year altered-mechanics system adapt through broad normalization, or through selective expression?",
        "The package is built from the existing flagship studies, the supporting microstudy, and the full-data corrective audit. It does not replace those packages. Instead, it asks whether the corrected program aligns more strongly with a normalization signature or with a selective-expression signature.",
    )
    # Paragraphs are separated by one blank line; the file ends with a newline.
    body = "\n\n".join(paragraphs) + "\n"
    readme_path = STUDY_DIR / "README.md"
    readme_path.write_text(body, encoding="utf-8")


def main() -> None:
    """Build the complete Study 000D package.

    Steps, in order: create the package directories, copy the source tables,
    derive the output tables and write them as CSV, render the three SVG
    figures, archive this script under ``analysis``, then write the manifest
    and the README.
    """
    ensure_dirs()
    copied = copy_sources()
    data = load_key_rows(copied)

    phase_table = build_phase_environment_table(data["phase_rows"])
    yearly_rows = read_csv(copied["v7_pillar_total_adaptation_ecology_yearly_v1.csv"])
    yearly_expression = build_yearly_expression_table(data["ecology_rows"], yearly_rows)
    evidence_rows, decision_rows = build_signature_test_tables(data)
    conclusion_rows = build_primary_conclusion(data)
    scope_rows = build_scope_claims()

    # (output file name, rows, column order)
    output_tables = [
        (
            "study000d_phase_environment_table.csv",
            phase_table,
            [
                "phase_id",
                "phase_label",
                "dominant_modality",
                "mean_running_share_pct",
                "mean_indoor_share_pct",
                "treadmill_run_share_pct",
                "outdoor_run_share_pct",
                "stabilized_minus_outdoor_gap_pct_points",
                "mean_running_hours_28d",
                "mean_total_activity_hours_28d",
                "mean_resting_hr",
            ],
        ),
        (
            "study000d_yearly_expression_table.csv",
            yearly_expression,
            [
                "year",
                "running_distance_miles",
                "running_outdoor_distance_miles",
                "running_treadmill_distance_miles",
                "outdoor_running_share_pct",
                "treadmill_running_share_pct",
                "running_share_of_structured_hours",
                "indoor_share_of_structured_hours",
                "hybrid_indoor_cardio_count",
                "hybrid_indoor_cardio_pct_of_indoor_hours",
            ],
        ),
        (
            "study000d_signature_evidence_matrix.csv",
            evidence_rows,
            [
                "evidence_id",
                "evidence_family",
                "normalization_signature",
                "selective_expression_signature",
                "observed_result",
                "aligned_with",
                "confidence",
            ],
        ),
        (
            "study000d_decision_table.csv",
            decision_rows,
            ["question_axis", "answer", "key_evidence", "supports"],
        ),
        (
            "study000d_primary_conclusion.csv",
            conclusion_rows,
            [
                "study_id",
                "core_question",
                "dataset_answer",
                "summary_claim",
                "ecology_anchor",
                "mechanics_anchor",
                "burden_anchor",
                "full_data_anchor",
            ],
        ),
        (
            "study000d_scope_claims.csv",
            scope_rows,
            ["claim_id", "claim_label", "question", "answer", "status"],
        ),
    ]
    outputs_dir = STUDY_DIR / "outputs"
    for file_name, table_rows, fieldnames in output_tables:
        write_csv(outputs_dir / file_name, table_rows, fieldnames)

    figures_dir = STUDY_DIR / "figures"
    write_svg_bar_chart(figures_dir / "figure01_phase_environment.svg", phase_table)
    write_svg_signature_chart(
        figures_dir / "figure02_signature_evidence_matrix.svg", evidence_rows
    )
    write_svg_yearly_surface_chart(
        figures_dir / "figure03_yearly_running_surface_share.svg",
        yearly_expression,
    )

    # Archive the script next to its outputs.
    script_path = Path(__file__)
    try:
        shutil.copy2(script_path, STUDY_DIR / "analysis" / script_path.name)
    except shutil.SameFileError:
        # The archived copy itself is being run, so it is already in place.
        pass
    write_manifest(copied)
    write_readme()


# Build the package only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
