from __future__ import annotations

import csv
import datetime as dt
import json
import shutil
from pathlib import Path


# Destination of the assembled Study 000H package: the package folder and its
# zip archive sit side by side under ROOT.
ROOT = Path(r"C:\BalancedTrim\analyzer")
PACKAGE_NAME = "STUDY_000H_ENVELOPE_SUPPORT_FACTORS_20260610"
PACKAGE_DIR = ROOT / PACKAGE_NAME
ZIP_PATH = ROOT / f"{PACKAGE_NAME}.zip"

# Output folders of earlier studies whose CSV tables build_outputs() reads:
# Study 000F supplies the boundary-expression table.
STUDY_F_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000F_SUCCESSFUL_OPERATING_ENVELOPE_HYPOTHESIS_20260609\outputs"
)
# Study 000G supplies the mechanism-bridge table.
STUDY_G_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000G_CADENCE_PROTECTION_HYPOTHESIS_20260610\outputs"
)
# Study 000E supplies the outdoor-probe detail table.
STUDY_E_OUTPUTS = Path(
    r"C:\Study\Studies\STUDY_000E_BURDEN_OUTSIDE_SUCCESSFUL_CONTEXT_20260609\outputs"
)


def ensure_dirs() -> None:
    """Create the package directory and every standard sub-folder if absent."""
    package_subdirs = (
        "analysis",
        "appendices",
        "figures",
        "manifest",
        "manuscript",
        "outputs",
        "reports",
        "source_tables",
    )
    for name in package_subdirs:
        target = PACKAGE_DIR / name
        # parents=True also creates PACKAGE_DIR itself on the first iteration.
        target.mkdir(parents=True, exist_ok=True)


def read_csv(path: Path) -> list[dict[str, str]]:
    """Load a CSV file into a list of row dictionaries keyed by header name.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    (as written by Excel and some Windows tools) is stripped instead of being
    glued onto the first column name; files without a BOM decode exactly as
    plain UTF-8.

    Args:
        path: Location of the CSV file; its first line must be the header.

    Returns:
        One ``dict`` per data row, with every value left as a string.
    """
    with path.open(newline="", encoding="utf-8-sig") as handle:
        return list(csv.DictReader(handle))


def write_csv(path: Path, rows: list[dict[str, object]], fieldnames: list[str]) -> None:
    """Write *rows* to *path* as UTF-8 CSV with a header line.

    Args:
        path: Destination file; overwritten if it already exists.
        rows: Row dictionaries whose keys must all appear in *fieldnames*.
        fieldnames: Column names, in output order.
    """
    with path.open("w", newline="", encoding="utf-8") as sink:
        table = csv.DictWriter(sink, fieldnames=fieldnames)
        table.writeheader()
        table.writerows(rows)


def mean(values: list[float]) -> float:
    """Return the arithmetic mean of *values*, or ``0.0`` for an empty list."""
    if not values:
        return 0.0
    total = sum(values)
    return total / len(values)


def round_or_blank(value: float | None, digits: int = 4) -> float | str:
    """Round *value* to *digits* decimals; map ``None`` to an empty string."""
    return "" if value is None else round(value, digits)


def escape_xml(text: str) -> str:
    """Return ``str(text)`` with ``&``, ``<``, ``>`` and ``"`` replaced by XML entities."""
    # A single translate pass never re-escapes the "&" of an entity it just
    # produced, which matches replacing "&" first in a chain of replacements.
    entity_table = str.maketrans(
        {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
        }
    )
    return str(text).translate(entity_table)


# Columns averaged per cluster; every row must carry a parseable number here.
_REQUIRED_MEAN_FIELDS = (
    "distance_miles_norm",
    "running_share_28d_activity_pct",
    "treadmill_neighbors_21d",
    "total_activity_hours_28d_rebuilt",
    "speed_per_hr",
    "hr_residual_bpm",
    "cadence_residual_pct",
    "stride_residual_pct",
)

# Next-day columns may be blank; blank cells are skipped when averaging.
_OPTIONAL_MEAN_FIELDS = (
    "next_day_resting_hr",
    "next_day_sleep_score",
    "next_day_hrv",
)


def _cluster_means(rows: list[dict[str, str]], cluster_label: str) -> dict[str, float]:
    """Average every tracked numeric column over the rows of one cluster."""
    if not rows:
        # An empty cluster would otherwise average to 0.0 everywhere and either
        # divide by zero later or silently publish an all-zero comparison.
        raise ValueError(f"No boundary-expression rows found for cluster {cluster_label!r}")
    means = {
        field: mean([float(row[field]) for row in rows])
        for field in _REQUIRED_MEAN_FIELDS
    }
    for field in _OPTIONAL_MEAN_FIELDS:
        means[field] = mean([float(row[field]) for row in rows if row[field] != ""])
    return means


def _candidate_factor_row(
    candidate_factor: str,
    field: str,
    october_mean: dict[str, float],
    april_mean: dict[str, float],
    support_strength: str,
    why: str,
    digits: int = 4,
) -> dict[str, object]:
    """Build one candidate-factor row comparing the April value with the October mean."""
    october_value = october_mean[field]
    april_value = april_mean[field]
    if october_value == 0:
        raise ValueError(
            f"October mean of {field!r} is zero; the April-to-October ratio is undefined"
        )
    return {
        "candidate_factor": candidate_factor,
        "october_2025_mean": round(october_value, digits),
        "april_2026_value": round(april_value, digits),
        # The ratio is always reported to four decimals, independent of *digits*.
        "april_to_october_ratio": round(april_value / october_value, 4),
        "support_strength": support_strength,
        "why": why,
    }


def build_outputs() -> dict[str, list[dict[str, object]]]:
    """Assemble every Study 000H output table from the upstream study CSVs.

    Reads the Study 000F boundary-expression table, the Study 000G mechanism
    bridge and the Study 000E outdoor-probe details, then derives the
    candidate-factor comparison and the per-run support-case table. The
    remaining tables (primary answer, evidence families, hypothesis tests,
    scope claims, discovered questions) are fixed narrative rows.

    Returns:
        Mapping of output file stem to the rows of that table.

    Raises:
        ValueError: If either cluster has no rows in the boundary-expression
            table, if an October mean used as a ratio denominator is zero, or
            if a required numeric cell cannot be parsed as a float.
        KeyError: If an expected column is missing from an input table.
    """
    boundary = read_csv(STUDY_F_OUTPUTS / "study000f_boundary_expression_table.csv")
    mech = read_csv(STUDY_G_OUTPUTS / "study000g_mechanism_bridge.csv")
    burden = read_csv(STUDY_E_OUTPUTS / "study000e_outdoor_probe_details.csv")

    october = [row for row in boundary if row["cluster_label"] == "october_2025_outside_cluster"]
    april = [row for row in boundary if row["cluster_label"] == "april_2026_boundary_probe"]
    cadence_protected = [row for row in mech if row["mechanical_pattern"] == "cadence_preserved_stride_suppressed"]
    burden_map = {row["calendar_date"]: row for row in burden}

    october_mean = _cluster_means(october, "october_2025_outside_cluster")
    april_mean = _cluster_means(april, "april_2026_boundary_probe")

    primary_answer = [
        {
            "question_id": "Q023",
            "study_id": "000H",
            "core_question": "What envelope-support factors seem to determine whether a cadence-preserved outside-surface run becomes high-burden or low-burden?",
            "answer": "high_local_embedding_and_stabilized_continuity_strongest_supported_candidates",
            "answer_plain": "The strongest supported support factors are high local running embedding and stabilized-context continuity. Longer bout structure appears to be a plausible secondary contributor, but is less isolated than embedding in the current data.",
        }
    ]

    candidate_factor_table = [
        _candidate_factor_row(
            "local_running_share_28d_activity_pct",
            "running_share_28d_activity_pct",
            october_mean,
            april_mean,
            "strong",
            "The low-burden cadence-protected boundary probe was deeply embedded in a running-specialized context, unlike the October cluster.",
        ),
        _candidate_factor_row(
            "treadmill_neighbors_21d",
            "treadmill_neighbors_21d",
            october_mean,
            april_mean,
            "strong",
            "Dense stabilized-context continuity strongly separated the low-burden April probe from the high-burden October cluster.",
        ),
        _candidate_factor_row(
            "distance_miles_norm",
            "distance_miles_norm",
            october_mean,
            april_mean,
            "moderate",
            "The low-burden cadence-protected probe was also much longer than the October probes, suggesting bout structure may matter, but it is not cleanly isolated from embedding.",
        ),
        _candidate_factor_row(
            "total_activity_hours_28d_rebuilt",
            "total_activity_hours_28d_rebuilt",
            october_mean,
            april_mean,
            "weak",
            "Total recent activity volume was not dramatically different and does not organize the burden split as cleanly as embedding markers.",
        ),
        _candidate_factor_row(
            "speed_per_hr",
            "speed_per_hr",
            october_mean,
            april_mean,
            "weak_to_moderate",
            "The April probe was more efficient, but this looks more like a downstream expression signature than a primary support factor.",
            digits=6,
        ),
    ]

    support_case_table = []
    for row in cadence_protected:
        # Runs without a matching Study 000E row fall back to zero / blank
        # values for the burden-derived columns instead of being dropped.
        burden_row = burden_map.get(row["calendar_date"], {})
        support_case_table.append(
            {
                "calendar_date": row["calendar_date"],
                "cluster_label": row["cluster_label"],
                "burden_state": row["burden_state"],
                "cadence_residual_pct": round(float(row["cadence_residual_pct"]), 4),
                "stride_residual_pct": round(float(row["stride_residual_pct"]), 4),
                "hr_residual_bpm": round(float(row["hr_residual_bpm"]), 4),
                "running_share_28d_activity_pct": round(float(row["running_share_28d_activity_pct"]), 4),
                "treadmill_neighbors_21d": int(float(row["treadmill_neighbors_21d"] or 0)),
                "distance_miles_norm": round(float(burden_row.get("distance_miles_norm") or 0.0), 4),
                "matched_treadmill_count": int(float(burden_row.get("matched_treadmill_count") or 0)),
                "outdoor_minus_matched_treadmill_hr_bpm": round(float(burden_row.get("outdoor_minus_matched_treadmill_hr_bpm") or 0.0), 4),
                "next_day_resting_hr": burden_row.get("next_day_resting_hr", ""),
                "next_day_sleep_score": burden_row.get("next_day_sleep_score", ""),
                "next_day_hrv": burden_row.get("next_day_hrv", ""),
            }
        )

    # NOTE(review): the "key_values" strings below are hard-coded narrative and
    # are not derived from october_mean / april_mean; confirm they still match
    # the inputs whenever the upstream tables change.
    evidence_family_matrix = [
        {
            "family_id": "H1",
            "family_label": "embedding_contrast",
            "observation": "The low-burden cadence-protected April probe had dramatically higher running-share embedding and treadmill-neighbor density than the October cluster.",
            "key_values": "running-share 91.08% vs 5.13%; treadmill neighbors 32 vs 1.75",
            "support_strength": "strong",
        },
        {
            "family_id": "H2",
            "family_label": "mechanics_constant_burden_variable",
            "observation": "Cadence-up and stride-down mechanics were similar across October and April, but burden was not.",
            "key_values": "October cadence +11.38%, stride -10.91%, HR residual +13.88 bpm; April cadence +10.22%, stride -9.28%, HR residual +0.69 bpm",
            "support_strength": "strong",
        },
        {
            "family_id": "H3",
            "family_label": "bout_length_candidate",
            "observation": "The low-burden cadence-protected case was substantially longer than the October cluster runs.",
            "key_values": "5.45 miles vs 2.09-mile October mean",
            "support_strength": "moderate",
        },
        {
            "family_id": "H4",
            "family_label": "total_activity_volume_nonseparator",
            "observation": "Total recent activity volume did not separate the states nearly as well as embedding markers.",
            "key_values": "17.16 vs 20.29 hours over 28 days",
            "support_strength": "weak",
        },
        {
            "family_id": "H5",
            "family_label": "recovery_context_favorable_but_posthoc",
            "observation": "Next-day recovery markers favored the April low-burden case, but they remain aftermath signals rather than primary pre-run support factors.",
            "key_values": "resting HR 40 vs 47, sleep 71 vs 64.25, HRV 69 vs 61.75",
            "support_strength": "moderate_secondary",
        },
    ]

    hypothesis_tests = [
        {
            "test_label": "embedding_markers_best_separate_burden_states",
            "result": "supported",
            "interpretation": "Local running-share context and treadmill-neighbor density separate low-burden from high-burden cadence-protected cases far more strongly than total recent activity volume.",
        },
        {
            "test_label": "cadence_protection_not_sufficient_by_itself",
            "result": "supported",
            "interpretation": "The same cadence-protected mechanical pattern can coexist with either high-burden or low-burden outcomes depending on support context.",
        },
        {
            "test_label": "bout_structure_plausible_secondary_factor",
            "result": "supported_but_not_isolated",
            "interpretation": "Longer bout structure is a plausible contributor to low-burden expression, but current data does not cleanly separate it from embedding support.",
        },
    ]

    scope_claims = [
        {
            "claim_type": "supported",
            "claim": "High local running embedding and stabilized-context continuity are the strongest supported envelope-support factors in the current dataset.",
        },
        {
            "claim_type": "supported",
            "claim": "Cadence protection can succeed mechanically while failing physiologically if envelope support is weak.",
        },
        {
            "claim_type": "supported",
            "claim": "Bout length is a plausible secondary support factor but is not isolated cleanly enough to outrank embedding support.",
        },
        {
            "claim_type": "not_supported",
            "claim": "A single support factor alone fully explains low-burden envelope success.",
        },
        {
            "claim_type": "not_supported",
            "claim": "Total recent activity volume is the main envelope-support driver.",
        },
    ]

    discovered_questions = [
        {
            "question_id": "Q023",
            "status": "ADDRESSED",
            "answered_in": "Study 000H",
            "question": "Which envelope-support factors convert a cadence-preserved outside-surface run from high-burden to low-burden expression?",
        },
        {
            "question_id": "Q024",
            "status": "OPEN",
            "answered_in": "",
            "question": "When embedding support and bout length diverge, which factor carries more weight in preserving low-burden expression?",
        },
    ]

    return {
        "study000h_primary_answer": primary_answer,
        "study000h_candidate_factor_table": candidate_factor_table,
        "study000h_support_case_table": support_case_table,
        "study000h_evidence_family_matrix": evidence_family_matrix,
        "study000h_hypothesis_tests": hypothesis_tests,
        "study000h_scope_claims": scope_claims,
        "study000h_discovered_questions": discovered_questions,
    }


def write_outputs(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write each output table to ``<PACKAGE_DIR>/outputs/<stem>.csv``.

    Column order is taken from the first row of each table; an empty table
    is written with an empty header.
    """
    target_dir = PACKAGE_DIR / "outputs"
    for stem, table in outputs.items():
        header = list(table[0]) if table else []
        destination = target_dir / f"{stem}.csv"
        write_csv(destination, table, header)


def _scaled(value: float, maximum: float, extent: float) -> float:
    """Map *value* onto *extent* pixels relative to *maximum*; 0.0 when *maximum* is zero."""
    return (value / maximum) * extent if maximum else 0.0


def _burden_color(row: dict[str, object]) -> str:
    """Return red for positive-burden runs and teal for every other burden state."""
    return "#d62828" if row["burden_state"] == "positive_burden" else "#2a9d8f"


def _svg_embedding_vs_burden(case_rows: list[dict[str, object]]) -> str:
    """Render the scatter of running-share embedding (x) against HR residual (y)."""
    width = 920
    height = 360
    left = 80
    bottom = 290
    top = 50
    chart_width = width - left - 40
    chart_height = bottom - top
    # default=0.0 keeps an empty case table from raising inside max().
    max_x = max((float(row["running_share_28d_activity_pct"]) for row in case_rows), default=0.0) + 5
    max_y = max((float(row["hr_residual_bpm"]) for row in case_rows), default=0.0) + 3
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fbfaf6"/>',
        '<text x="28" y="28" font-size="16" font-family="Georgia" fill="#222">Embedding Support vs HR Burden</text>',
    ]
    for row in case_rows:
        x = left + _scaled(float(row["running_share_28d_activity_pct"]), max_x, chart_width)
        y = bottom - _scaled(float(row["hr_residual_bpm"]), max_y, chart_height)
        color = _burden_color(row)
        label = escape_xml(row["calendar_date"])
        svg.append(f'<circle cx="{x:.2f}" cy="{y:.2f}" r="6" fill="{color}"/>')
        svg.append(f'<text x="{x+8:.2f}" y="{y-4:.2f}" font-size="10" font-family="Arial">{label}</text>')
    svg.extend([
        '<rect x="28" y="42" width="12" height="12" fill="#d62828"/><text x="46" y="52" font-size="11" font-family="Arial">Positive-burden cadence-protected runs</text>',
        '<rect x="28" y="60" width="12" height="12" fill="#2a9d8f"/><text x="46" y="70" font-size="11" font-family="Arial">Low-burden cadence-protected run</text>',
        '</svg>',
    ])
    return "\n".join(svg)


def _svg_candidate_factor_ratios(factor_rows: list[dict[str, object]]) -> str:
    """Render one bar per candidate factor, scaled to the largest April-to-October ratio."""
    width = 900
    height = 360
    left = 70
    bottom = 290
    top = 45
    chart_height = bottom - top
    max_ratio = max((float(row["april_to_october_ratio"]) for row in factor_rows), default=0.0)
    bar_w = 120
    gap = 20
    # Any strength other than "strong" or "moderate" is drawn in neutral grey.
    strength_colors = {"strong": "#4c78a8", "moderate": "#f4a261"}
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fffdf8"/>',
        '<text x="28" y="26" font-size="16" font-family="Georgia" fill="#222">Candidate Envelope-Support Separation</text>',
    ]
    for idx, row in enumerate(factor_rows):
        h = _scaled(float(row["april_to_october_ratio"]), max_ratio, chart_height)
        x = left + idx * (bar_w + gap)
        y = bottom - h
        color = strength_colors.get(row["support_strength"], "#bcb8b1")
        label = escape_xml(row["candidate_factor"])
        svg.append(f'<rect x="{x}" y="{y:.2f}" width="{bar_w}" height="{h:.2f}" fill="{color}" rx="4"/>')
        svg.append(f'<text x="{x+bar_w/2:.2f}" y="306" font-size="10" font-family="Arial" text-anchor="middle">{label}</text>')
    svg.append('</svg>')
    return "\n".join(svg)


def _svg_bout_lengths(case_rows: list[dict[str, object]]) -> str:
    """Render one bar per cadence-protected run, scaled to the longest distance."""
    width = 860
    height = 340
    left = 70
    bottom = 285
    top = 45
    chart_height = bottom - top
    max_dist = max((float(row["distance_miles_norm"]) for row in case_rows), default=0.0)
    bar_w = 110
    gap = 24
    svg = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
        '<rect width="100%" height="100%" fill="#fbfcff"/>',
        '<text x="28" y="26" font-size="16" font-family="Georgia" fill="#222">Bout Length in Cadence-Protected Cases</text>',
    ]
    for idx, row in enumerate(case_rows):
        h = _scaled(float(row["distance_miles_norm"]), max_dist, chart_height)
        x = left + idx * (bar_w + gap)
        y = bottom - h
        color = _burden_color(row)
        label = escape_xml(row["calendar_date"])
        svg.append(f'<rect x="{x}" y="{y:.2f}" width="{bar_w}" height="{h:.2f}" fill="{color}" rx="4"/>')
        svg.append(f'<text x="{x+bar_w/2:.2f}" y="302" font-size="10" font-family="Arial" text-anchor="middle">{label}</text>')
    svg.append('</svg>')
    return "\n".join(svg)


def build_figures(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write the three SVG figures of the package into ``<PACKAGE_DIR>/figures``.

    Args:
        outputs: Tables from ``build_outputs``; only the support-case table
            and the candidate-factor table are read.

    All text labels taken from the tables are XML-escaped, and an empty table
    produces a figure with no marks instead of raising.
    """
    fig_dir = PACKAGE_DIR / "figures"
    case_rows = outputs["study000h_support_case_table"]
    factor_rows = outputs["study000h_candidate_factor_table"]

    (fig_dir / "figure01_embedding_vs_burden.svg").write_text(
        _svg_embedding_vs_burden(case_rows), encoding="utf-8"
    )
    (fig_dir / "figure02_candidate_factor_ratios.svg").write_text(
        _svg_candidate_factor_ratios(factor_rows), encoding="utf-8"
    )
    (fig_dir / "figure03_bout_length_cases.svg").write_text(
        _svg_bout_lengths(case_rows), encoding="utf-8"
    )


def _rounded_case_mean(rows: list[dict[str, object]], field: str) -> float:
    """Return the mean of *field* over *rows*, rounded to two decimals for report text."""
    return round(mean([float(row[field]) for row in rows]), 2)


def build_reports(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write the Markdown report set and the manuscript for Study 000H.

    Five reports (summary, results, methods, discussion, audit) go to
    ``<PACKAGE_DIR>/reports`` and the manuscript to ``<PACKAGE_DIR>/manuscript``.
    Only the results report is data-driven: it quotes per-cluster means taken
    from the support-case table. All other documents are fixed text.

    Args:
        outputs: Tables from ``build_outputs``; only the support-case table is read.
    """
    reports_dir = PACKAGE_DIR / "reports"
    case_rows = outputs["study000h_support_case_table"]
    october = [row for row in case_rows if row["cluster_label"] == "october_2025_outside_cluster"]
    april = [row for row in case_rows if row["cluster_label"] == "april_2026_boundary_probe"]

    summary_text = """# Study 000H Plain-Language Summary

Study `000H` asks what seems to separate high-burden from low-burden cadence-protected running. The answer is not that cadence changes more or less. The cadence-protected pattern is already there in both the October 2025 cluster and the April 2026 boundary probe. What changes more clearly is the amount of local running embedding around the run.

The strongest supported support factors are high running-share context and dense stabilized-context continuity. Longer bout length appears to help too, but the current data cannot isolate it as cleanly as embedding support.
"""
    (reports_dir / "STUDY000H_PLAIN_LANGUAGE_SUMMARY.md").write_text(summary_text, encoding="utf-8")

    # Per-cluster figures quoted in the results report.
    october_share = _rounded_case_mean(october, "running_share_28d_activity_pct")
    october_neighbors = _rounded_case_mean(october, "treadmill_neighbors_21d")
    october_distance = _rounded_case_mean(october, "distance_miles_norm")
    october_hr = _rounded_case_mean(october, "hr_residual_bpm")
    april_share = _rounded_case_mean(april, "running_share_28d_activity_pct")
    april_neighbors = _rounded_case_mean(april, "treadmill_neighbors_21d")
    april_distance = _rounded_case_mean(april, "distance_miles_norm")
    april_hr = _rounded_case_mean(april, "hr_residual_bpm")

    results_text = f"""# Study 000H Results

## Primary Answer

The strongest supported envelope-support factors are:

- high local running-share embedding
- dense stabilized-context continuity

Longer bout structure is a plausible secondary factor, but the current data does not isolate it as cleanly as embedding support.

## Key Contrast

- October 2025 cadence-protected outside-envelope cluster:
  - mean running-share context: `{october_share}%`
  - mean treadmill neighbors in 21 days: `{october_neighbors}`
  - mean distance: `{october_distance} miles`
  - mean HR residual burden: `{october_hr} bpm`

- April 2026 cadence-protected boundary probe:
  - running-share context: `{april_share}%`
  - treadmill neighbors in 21 days: `{april_neighbors}`
  - distance: `{april_distance} miles`
  - HR residual burden: `{april_hr} bpm`

## Interpretation

`000H` does not weaken `000G`. It strengthens it. `000G` showed that cadence protection was real but insufficient. `000H` shows what seems to help that protection remain low-burden: dense local envelope support.
"""
    (reports_dir / "STUDY000H_RESULTS.md").write_text(results_text, encoding="utf-8")

    methods_text = """# Study 000H Methods

Study `000H` is a flagship mechanism follow-up built from already-audited outputs from:

- `Study 000E`
- `Study 000F`
- `Study 000G`

The study focused specifically on cadence-protected outside-surface or outside-context cases and compared:

- the October 2025 high-burden cadence-protected cluster
- the April 2026 low-burden cadence-protected boundary probe

Candidate support factors were evaluated comparatively rather than causally:

- running-share embedding
- treadmill-neighbor density
- bout length
- total recent activity volume
- next-day recovery context
"""
    (reports_dir / "STUDY000H_METHODS.md").write_text(methods_text, encoding="utf-8")

    discussion_text = """# Study 000H Discussion

The archive now points to a more complete mechanism chain:

`cadence protection helps preserve expression`

but

`envelope support helps determine whether that preserved expression remains low burden`

This matters because it means the archive is no longer choosing between cadence and context. It is showing how they interact. Rhythm protection appears to be one mechanism. Embedding support appears to be one of the conditions that lets that mechanism succeed more cleanly.

The most important caution is that `000H` identifies the strongest supported support factors, but does not prove a single exclusive switch. Bout length may still matter in a meaningful way, and the archive is not yet large enough to separate every support variable cleanly.
"""
    (reports_dir / "STUDY000H_DISCUSSION.md").write_text(discussion_text, encoding="utf-8")

    audit_text = """# Study 000H Audit

## Structural Audit

This package is structurally sound.

It includes:

- candidate-factor outputs
- support-case table
- figures
- manuscript/report set
- build script
- manifest

## Scientific Audit

Study `000H` asks:

`What envelope-support factors seem to determine whether a cadence-protected outside-surface run becomes high-burden or low-burden?`

The answer is:

`High local running embedding and stabilized-context continuity are the strongest supported candidates.`

Why:

- they separate the April boundary probe from the October cluster much more strongly than total recent activity volume
- the cadence-protected mechanical pattern stayed similar while burden diverged
- bout length looks relevant, but less isolated than embedding

## Boundaries

- This remains an observational mechanism study.
- It does not prove a single exclusive support factor.
- It does not fully resolve the relative weight of embedding versus bout length when they diverge.
- It does provide the strongest current answer to the envelope-support question created by `000G`.
"""
    (reports_dir / "STUDY000H_AUDIT.md").write_text(audit_text, encoding="utf-8")

    manuscript_text = """# Study 000H: Envelope Support Factors

## Core Question

What seems to determine whether a cadence-protected outside-surface run becomes high-burden or low-burden?

## Answer

The strongest supported envelope-support factors are high local running embedding and stabilized-context continuity. Longer bout structure appears to be a plausible secondary factor, but is not isolated as cleanly as embedding in the current dataset.

## Thesis

`Cadence protection appears to preserve running expression, but envelope support appears to determine whether that mechanically preserved expression remains physiologically tolerable.`
"""
    (PACKAGE_DIR / "manuscript" / "STUDY000H_MANUSCRIPT.md").write_text(manuscript_text, encoding="utf-8")


def build_readme() -> None:
    """Write the short package README into the package root."""
    readme_lines = [
        "# STUDY_000H_ENVELOPE_SUPPORT_FACTORS_20260610",
        "",
        "Flagship mechanism follow-up to Study 000G testing which support factors seem to separate high-burden from low-burden cadence-protected running expression.",
        "",  # trailing empty item yields the final newline
    ]
    readme_path = PACKAGE_DIR / "README.md"
    readme_path.write_text("\n".join(readme_lines), encoding="utf-8")


def build_manifest(outputs: dict[str, list[dict[str, object]]]) -> None:
    """Write ``manifest/study000h_state.json`` summarising the build.

    The manifest records the package name, a UTC generation timestamp, the
    primary answer code and the row counts of the two derived tables.
    """
    generated_at = dt.datetime.now(dt.timezone.utc).isoformat()
    primary_row = outputs["study000h_primary_answer"][0]
    case_count = len(outputs["study000h_support_case_table"])
    factor_count = len(outputs["study000h_candidate_factor_table"])

    state = {
        "package_name": PACKAGE_NAME,
        "generated_at": generated_at,
        "primary_answer": primary_row["answer"],
        "support_case_rows": case_count,
        "candidate_factor_rows": factor_count,
    }
    manifest_path = PACKAGE_DIR / "manifest" / "study000h_state.json"
    manifest_path.write_text(json.dumps(state, indent=2), encoding="utf-8")


def _mark_q023_addressed(text: str) -> str:
    """Return *text* with each Q023 entry marked ADDRESSED and given its answer once."""
    answer_lines = [
        "Answered In:",
        "Study 000H",
        "Answer:",
        "The strongest supported support factors were high local running embedding and stabilized-context continuity, with bout length as a plausible secondary contributor.",
    ]
    lines = text.splitlines()
    out: list[str] = []
    i = 0
    while i < len(lines):
        line = lines[i]
        out.append(line)
        i += 1
        if line.strip() != "Q023":
            continue
        # An entry runs from its "Q023" heading to the next blank line.
        entry: list[str] = []
        while i < len(lines) and lines[i].strip():
            entry.append(lines[i])
            i += 1
        out.extend(
            "Status: ADDRESSED" if current.startswith("Status:") else current
            for current in entry
        )
        # Only append the answer when it is not there yet, so a rebuild does
        # not stack duplicate "Answered In" groups onto the entry.
        if not any(current.startswith("Answered In:") for current in entry):
            out.extend(answer_lines)
    result = "\n".join(out)
    if text.endswith("\n"):
        # splitlines() discards the final newline; put it back.
        result += "\n"
    return result


def update_question_log(log_path: Path | None = None) -> None:
    """Record the Study 000H outcome in the discovered-questions log.

    Marks Q023 as ADDRESSED with its answer, and appends the follow-up
    question Q024 if the log does not mention it yet. The update is
    idempotent: running it again leaves the log unchanged. Nothing happens
    when the log file does not exist.

    Args:
        log_path: Log file to update; defaults to the program-support log
            under ``C:\\Study``.
    """
    if log_path is None:
        log_path = Path(r"C:\Study\Program Support\DISCOVERED_PROGRAM_QUESTIONS_20260609.txt")
    if not log_path.exists():
        return
    text = log_path.read_text(encoding="utf-8")
    if "Q023" in text:
        text = _mark_q023_addressed(text)
    if "Q024" not in text:
        text = text.rstrip() + "\n\nQ024\nStatus: OPEN\nBorn From: Study 000H\nQuestion:\nWhen embedding support and bout length diverge, which factor carries more weight in preserving low-burden expression?\nWhy It Matters:\nThis is the cleanest remaining child question after the support-factor study.\nNotes:\nStudy 000H suggests both embedding and bout length matter, but embedding separated the current cases more strongly.\n"
    log_path.write_text(text, encoding="utf-8")


def copy_builder() -> None:
    """Copy this build script into the package's ``analysis`` folder."""
    script_path = Path(__file__)
    destination = PACKAGE_DIR / "analysis" / script_path.name
    shutil.copy2(script_path, destination)


def build_zip() -> None:
    """Replace any existing archive with a fresh zip of the package contents."""
    stale_archive = ZIP_PATH
    if stale_archive.exists():
        stale_archive.unlink()
    # make_archive appends ".zip" to the base name; root_dir makes the archive
    # hold the package contents directly rather than a top-level folder.
    archive_base = str(PACKAGE_DIR)
    shutil.make_archive(archive_base, "zip", root_dir=PACKAGE_DIR)


def main() -> None:
    """Rebuild the Study 000H package from scratch and print its locations.

    The upstream tables are read before the previous package is removed, so
    a missing or malformed input aborts the run without destroying the last
    good build.
    """
    # build_outputs() only reads; it must succeed before anything is deleted.
    outputs = build_outputs()
    if PACKAGE_DIR.exists():
        shutil.rmtree(PACKAGE_DIR)
    ensure_dirs()
    write_outputs(outputs)
    build_figures(outputs)
    build_reports(outputs)
    build_readme()
    build_manifest(outputs)
    update_question_log()
    copy_builder()
    build_zip()
    print(PACKAGE_DIR)
    print(ZIP_PATH)


# Run the full package build only when executed as a script, not on import.
if __name__ == "__main__":
    main()
