#!/usr/bin/env python3
"""Range-aware HTTP server for the VO Booth prototype.

Also accepts POST / DELETE / GET-listing under /workspace/ so the booth can
persist test recordings to disk while we iterate locally. This is a stand-in
for the eventual cloud upload path — recorded takes live in vo-booth/workspace/
until we wire up R2 / Hetzner.
"""
import http.server, os, mimetypes, re, json, urllib.parse, shutil, sys, subprocess, tempfile
import urllib.request
from datetime import datetime, timezone

PORT = 8765
WORKSPACE_DIR = "workspace"  # all uploads/deletes/take edits are confined under this dir

# Make scripts/ importable so we can call align_take from within the server.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "scripts"))


def _load_env():
    """Parse ATRI-VOX-SUITE/.env (one dir up from vo-booth/) into a dict.
    Returns empty dict if the file is missing. Used by the AI-take endpoint to
    pick up ELEVENLABS_API_KEY / VOICE_ID / MODEL_ID without a python-dotenv dep.
    """
    env_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env"))
    out = {}
    if not os.path.isfile(env_path):
        return out
    with open(env_path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            k, _, v = line.partition("=")
            out[k.strip()] = v.strip().strip('"').strip("'")
    return out


def _safe_workspace_path(rel_path: str):
    """Resolve rel_path under ./workspace/, refuse anything that escapes.

    Uses os.path.realpath (not abspath) so both ".." traversal and symlinks
    that point outside the workspace are rejected — abspath alone would let a
    symlink planted inside workspace/ reach arbitrary filesystem paths.

    Returns the resolved absolute path, or None when the target would land
    outside the workspace root.
    """
    root = os.path.realpath(os.path.join(os.getcwd(), WORKSPACE_DIR))
    target = os.path.realpath(os.path.join(os.getcwd(), rel_path.lstrip("/")))
    # target must be root itself or strictly inside it
    if target != root and not target.startswith(root + os.sep):
        return None
    return target


def _write_json_atomic(path: str, data) -> None:
    """Write `data` as JSON to `path` via temp-file + os.replace.

    Prevents partial writes on crash or ENOSPC from leaving a truncated /
    corrupt JSON file on disk. Temp file is created in the same directory so
    os.replace is atomic on the same filesystem. fsync before rename ensures
    the bytes are on disk before the rename commits.
    """
    dir_path = os.path.dirname(path) or "."
    fd, tmp = tempfile.mkstemp(prefix=".tmp-json-", suffix=".json", dir=dir_path)
    try:
        with os.fdopen(fd, "w") as f:
            json.dump(data, f, indent=2)
            f.flush()
            try:
                os.fsync(f.fileno())
            except OSError:
                # fsync can fail on exotic filesystems; the rename still protects
                # against partial writes within the same process.
                pass
        os.replace(tmp, path)
    except Exception:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise


class RangeHandler(http.server.SimpleHTTPRequestHandler):
    # ---------- GET (range support + JSON listing) ----------
    def do_GET(self):
        """Serve GETs with two extensions over SimpleHTTPRequestHandler.

        1. `GET /workspace/...?list` answers a JSON array of directory entries
           (empty array when the path is not a directory).
        2. A `Range: bytes=A-B` header on a regular file yields a 206 partial
           response so the browser can scrub video without full reloads; an
           unsatisfiable range (start at/past EOF) answers 416 per RFC 7233.
        Everything else falls through to the stock handler.
        """
        url = urllib.parse.urlparse(self.path)

        # JSON directory listing under /workspace/…/ ?list
        if (url.path.startswith("/" + WORKSPACE_DIR + "/") or url.path == "/" + WORKSPACE_DIR + "/") \
                and url.query == "list":
            target = _safe_workspace_path(url.path)
            if target is None:
                return self.send_error(403, "path outside workspace")
            items = sorted(os.listdir(target)) if os.path.isdir(target) else []
            body = json.dumps(items).encode()
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return

        # Range support for video scrubbing
        path = self.translate_path(self.path)
        if not os.path.isfile(path):
            return super().do_GET()
        rng = self.headers.get("Range")
        if not rng:
            return super().do_GET()
        m = re.match(r"bytes=(\d+)-(\d*)", rng)
        if not m:
            return super().do_GET()
        size = os.path.getsize(path)
        start = int(m.group(1))
        end = int(m.group(2)) if m.group(2) else size - 1
        end = min(end, size - 1)
        # Unsatisfiable range (e.g. start beyond EOF, or an empty file): the
        # old code computed a negative Content-Length and emitted a bogus 206.
        # RFC 7233 requires 416 with "bytes */<size>" here.
        if start >= size or start > end:
            self.send_response(416)
            self.send_header("Content-Range", f"bytes */{size}")
            self.send_header("Content-Length", "0")
            self.end_headers()
            return
        length = end - start + 1
        ctype = mimetypes.guess_type(path)[0] or "application/octet-stream"
        self.send_response(206)
        self.send_header("Content-Type", ctype)
        self.send_header("Accept-Ranges", "bytes")
        self.send_header("Content-Range", f"bytes {start}-{end}/{size}")
        self.send_header("Content-Length", str(length))
        self.end_headers()
        with open(path, "rb") as f:
            f.seek(start)
            remaining = length
            while remaining > 0:
                chunk = f.read(min(64 * 1024, remaining))
                if not chunk:
                    break
                try:
                    self.wfile.write(chunk)
                except (BrokenPipeError, ConnectionResetError):
                    # Browser aborted the scrub — nothing more to send.
                    return
                remaining -= len(chunk)

    # ---------- POST (save a take under /workspace/… or trigger alignment) ----------
    def do_POST(self):
        """Route POSTs: named API endpoints first, then raw upload under /workspace/.

        The upload path streams the request body to disk in 64 KiB chunks and
        answers 201 with {ok, path, bytes}. A truncated upload (client hung up
        before Content-Length bytes arrived) is deleted and rejected with 400
        so a half-written take never masquerades as a complete one.
        """
        url = urllib.parse.urlparse(self.path)

        # Auto-align endpoint: POST /align-take?character=X&key=Y
        if url.path == "/align-take":
            return self._handle_align(url)

        # Manual-shift endpoint: POST /shift-take?character=X&key=Y&shift_ms=N
        if url.path == "/shift-take":
            return self._handle_shift(url)

        # Non-destructive metadata patch for split takes: POST /update-take-meta?character=X&key=Y
        # JSON body with whitelisted keys merged into the take's meta JSON (audio file untouched).
        if url.path == "/update-take-meta":
            return self._handle_update_meta(url)

        # Cue timecode autocorrect: POST /autocorrect-cue?episode=X&line=N
        if url.path == "/autocorrect-cue":
            return self._handle_autocorrect_cue(url)

        # Manual cue-TC override (drag fallback): POST /set-cue-tc?episode=X&line=N&start=S&end=E
        if url.path == "/set-cue-tc":
            return self._handle_set_cue_tc(url)

        # AI-generated take via ElevenLabs: POST /generate-ai-take?character=X&episode=E&line=N
        if url.path == "/generate-ai-take":
            return self._handle_generate_ai_take(url)

        if not url.path.startswith("/" + WORKSPACE_DIR + "/"):
            return self.send_error(404, "POST only allowed under /workspace/ or /align-take")
        target = _safe_workspace_path(url.path)
        if target is None:
            return self.send_error(403, "path outside workspace")
        os.makedirs(os.path.dirname(target), exist_ok=True)
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            # A garbage Content-Length previously raised and surfaced as a 500 traceback.
            return self.send_error(400, "invalid Content-Length")
        if length <= 0:
            return self.send_error(400, "empty body")
        with open(target, "wb") as f:
            remaining = length
            while remaining > 0:
                chunk = self.rfile.read(min(64 * 1024, remaining))
                if not chunk:
                    break
                f.write(chunk)
                remaining -= len(chunk)
        if remaining > 0:
            # Short read: the client disconnected mid-upload. Previously we
            # kept the partial file AND answered 201 claiming all bytes were
            # written; drop the corrupt take and report the truncation.
            try:
                os.remove(target)
            except OSError:
                pass
            return self.send_error(400, f"truncated upload: got {length - remaining} of {length} bytes")

        # Fresh webm means any prior .original.webm backup is stale — remove it so the
        # next align/shift captures the new recording as its pristine source.
        if target.endswith(".webm") and not target.endswith(".original.webm"):
            stale = target[:-len(".webm")] + ".original.webm"
            if os.path.isfile(stale):
                try:
                    os.remove(stale)
                except OSError:
                    pass

        body = json.dumps({"ok": True, "path": url.path, "bytes": length}).encode()
        self.send_response(201)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    # ---------- DELETE (remove a single file or a whole character folder) ----------
    def do_DELETE(self):
        """Delete a single workspace file or an entire character folder.

        Only paths under /workspace/ are accepted (404 otherwise); escapes are
        a 403. A missing target is treated as already gone, so the endpoint is
        idempotent — every permitted request ends in 204 No Content.
        """
        parsed = urllib.parse.urlparse(self.path)
        if not parsed.path.startswith("/" + WORKSPACE_DIR + "/"):
            return self.send_error(404, "DELETE only allowed under /workspace/")
        resolved = _safe_workspace_path(parsed.path)
        if resolved is None:
            return self.send_error(403, "path outside workspace")
        if os.path.isdir(resolved):
            shutil.rmtree(resolved)
        elif os.path.isfile(resolved):
            os.remove(resolved)
        self.send_response(204)
        self.end_headers()

    # ---------- Align / shift helpers ----------
    def _resolve_take_paths(self, url):
        """Shared validation + path resolution for the take-editing endpoints.

        Validates the `character` and `key` query params (alnum/underscore/
        hyphen only), resolves the take's webm/meta/backup paths, loads the
        meta JSON, and ensures a pristine `.original.webm` backup exists
        before any edit touches the audio.

        Returns an 8-tuple (character, key, char_dir, webm_path, meta_path,
        backup_path, meta_dict, params) — the original docstring claimed 7
        items and omitted params — or None after sending an error response.
        """
        params = urllib.parse.parse_qs(url.query)
        character = (params.get("character", [""])[0] or "").strip()
        key = (params.get("key", [""])[0] or "").strip()
        if not character or not re.fullmatch(r"[A-Za-z0-9_\-]+", character):
            self.send_error(400, "invalid character"); return None
        if not key or not re.fullmatch(r"[A-Za-z0-9_\-]+", key):
            self.send_error(400, "invalid key"); return None
        char_dir = os.path.join(os.getcwd(), WORKSPACE_DIR, character)
        webm_path = os.path.join(char_dir, f"{key}.webm")
        meta_path = os.path.join(char_dir, f"{key}.json")
        backup_path = os.path.join(char_dir, f"{key}.original.webm")
        if not os.path.isfile(webm_path):
            self.send_error(404, f"take not found: {webm_path}"); return None
        if not os.path.isfile(meta_path):
            self.send_error(404, f"meta not found: {meta_path}"); return None
        try:
            with open(meta_path) as f:
                meta = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # A corrupt meta file used to escape as an unhandled 500 traceback;
            # report it as a clean server error instead.
            self.send_error(500, f"corrupt meta JSON: {e}"); return None
        # Back up the original once (first shift of any kind)
        if not os.path.isfile(backup_path):
            shutil.copyfile(webm_path, backup_path)
        return (character, key, char_dir, webm_path, meta_path, backup_path, meta, params)

    def _apply_shift(self, info, webm_path, meta, meta_path, source, overrode_manual=False):
        """Commit a shift the caller already wrote to disk: update meta + respond.

        source: 'manual_shift' | 'auto_align' — drives the manual_lock flag
        and the history tag.
        overrode_manual: True when an auto-align forcibly replaced a prior
        manual shift (recorded in the history entry).
        """
        inline_keep = 50  # history entries retained inline in the meta JSON
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        meta["applied_shift_ms"] = int(info["shift_ms"])
        meta["manual_lock"] = source == "manual_shift"
        meta["last_edit_source"] = source
        meta["last_edit_at"] = stamp
        if "preroll_ms" in info and "detected_onset_ms" in info:
            meta["vo_offset_in_take_ms"] = int(info["preroll_ms"])

        record = dict(info)
        record["source"] = source
        record["at"] = stamp
        if overrode_manual:
            record["overrode_manual"] = True
        history = meta.setdefault("alignment_history", [])
        history.append(record)
        # Keep a session-length trail inline; spill anything older to a
        # sibling {key}.history.jsonl so the full audit trail survives
        # indefinitely without bloating the meta JSON.
        if len(history) > inline_keep:
            spill = history[:-inline_keep]
            meta["alignment_history"] = history[-inline_keep:]
            try:
                base = os.path.basename(meta_path).rsplit(".json", 1)[0]
                jsonl_path = os.path.join(os.path.dirname(meta_path), f"{base}.history.jsonl")
                with open(jsonl_path, "a") as sink:
                    sink.writelines(json.dumps(item) + "\n" for item in spill)
            except OSError:
                # Overflow logging is best-effort — never fail a shift because
                # the sibling log couldn't be written.
                pass

        _write_json_atomic(meta_path, meta)

        body = json.dumps({"ok": True, **record}).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _handle_align(self, url):
        """POST /align-take?character=<folder>&key=<cue_key>[&force=1]

        Detects the voice onset in the pristine backup and rewrites the take
        so the voice lands at preroll_ms. Two guard rails:

        * Split takes are refused with 409 — rewriting the whole file would
          invalidate their per-part offsets (the client has its own
          alignSplitTake path for that case).
        * meta.manual_lock without force=1 short-circuits to a 200
          {skipped: true} response, so user-curated manual drags are never
          silently clobbered.
        """
        resolved = self._resolve_take_paths(url)
        if not resolved:
            return
        character, key, char_dir, webm_path, meta_path, backup_path, meta, params = resolved

        def respond_json(status, payload):
            # Local shortcut for the two early-exit JSON responses below.
            body = json.dumps(payload).encode()
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        splits = meta.get("splits_file_ms")
        if isinstance(splits, list) and len(splits) > 0:
            return respond_json(409, {
                "ok": False,
                "error": "split_take_refused",
                "message": "align-take cannot rewrite a split take; use per-part alignment on the client.",
                "splits_file_ms": splits,
            })

        force = (params.get("force", [""])[0] or "").strip().lower() in ("1", "true", "yes")
        if meta.get("manual_lock") and not force:
            return respond_json(200, {
                "ok": True,
                "skipped": True,
                "reason": "manual_lock",
                "existing": {
                    "applied_shift_ms": int(meta.get("applied_shift_ms") or 0),
                    "last_edit_at": meta.get("last_edit_at"),
                    "last_edit_source": meta.get("last_edit_source"),
                },
            })

        overrode_manual = bool(meta.get("manual_lock")) and force
        preroll_ms = int(meta.get("preroll_ms", 0) or 0)
        # Cue duration = width of the green focal area. Feeds the sanity check
        # that keeps align from shoving voice outside the green zone when
        # onset detection fails.
        group_in = int(meta.get("group_in_ms", 0) or 0)
        group_out = int(meta.get("group_out_ms", 0) or 0)
        cue_duration_ms = max(0, group_out - group_in)
        try:
            from align_take import align_take_to_cue
        except Exception as e:
            return self.send_error(500, f"align_take import failed: {e}")
        tmp_path = webm_path + ".tmp.webm"
        try:
            info = align_take_to_cue(
                backup_path, tmp_path, preroll_ms,
                cue_duration_ms=cue_duration_ms,
            )
            os.replace(tmp_path, webm_path)
        except Exception as e:
            if os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
            return self.send_error(500, f"alignment failed: {e}")
        self._apply_shift(info, webm_path, meta, meta_path,
                          source="auto_align", overrode_manual=overrode_manual)

    def _handle_shift(self, url):
        """POST /shift-take?character=<folder>&key=<cue_key>&shift_ms=<int>

        shift_ms is ABSOLUTE relative to the pristine backup: positive trims
        audio off the front, negative pads silence at the front. This backs
        the client's waveform drag, so each new drag replaces — rather than
        stacks on — the previous one.
        """
        resolved = self._resolve_take_paths(url)
        if not resolved:
            return
        character, key, char_dir, webm_path, meta_path, backup_path, meta, params = resolved
        shift_raw = (params.get("shift_ms", [""])[0] or "").strip()
        try:
            shift_ms = int(shift_raw)
        except ValueError:
            return self.send_error(400, f"invalid shift_ms: {shift_raw!r}")
        # Reject anything beyond ±10 s — protects against accidental huge values.
        if abs(shift_ms) > 10_000:
            return self.send_error(400, f"shift_ms out of range (|x|>10000): {shift_ms}")
        try:
            from align_take import shift_take
        except Exception as e:
            return self.send_error(500, f"align_take import failed: {e}")
        tmp_path = webm_path + ".tmp.webm"
        try:
            info = shift_take(backup_path, tmp_path, shift_ms)
            os.replace(tmp_path, webm_path)
        except Exception as e:
            if os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
            return self.send_error(500, f"shift failed: {e}")
        # Recover the original (unshifted) voice offset — current offset plus
        # whatever shift was previously applied — then subtract the new
        # absolute shift: trimming the front moves the voice earlier by
        # exactly shift_ms.
        current_offset = int(meta.get("vo_offset_in_take_ms", meta.get("preroll_ms", 0)) or 0)
        previously_applied = int(meta.get("applied_shift_ms", 0) or 0)
        meta["vo_offset_in_take_ms"] = (current_offset + previously_applied) - shift_ms
        self._apply_shift(info, webm_path, meta, meta_path, source="manual_shift")

    def _handle_update_meta(self, url):
        """POST /update-take-meta?character=<folder>&key=<cue_key>

        Merges a JSON-object body of whitelisted keys into the take's meta
        JSON. The audio webm is never touched — this is the non-destructive
        metadata path for split takes (splits_file_ms, part_shifts_ms).
        Unknown keys are rejected with 400.
        """
        resolved = self._resolve_take_paths(url)
        if not resolved:
            return
        character, key, char_dir, webm_path, meta_path, backup_path, meta, params = resolved
        try:
            body_len = int(self.headers.get("Content-Length") or 0)
        except ValueError:
            body_len = 0
        # Cap the body at 100 KB — metadata patches are tiny.
        if body_len <= 0 or body_len > 100_000:
            return self.send_error(400, "invalid body length")
        try:
            patch = json.loads(self.rfile.read(body_len).decode("utf-8"))
        except Exception as e:
            return self.send_error(400, f"bad json body: {e}")
        if not isinstance(patch, dict):
            return self.send_error(400, "body must be a JSON object")
        allowed = {"splits_file_ms", "part_shifts_ms", "applied_shift_ms", "sample_rate"}
        for field in patch:
            if field not in allowed:
                return self.send_error(400, f"disallowed key: {field}")
        # Shallow-merge the whitelisted patch into the meta dict.
        meta.update(patch)
        meta["last_edit_source"] = "update_take_meta"
        meta["last_edit_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        _write_json_atomic(meta_path, meta)
        payload = json.dumps({"ok": True}).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    # ---------- Cue timecode autocorrect ----------
    def _handle_autocorrect_cue(self, url):
        """POST /autocorrect-cue?episode=E&line=N[&start_back_ms=1500&start_forward_ms=3000&...]
        Runs autocorrect_cue.autocorrect_cue on the reference waveform (prefers
        voice_waveform.json over waveform.json) and persists the result keyed by
        "{episode}-{line}" in corrections.json so the client picks it up on reload.

        Optional query params tune the search window and detector (the int keys
        listed below plus float depth_factor / gate_threshold). Unparseable
        tuning values are silently dropped so the tool's defaults apply.
        """
        params = urllib.parse.parse_qs(url.query)
        episode = (params.get("episode", [""])[0] or "").strip()
        line_raw = (params.get("line", [""])[0] or "").strip()
        if not episode or not re.fullmatch(r"[A-Za-z0-9_\-]+", episode):
            return self.send_error(400, "invalid episode")
        try:
            line = int(line_raw)
        except ValueError:
            return self.send_error(400, f"invalid line: {line_raw!r}")

        # Load script_batch1.json + a waveform from the served root.
        # Prefer voice_waveform.json (produced from HTDemucs voice stem — music can't fool
        # Fix TC) and fall back to the mixed waveform.json if the voice version is absent.
        script_path = os.path.join(os.getcwd(), "script_batch1.json")
        voice_waveform_path = os.path.join(os.getcwd(), "voice_waveform.json")
        mixed_waveform_path = os.path.join(os.getcwd(), "waveform.json")
        waveform_path = voice_waveform_path if os.path.isfile(voice_waveform_path) else mixed_waveform_path
        if not os.path.isfile(script_path) or not os.path.isfile(waveform_path):
            return self.send_error(500, "script or waveform file missing from server root")

        with open(script_path) as f:
            script = json.load(f)
        # Find the cue entry matching episode + line in the script array.
        cue = next((c for c in script if c.get("episode") == episode and int(c.get("line")) == line), None)
        if cue is None:
            return self.send_error(404, f"cue not found: {episode}-{line}")
        with open(waveform_path) as f:
            waveform = json.load(f)

        try:
            from autocorrect_cue import autocorrect_cue as _do_correct
        except Exception as e:
            return self.send_error(500, f"autocorrect_cue import failed: {e}")

        # Forward only the tuning params the client actually sent; anything
        # omitted (or unparseable) falls back to autocorrect_cue's defaults.
        opts = {}
        int_keys = (
            "start_back_ms", "start_forward_ms", "end_back_ms", "end_forward_ms",
            "voice_min_ms", "silence_min_ms", "voice_lookback_ms",
            "smoothing_ms", "min_spacing_ms", "distance_bias_ms",
        )
        for k in int_keys:
            v = (params.get(k, [""])[0] or "").strip()
            if v:
                try: opts[k] = int(v)
                except ValueError: pass
        # The two float-valued knobs are parsed separately from the int keys.
        dfv = (params.get("depth_factor", [""])[0] or "").strip()
        if dfv:
            try: opts["depth_factor"] = float(dfv)
            except ValueError: pass
        gtv = (params.get("gate_threshold", [""])[0] or "").strip()
        if gtv:
            try: opts["gate_threshold"] = float(gtv)
            except ValueError: pass

        # Pass the cue's claimed duration — enables duration-aware candidate
        # scoring so short voice bursts (breath, HTDemucs bleed, previous-line
        # tail, onomatopoeia) can't beat the real sustained line on proximity
        # alone. Without this, Fix TC snaps to the nearest RMS rise and may
        # leave the cue stuck on a stray 150ms peak even when the real voice
        # is clearly visible elsewhere in the search window.
        expected_duration_s = float(cue["end"]) - float(cue["start"])
        if "expected_duration_s" not in opts and expected_duration_s > 0:
            opts["expected_duration_s"] = expected_duration_s
        try:
            info = _do_correct(waveform, float(cue["start"]), float(cue["end"]), **opts)
        except Exception as e:
            return self.send_error(500, f"autocorrect failed: {e}")

        # Persist into corrections.json under the cue key.
        # An unreadable/corrupt corrections.json is deliberately treated as
        # empty rather than failing the request.
        ckey = f"{episode}-{line}"
        corr_path = os.path.join(os.getcwd(), "corrections.json")
        corrections = {}
        if os.path.isfile(corr_path):
            try:
                with open(corr_path) as f:
                    corrections = json.load(f)
            except Exception:
                corrections = {}
        corrections[ckey] = {
            "start": info["corrected_start"],
            "end":   info["corrected_end"],
            "original_start": info["original_start"],
            "original_end":   info["original_end"],
            "delta_start_ms": info["delta_start_ms"],
            "delta_end_ms":   info["delta_end_ms"],
            "params": info["params"],
        }
        _write_json_atomic(corr_path, corrections)

        body = json.dumps({"ok": True, "key": ckey, **info}).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _handle_set_cue_tc(self, url):
        """POST /set-cue-tc?episode=E&line=N[&start=S][&end=E]

        Records a manual cue-timecode override in corrections.json under
        "{episode}-{line}" — the drag-handle fallback for when Fix TC can't
        nail an edge. `start` / `end` are seconds (float); pass either or
        both, and only the edges supplied are updated.
        """
        params = urllib.parse.parse_qs(url.query)
        episode = (params.get("episode", [""])[0] or "").strip()
        line_raw = (params.get("line", [""])[0] or "").strip()
        if not episode or not re.fullmatch(r"[A-Za-z0-9_\-]+", episode):
            return self.send_error(400, "invalid episode")
        try:
            line = int(line_raw)
        except ValueError:
            return self.send_error(400, f"invalid line: {line_raw!r}")

        unparseable = object()  # sentinel: param present but not a float

        def parse_float(name):
            raw = (params.get(name, [""])[0] or "").strip()
            if not raw:
                return None
            try:
                return float(raw)
            except ValueError:
                return unparseable

        new_start = parse_float("start")
        new_end = parse_float("end")
        if new_start is unparseable or new_end is unparseable:
            return self.send_error(400, "start/end must be floats (seconds)")
        if new_start is None and new_end is None:
            return self.send_error(400, "provide at least one of start / end")

        # Look up the script cue to record the original bounds + sanity-check the line exists.
        script_path = os.path.join(os.getcwd(), "script_batch1.json")
        if not os.path.isfile(script_path):
            return self.send_error(500, "script_batch1.json missing")
        with open(script_path) as f:
            script = json.load(f)
        cue = next((c for c in script if c.get("episode") == episode and int(c.get("line")) == line), None)
        if cue is None:
            return self.send_error(404, f"cue not found: {episode}-{line}")
        orig_start = float(cue["start"])
        orig_end = float(cue["end"])

        ckey = f"{episode}-{line}"
        corr_path = os.path.join(os.getcwd(), "corrections.json")
        corrections = {}
        if os.path.isfile(corr_path):
            try:
                with open(corr_path) as f:
                    corrections = json.load(f)
            except Exception:
                corrections = {}

        # Merge onto any prior correction so a second drag on the same cue
        # keeps the untouched edge. Priority: new value > prior correction >
        # script original.
        prior = corrections.get(ckey) or {}
        prior_start = prior.get("start") if isinstance(prior.get("start"), (int, float)) else orig_start
        prior_end = prior.get("end") if isinstance(prior.get("end"), (int, float)) else orig_end

        final_start = float(new_start) if new_start is not None else float(prior_start)
        final_end = float(new_end) if new_end is not None else float(prior_end)
        if final_end <= final_start:
            return self.send_error(400, f"end ({final_end}) must be > start ({final_start})")

        corrections[ckey] = {
            "start": final_start,
            "end":   final_end,
            "original_start": orig_start,
            "original_end":   orig_end,
            "delta_start_ms": int(round((final_start - orig_start) * 1000)),
            "delta_end_ms":   int(round((final_end   - orig_end)   * 1000)),
            "source": "manual_drag",
        }
        _write_json_atomic(corr_path, corrections)

        payload = json.dumps({"ok": True, "key": ckey, **corrections[ckey]}).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    # ---------- AI-generated take (ElevenLabs) ----------
    def _handle_generate_ai_take(self, url):
        """POST /generate-ai-take?character=<folder>&episode=E&line=N
        Generates a take for the cue via ElevenLabs TTS, transcodes MP3→webm/opus,
        and writes {key}.webm + {key}.json into workspace/<character>/ in the same
        shape autoSaveTake produces — so the existing UI, align, and shift logic
        treat the result like any other take.
        """
        params = urllib.parse.parse_qs(url.query)
        character = (params.get("character", [""])[0] or "").strip()
        episode = (params.get("episode", [""])[0] or "").strip()
        lines_raw = (params.get("lines", [""])[0] or "").strip()
        line_raw = (params.get("line", [""])[0] or "").strip()
        if not character or not re.fullmatch(r"[A-Za-z0-9_\-]+", character):
            return self.send_error(400, "invalid character")
        if not episode or not re.fullmatch(r"[A-Za-z0-9_\-]+", episode):
            return self.send_error(400, "invalid episode")
        # Accept either lines=1,2,3 (group) or line=1 (single cue, legacy).
        try:
            if lines_raw:
                line_nums = [int(x) for x in lines_raw.split(",") if x.strip()]
                if not line_nums:
                    raise ValueError("empty lines")
            elif line_raw:
                line_nums = [int(line_raw)]
            else:
                return self.send_error(400, "missing line or lines parameter")
        except ValueError:
            return self.send_error(400, f"invalid line/lines: {lines_raw or line_raw!r}")

        env = _load_env()
        api_key = env.get("ELEVENLABS_API_KEY", "")
        voice_id = env.get("ELEVENLABS_VOICE_ID", "")
        model_id = env.get("ELEVENLABS_MODEL_ID", "eleven_multilingual_v2")
        if not api_key or not voice_id:
            return self.send_error(500, "ELEVENLABS_API_KEY or ELEVENLABS_VOICE_ID missing from .env")

        # Resolve cues from script_batch1.json, applying corrections.json overlay so
        # timecodes match what the UI shows (Fix TC / drag can have moved them).
        script_path = os.path.join(os.getcwd(), "script_batch1.json")
        corr_path = os.path.join(os.getcwd(), "corrections.json")
        if not os.path.isfile(script_path):
            return self.send_error(500, "script_batch1.json missing from server root")
        with open(script_path) as f:
            script = json.load(f)
        corrections = {}
        if os.path.isfile(corr_path):
            try:
                with open(corr_path) as f:
                    corrections = json.load(f)
            except Exception:
                corrections = {}

        group_cues = []
        for ln in line_nums:
            cue = next((c for c in script if c.get("episode") == episode and int(c.get("line")) == ln), None)
            if cue is None:
                return self.send_error(404, f"cue not found: {episode}-{ln}")
            c_start = float(cue["start"]); c_end = float(cue["end"])
            corr = corrections.get(f"{episode}-{ln}")
            if isinstance(corr, dict):
                if isinstance(corr.get("start"), (int, float)): c_start = float(corr["start"])
                if isinstance(corr.get("end"),   (int, float)): c_end   = float(corr["end"])
            group_cues.append({"line": ln, "start": c_start, "end": c_end,
                               "phrase": str(cue.get("phrase", "")).strip(),
                               "character_name": cue.get("character", "")})
        if not all(g["phrase"] for g in group_cues):
            return self.send_error(400, "one or more cues has empty phrase")

        # Group bounds = first cue's start to last cue's end; phrase = joined with spaces.
        cue_start = group_cues[0]["start"]
        cue_end = group_cues[-1]["end"]
        phrase = " ".join(g["phrase"] for g in group_cues)
        line = line_nums[0]  # used only for legacy single-line meta fields

        # Call ElevenLabs TTS
        tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}?output_format=mp3_44100_128"
        payload = json.dumps({"text": phrase, "model_id": model_id}).encode()
        req = urllib.request.Request(tts_url, data=payload, method="POST",
            headers={
                "xi-api-key": api_key,
                "Content-Type": "application/json",
                "Accept": "audio/mpeg",
            })
        gen_start = datetime.now(timezone.utc)
        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                mp3_bytes = resp.read()
        except urllib.error.HTTPError as e:
            detail = ""
            try: detail = e.read().decode("utf-8", errors="replace")[:500]
            except Exception: pass
            return self.send_error(502, f"ElevenLabs HTTP {e.code}: {detail}")
        except Exception as e:
            return self.send_error(502, f"ElevenLabs request failed: {e}")
        gen_ms = int((datetime.now(timezone.utc) - gen_start).total_seconds() * 1000)

        # Transcode MP3 → webm/opus so existing playback/align/shift code treats this
        # like any normal take. ffmpeg is already a hard dep of admin-cli/.
        char_dir = os.path.join(os.getcwd(), WORKSPACE_DIR, character)
        os.makedirs(char_dir, exist_ok=True)
        # Group key matches the client's take-key convention: "ep-line1_line2_line3".
        key = f"{episode}-{'_'.join(str(ln) for ln in line_nums)}"
        webm_path = os.path.join(char_dir, f"{key}.webm")
        meta_path = os.path.join(char_dir, f"{key}.json")

        # Pad silence around the TTS voice so the take has the same shape as a human
        # recording: PREROLL_MS of silence, then voice, then POSTROLL_MS of silence.
        # This keeps A/B playback alignment identical (video starts at cue.start-preroll,
        # take plays from 0; voice lands exactly at cue.start because it's preroll ms in).
        PREROLL_MS = 2000
        POSTROLL_MS = 1500
        FADE_MS = 1500
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tf:
            tf.write(mp3_bytes)
            mp3_tmp = tf.name
        try:
            proc = subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error",
                 "-i", mp3_tmp,
                 "-af", f"adelay={PREROLL_MS}:all=1,apad=pad_dur={POSTROLL_MS/1000}",
                 "-c:a", "libopus", "-b:a", "96k",
                 webm_path],
                capture_output=True, text=True, timeout=30,
            )
            if proc.returncode != 0:
                return self.send_error(500, f"ffmpeg failed: {proc.stderr[:500]}")
        finally:
            try: os.remove(mp3_tmp)
            except OSError: pass

        # Clear any stale .original.webm from a prior human recording — this AI bytes
        # file is now the authoritative source for align/shift.
        stale = os.path.join(char_dir, f"{key}.original.webm")
        if os.path.isfile(stale):
            try: os.remove(stale)
            except OSError: pass

        # Extract duration + build waveform peaks so the rec-waveform strip renders the
        # same way it does for a human take. Peaks are video-timeline keyed (t, pk).
        import array, struct
        try:
            pcm = subprocess.run(
                ["ffmpeg", "-loglevel", "error", "-i", webm_path,
                 "-f", "f32le", "-ac", "1", "-ar", "16000", "pipe:1"],
                capture_output=True, timeout=30, check=True,
            )
            samples = array.array("f"); samples.frombytes(pcm.stdout)
            audio_duration_s = len(samples) / 16000.0
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            return self.send_error(500, f"ffmpeg PCM decode failed: {getattr(e, 'stderr', b'')[:300]}")

        # Assemble meta. AI take is now silence-padded to match human-take shape:
        # record_in_ms = cue start - PREROLL, voice offset = PREROLL, total file
        # duration = PREROLL + TTS + POSTROLL. This is what A/B playback assumes.
        now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
        group_in_ms = int(round(cue_start * 1000))
        group_out_ms = int(round(cue_end * 1000))
        audio_dur_ms = int(round(audio_duration_s * 1000))
        record_in_ms = group_in_ms - PREROLL_MS
        record_out_ms = record_in_ms + audio_dur_ms

        # Peaks at 60/sec (matches live animation-frame sampling rate), placed on the
        # video reference timeline starting at record_in_ms.
        peaks = []
        peak_rate = 60
        chunk = 16000 // peak_rate  # 266 samples
        start_t = record_in_ms / 1000.0
        if chunk > 0:
            nsamples = len(samples)
            for i in range(0, nsamples, chunk):
                end = min(i + chunk, nsamples)
                pk = 0.0
                for j in range(i, end):
                    a = samples[j]
                    if a < 0: a = -a
                    if a > pk: pk = a
                if pk > 1.0: pk = 1.0
                peaks.append({"t": round(start_t + (i // chunk) / peak_rate, 4),
                              "pk": round(pk, 4)})

        meta = {
            "episode": episode,
            "lines": line_nums,
            "joined": len(line_nums) > 1,
            "character": group_cues[0]["character_name"],
            "phrase": phrase,
            "group_in_ms": group_in_ms,
            "group_out_ms": group_out_ms,
            "lines_detail": [{
                "line": g["line"],
                "in_ms": int(round(g["start"] * 1000)),
                "out_ms": int(round(g["end"] * 1000)),
                "offset_in_take_ms": PREROLL_MS + int(round((g["start"] - group_cues[0]["start"]) * 1000)),
                "phrase": g["phrase"],
            } for g in group_cues],
            "preroll_ms": PREROLL_MS,
            "postroll_ms": POSTROLL_MS,
            "fade_in_ms": min(FADE_MS, PREROLL_MS),
            "fade_out_ms": min(FADE_MS, POSTROLL_MS),
            "record_in_ms": record_in_ms,
            "record_out_ms": record_out_ms,
            "vo_offset_in_take_ms": PREROLL_MS,
            "recorded_at": now_iso,
            "channel": 1,
            "peaks": peaks,
            "manual_lock": False,
            "last_edit_source": "ai_generated",
            "last_edit_at": now_iso,
            "source": "ai_elevenlabs",
            "ai_generation": {
                "voice_id": voice_id,
                "model_id": model_id,
                "chars_billed": len(phrase),
                "generation_ms": gen_ms,
                "generated_at": now_iso,
            },
        }
        _write_json_atomic(meta_path, meta)

        body = json.dumps({
            "ok": True,
            "key": key,
            "character": character,
            "phrase": phrase,
            "chars_billed": len(phrase),
            "generation_ms": gen_ms,
        }).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def end_headers(self):
        """Inject booth-wide response headers, then delegate to the base class.

        Every response advertises byte-range support (media seeking) and
        disables browser caching so local edits show up on a plain refresh —
        this is a dev server, stale HTML/CSS/JS is never acceptable.
        """
        extra = (
            ("Accept-Ranges", "bytes"),
            ("Cache-Control", "no-store, must-revalidate"),
        )
        for name, value in extra:
            self.send_header(name, value)
        super().end_headers()


if __name__ == "__main__":
    # Serve relative to this file so workspace/ and static assets resolve the
    # same way regardless of the caller's cwd.
    here = os.path.dirname(os.path.abspath(__file__))
    os.chdir(here)
    os.makedirs(WORKSPACE_DIR, exist_ok=True)
    server = http.server.ThreadingHTTPServer(("", PORT), RangeHandler)
    with server:
        for msg in (
            f"VO Booth server: http://localhost:{PORT}/vo-booth.html",
            f"Workspace:       {os.path.abspath(WORKSPACE_DIR)}",
            "Press Ctrl+C to stop.",
        ):
            print(msg)
        try:
            # Blocks until interrupted; Ctrl+C exits quietly instead of
            # dumping a KeyboardInterrupt traceback.
            server.serve_forever()
        except KeyboardInterrupt:
            pass