
    #iJ              !           d Z ddlZddlZdZdZdZdZdZdZdZ	dZ
dZdZdZd	Zd
Zd Zd Zd Zd Zd Zd Zeeeeeee	eeeedfdedededededededededededededed ed!ef d"Zd# Zed$k    r e             dS dS )%u)  Auto-correct cue start / end timecodes by scanning a reference waveform.

Primary algorithm — **hysteresis edge detection** (requires a clean voice-only
stem, e.g. HTDemucs `voice_waveform.json`):

  - Cue START = silence → voice rising edge closest to the claimed start.
  - Cue END   = voice → silence falling edge closest to the claimed end.

Thresholds are derived dynamically from the search window's peak distribution
so the detector auto-scales across quiet dialogue and shouted scenes.

Fallback — **nearest local minimum** (for intra-phrase cue splits where the
voice is continuous across the bound, so no rising/falling edge exists):

  - Smooth the peaks (box filter), enumerate local minima in the window,
    rank by depth + proximity to the claimed bound, return the best one.

The fallback is what v1 used exclusively when the waveform was mixed audio and
music made edge detection unreliable. Now that we read voice-only stems, edges
are usable and preferred.

Waveform structure: { "sampleRate": 100, "peaks": [0..1, ...] } at 10 ms steps.
    Ni  i  x   i  g333333?(   g      ?   c           	         t          dt          t          |dz  |z                                }|dz  }t          |           }dg|z  }t	          | dt          ||dz                                }t          ||dz             }t          |          D ]C}	||z  ||	<   |	|z   dz   }
|	|z
  }|
|k     r|| |
         z  }|dz  }|dk    r|| |         z  }|dz  }D|S )N                r   )maxintroundlensumminrange)peakssrsmoothing_msswhalfNout
window_sumcountiadd_irem_is               [/Users/a11/Documents/CLAUDE_ENVIRONMENTS/ATRI-VOX-SUITE/vo-booth/scripts/autocorrect_cue.py	_smoothedr   6   s    	QE,-23344	5	5B7DE

A%!)CU1SD1H---.//J4!8E1XX 3 3e#AD1D199%,&J
A::%,&J
J    c           	      ^   g }d }t          t          |dz   d          t          |t          |           dz
                      D ]h}| |         | |dz
           k     rQ| |         | |dz            k    r<|#||z
  |k     r| |         | |         k     r||d<   |}Q|                    |           |}i|S )Nr   )r   r   r   r   append)smoothi0i1min_spacingminimaprevks          r   _local_minimar+   H   s    FD3rAvq>>3r3v;;?#;#;<< ' '!9va!e}$$fQUm)C)CQX$<$<!9vd|++!"F2J1DMM!QdMr    c           	          t          t          ||z                      }t          t          ||z                      }	t          d||	z
            }
t          t	          |           ||	z             }||
dz   k    rd S t          | ||          t          |
|                   }|dk    rd S t          dt          t          |dz  |z                                }t          |
||          }||z  fd|D             }|sd S t          dt          t          |dz  |z                                }d }t          d          }|D ]1}t          ||z
            }||z  |z  }|         |z   }||k     r|}|}2|S )Nr      r   r   c                 ,    g | ]}|         k    |S  r/   ).0r*   	depth_capr$   s     r   
<listcomp>z%_find_nearest_min.<locals>.<listcomp>e   s'    BBB6!9	+A+A!+A+A+Ar    inf)	r   r   r   r   r   r   r+   floatabs)r   r   target_swindow_sr   min_spacing_msdepth_factordistance_bias_mstarget_iwinr%   r&   
window_maxr'   
candidatesbias_samplesbest
best_scorer*   distdist_penaltyscorer1   r$   s                         @@r   _find_nearest_minrE   U   s    5B''((H
eHrM""
#
#C	Q3		B	SZZC	(	(B	R!V||tub,//FVBrE]##JQtaU>D#82#=>>??@@Kvr2{;;J\)IBBBBBZBBBJ tq#e$4t$;b$@AABBCCLDuJ ) )1x<  |+z9q	L(:JqKr    c           	      *   | ||         }|sdS t          |          }|t          t          |          dz
  t          t          |          dz                               }t	          d|t
          z            }t	          d|t          z            }||fS )N)g{Gz?gQ?r   g?g{Gz?g{Gz?)sortedr   r   r   r   DEFAULT_VOICE_FACTORDEFAULT_SILENCE_FACTOR)r   r%   r&   windowsorted_wp90voice_threshsilence_threshs           r   _dynamic_thresholdsrO   x   s    2b5\F zf~~H
3s8}}q(#c(mmc.A*B*BCC
DCtS#7788L|.DDEEN''r    c           
      >   t          t          ||z                      }t          d|t          t          ||z                      z
            }t          t	          |           |t          t          ||z                      z             }	|	|z
  dk     rdS t          | ||	          \  }
}t          dt          t          |dz  |z                                }t          dt          t          |dz  |z                                }d}t          d          }t          ||z   |          }t          |	|z
  t	          |           |z
            }t          ||          D ]}d}t          |||z             D ]}| |         |
k    r|dz  }||dz  k     r5d}t          ||z
  |          D ]}| |         |k     r|dz  }||dz  k     rht          ||z
            }||k     r|}|}|dS |}|dk    r)| |dz
           |k    r|dz  }|dk    r| |dz
           |k    |S )u  Find silence → voice transition index closest to target_s within the
    asymmetric window [target - back_s, target + forward_s].

    Two-pass detection:
      1. Anchor scan — find a position where sustained voice follows sustained
         silence (robust against mid-phrase peaks / HTDemucs bleed).
      2. Onset snap — from the anchor, walk BACKWARD through every non-silent
         sample to find the true first audible sample of that voice burst.
         This removes the "lands late" bias: if a phrase ramps up gradually,
         the anchor sits where ≥70% of the next 120 ms is above voice_thresh,
         which is already ~60 ms into the speech. Walking back through the
         ramp snaps to the earliest non-silent sample without any directional
         preference.
    r      Nr   r   r3   gffffff?	r   r   r   r   r   rO   r4   r   r5   )r   r   r6   back_s	forward_svoice_min_mssilence_min_msr;   r%   r&   rM   rN   	voice_minsilence_minr@   	best_distlohir   voice_countjsilence_countrB   onsets                           r   _find_rising_edger`      sb     5B''((H	Q3uVb[11222	3	3B	SZZCi"n(=(=$>$>>	?	?B	Bw||t#6ub"#E#E L.As5!4r!9::;;<<IaU>D#82#=>>??@@KDeI	R+{	+	+B	R)^SZZ)3	4	4B2r]] ' 'q!i-(( 	! 	!AQx,&&q S((q;** 	# 	#AQx.((";,,,1x<  )IQd|t
 E
!))eai(>99
 !))eai(>99Lr    c           
         t          t          ||z                      }t          d|t          t          ||z                      z
            }t          t	          |           |t          t          ||z                      z             }	|	|z
  dk     rdS t          | ||	          \  }
}t          dt          t          |dz  |z                                }t          dt          t          |dz  |z                                }d}t          d          }t          ||          }t          |	|z
  t	          |           |z
            }t          ||          D ]}d}t          |||z             D ]}| |         |k     r|dz  }||dz  k     r5d}t          t          d||z
            |          D ]}| |         |k    r| |         }||
k     rvt          ||z
            }||k     r|}|}|dS t	          |           }|}||k     r#| |         |k    r|dz  }||k     r| |         |k    t          d||z
            }||k    r)| |dz
           |k    r|dz  }||k    r| |dz
           |k    |S )	u  Find where voice gives way to sustained silence, closest to target_s.

    Mirror of `_find_rising_edge` for the trailing boundary:

      1. Anchor scan — find a position where sustained silence follows recent
         voice activity. Voice endings TAPER (loud → quiet → silent over ~500
         ms), so there's no sharp edge. Anchor detects: (a) sustained silence
         of `silence_min_ms` starting at index i, and (b) voice activity
         somewhere in the preceding `voice_lookback_ms` window. Max-based
         lookback tolerates taper — the phrase can be quiet at i-1 as long as
         it was loud earlier in the lookback.

      2. Offset snap — from the anchor, refine so the returned index is
         exactly "one past the last audible sample". The 85%-silent rule in
         the anchor tolerates a few stray tail peaks inside [i, i+silence_min],
         and the anchor may land a few samples after the true tail because
         the silence needed to sustain. Snap:
           (a) walk FORWARD while peaks[i] > silence_thresh — captures any
               straggler tail peaks the 85% rule allowed past the anchor;
           (b) walk BACKWARD while peaks[i-1] ≤ silence_thresh — pulls back
               past any silent samples inside the anchor region so the result
               lands immediately after the last audible peak.
         The back-walk is bounded to the lookback window so it can't cross a
         real silence gap into a previous voice burst.
    r   rQ   Nr   r   r3   g333333?r
   rR   )r   r   r6   rS   rT   rV   voice_lookback_msr;   r%   r&   rM   rN   rX   voice_lookbackr@   rY   rZ   r[   r   r^   r]   prev_maxrB   r   offset
back_limits                             r   _find_voice_endrg      s   6 5B''((H	Q3uVb[11222	3	3B	SZZCi"n(=(=$>$>>	?	?B	Bw||t#6ub"#E#E L.aU>D#82#=>>??@@KC&7$&>&C D DEEFFNDeI	R	 	 B	R+s5zzK7	8	8B2r]] ' 'q!k/** 	# 	#AQx.((";--- s1a.011155 	$ 	$AQx("" 8l""1x<  )IQd|t 	E

AF
1**v77! 1**v77
 Q~-..J
:

%
"3~"E"E! :

%
"3~"E"EMr    r
   waveformoriginal_start_soriginal_end_sstart_back_msstart_forward_msend_back_msend_forward_msrU   rV   rb   r   r8   r9   r:   gate_thresholdreturnc                 D   t          |                     d          pd          }| d         }rdk    rfd|D             }|dz  }|dz  }|dz  }|dz  }||z   dz  }t          |||||||          }|dnd }|t          |||||
|||          }|d	nd
}t	          |||||||	          }|dnd }|t          |||||
|||          }|d	nd
}|||z  nd }|||z  nd }||n|}||n|}||k    rKt          ||z
            }t          ||z
            }||k    r|}|r| dnd
}n|}|r| dnd
}||k    r|}|}||||t          t          ||z
  dz                      t          t          ||z
  dz                      |d u|d u||d|||||||	|
|||dd
S )N
sampleRated   r   r   c                 $    g | ]}|k    r|nd S )r
   r/   )r0   pro   s     r   r2   z#autocorrect_cue.<locals>.<listcomp>(  s'    BBBqa>))sBBBr    g     @@g       @rising_edge	local_minnone	voice_end	_revertedr   )startendrk   rl   rm   rn   rU   rV   rb   r   r8   r9   r:   ro   )
original_startoriginal_endcorrected_startcorrected_enddelta_start_msdelta_end_msonset_foundoffset_foundmethodparams)r   getr`   rE   rg   r5   r   ) rh   ri   rj   rk   rl   rm   rn   rU   rV   rb   r   r8   r9   r:   ro   r   r   start_back_sstart_forward_s
end_back_send_forward_sfallback_window_s	start_idxstart_methodend_idx
end_methodr   r   final_start	final_endstart_delta	end_deltas                  `                 r   autocorrect_cuer     s    
X\\,''.3	/	/BWE
  C.1,,BBBBEBBB#/L&/O!/J$/M%73> "%-=".".@ @I %.$9==tL%eR1ACT&2N&24DF F	 '0&;{{eR(-,.?A AG !( 3J#E2~?P$0.$02BD D %,$7[[V
*3*?y2~~TO&-&9Wr\\tM%4%@//FVK%2%@--nI K+(8899)677	""*K9EQl55556LL&I5?KJ1111VJ##*K&I +(&$e[3C%Ct$KLLMMeY>%At$KLLMM)5'5(<<* 0&,(,!2(,( 0,
 
  r    c                     t          j        t                                          d                   } |                     d           |                     dt
          d           |                     dt
          d           |                     dt          t          	           |                     d
t          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt          t          	           |                     dt
          t           	           |                     dt          t"          	           |                     dt
          dd           |                                 }t'          |j                  5 }t+          j        |          }d d d            n# 1 swxY w Y   t/          ||j        |j        |j        |j        |j        |j        |j        |j        |j         |j!        |j"        |j#        |j$        |j%                  }tM          t+          j'        |d                     d S )Nr   )descriptionrh   z--startT)typerequiredz--endz--start-back-ms)r   defaultz--start-forward-msz--end-back-msz--end-forward-msz--voice-min-msz--silence-min-msz--voice-lookback-msz--smoothing-msz--min-spacing-msz--depth-factorz--distance-bias-msz--gate-thresholdr
   z4pre-zero peaks below this value (0..1); 0 = disabled)r   r   helpr}   r	   )indent)(argparseArgumentParser__doc__
splitlinesadd_argumentr4   r   DEFAULT_START_BACK_MSDEFAULT_START_FORWARD_MSDEFAULT_END_BACK_MSDEFAULT_END_FORWARD_MSDEFAULT_VOICE_MIN_MSDEFAULT_SILENCE_MIN_MSDEFAULT_VOICE_LOOKBACK_MSDEFAULT_SMOOTHING_MSDEFAULT_MIN_SPACING_MSDEFAULT_DEPTH_FACTORDEFAULT_DISTANCE_BIAS_MS
parse_argsopenrh   jsonloadr   r{   r|   rk   rl   rm   rn   rU   rV   rb   r   r8   r9   r:   ro   printdumps)apargsfwfresults        r   _clir   z  s   		 W-?-?-A-A!-D	E	E	EBOOJOOIEDO999OOGEDO999OO%s>SOTTTOO(s>VOWWWOOOs>QORRROO&s>TOUUUOO$s>ROSSSOO&s>TOUUUOO)s>WOXXXOO$s>ROSSSOO&s>TOUUUOO$u>ROSSSOO(s>VOWWWOO&ucO  Q Q Q==??D	dm		 Yq\\              
DJ(.$*&*0&*&.*  F 
$*VA
&
&
&'''''s   >II#&I#__main__)r   r   r   r   r   r   r   r   r   r   rH   rI   r   r   r   r   r   r+   rE   rO   r`   rg   dictr4   r   r   r   __name__r/   r    r   <module>r      s   .                     $
 
 
  F( ( (: : :zM M Mj *?,D':*@(<*@-F(<*@*>,D,/c cd ce cU c#&c&)c "%c %(	c
 #&c %(c (+c #&c %(c #(c '*c %*c 59c c c cL$( $( $(N zDFFFFF r    