@@ -451,6 +451,9 @@ def generate_strength(self):
451451 )
452452
453453 self .fade_out_window : torch .Tensor = 1 - self .fade_in_window
454+ self .sola_denominator_kernel = torch .ones (
455+ 1 , 1 , self .crossfade_frame , device = self .device , dtype = torch .float32
456+ )
454457 # The size will change from the previous result, so the record will be deleted.
455458 self .sola_buffer = torch .zeros (
456459 self .crossfade_frame , device = self .device , dtype = torch .float32
@@ -513,10 +516,7 @@ def process_audio(
513516 ].float ()
514517 cor_nom = F .conv1d (conv_input , self .sola_buffer [None , None , :])
515518 cor_den = torch .sqrt (
516- F .conv1d (
517- conv_input ** 2 ,
518- torch .ones (1 , 1 , self .crossfade_frame , device = self .device ),
519- )
519+ F .conv1d (conv_input ** 2 , self .sola_denominator_kernel )
520520 + 1e-8
521521 )
522522 sola_offset = torch .argmax (cor_nom [0 , 0 ] / cor_den [0 , 0 ])
@@ -543,12 +543,9 @@ def process_audio(
543543 # Apply sin² fade-in over crossfade_frame duration from onset.
544544 fade_len = min (block_size - onset_sample , self .crossfade_frame )
545545 if fade_len > 0 :
546- t = torch .linspace (
547- 0.0 , 1.0 , steps = fade_len , device = self .device , dtype = torch .float32
548- )
549- audio [onset_sample : onset_sample + fade_len ] *= (
550- torch .sin (0.5 * np .pi * t ) ** 2
551- )
546+ audio [onset_sample : onset_sample + fade_len ] *= self .fade_in_window [
547+ :fade_len
548+ ]
552549 else :
553550 audio [: self .crossfade_frame ] *= self .fade_in_window
554551 audio [: self .crossfade_frame ] += self .sola_buffer * self .fade_out_window
0 commit comments