I have two audio recordings of the same session: one made with my laptop mic and one with an external mic. The laptop mic recording starts after the external mic recording; the time difference can be anywhere from 2 to 60 seconds.
So I wrote the code below, and it is pretty accurate, but when I line up the two tracks (the one from the video, which uses the laptop mic, and the newly adjusted audio), there is still a delay of around 50 ms. Why could this be?
Sometimes the residual delay is positive, and sometimes it is negative.
# loadAudio() and the file/path variables used at the bottom are defined elsewhere in my script.
from scipy import signal
from pydub import AudioSegment
from pydub.utils import mediainfo
import numpy as np
import os


def findOffset(audio1, audio2):
    correlation = signal.correlate(audio2, audio1, mode="full")
    lags = signal.correlation_lags(audio2.size, audio1.size, mode="full")
    lag = lags[np.argmax(correlation)]
    return lag


def adjustAudio(audio_segment, lag, frame_rate):
    # Convert lag from samples to milliseconds, rounding to nearest integer at the last step
    ms_lag = round((lag / frame_rate) * 1000)
    if lag > 0:
        # Audio needs to start later: pad audio at the beginning
        silence = AudioSegment.silent(duration=ms_lag, frame_rate=frame_rate)
        adjusted_audio = silence + audio_segment
    else:
        # Audio needs to start earlier: trim audio from the beginning
        adjusted_audio = audio_segment[abs(ms_lag):]  # Use abs to convert negative lag to positive
    return adjusted_audio


def alignAudioTrack(audioFile, newAudioFile, lag):
    audio_data, rate, audio_segment = loadAudio(audioFile, return_segment=True)
    # Adjust the AudioSegment based on lag, ensuring frame_rate is passed correctly
    adjusted_audio = adjustAudio(audio_segment, lag, rate)
    # Fetch original bitrate
    bitrate = mediainfo(audioFile)['bit_rate']
    # Save the adjusted audio preserving the original bitrate
    adjusted_audio.export(newAudioFile, format="mp3", bitrate=bitrate)


audio1, rate1 = loadAudio(audioFile1)
audio2, rate2 = loadAudio(audioFile2)
lag = findOffset(audio1, audio2)
alignedAudioFile = os.path.join(newAudioDir, f"{baseName}_aligned.mp3")
alignAudioTrack(origAudioFile, alignedAudioFile, lag)
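loadAudio is omitted above for brevity. A minimal stand-in that matches how it is called here (mono samples as a NumPy array, the frame rate, and optionally the pydub AudioSegment) would be something like:

from pydub import AudioSegment
import numpy as np

def loadAudio(path, return_segment=False):
    # Decode with pydub/ffmpeg, downmix to mono, and expose the raw samples
    seg = AudioSegment.from_file(path)
    samples = np.array(seg.set_channels(1).get_array_of_samples(), dtype=np.float32)
    if return_segment:
        return samples, seg.frame_rate, seg
    return samples, seg.frame_rate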
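Since the residual offset flips sign between runs, here is a small synthetic check of the lag convention that findOffset relies on (the 48 kHz rate and 0.5 s offset are made-up example values):

import numpy as np
from scipy import signal

rate = 48000
ref = np.random.default_rng(0).standard_normal(rate * 5)         # 5 s of noise
later = np.concatenate([np.zeros(rate // 2), ref])                # same content starting 0.5 s later

corr = signal.correlate(later, ref, mode="full")                  # same argument order as findOffset
lags = signal.correlation_lags(later.size, ref.size, mode="full")
print(lags[np.argmax(corr)])                                      # expected: +24000 samples, i.e. +0.5 s

With correlate(audio2, audio1) as in findOffset, a positive lag should therefore mean the shared content shows up later in audio2, and a negative lag the opposite.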
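adjustAudio works in whole pydub milliseconds (both the silence padding and the slice), and the round() introduces at most half a millisecond of error. For completeness, a sample-accurate variant that shifts the raw sample array instead (a sketch; adjustSamples is just an illustrative name, assuming mono samples) would be:

import numpy as np

def adjustSamples(samples, lag):
    # Positive lag: start later by prepending `lag` samples of silence
    if lag > 0:
        return np.concatenate([np.zeros(lag, dtype=samples.dtype), samples])
    # Non-positive lag: start earlier by dropping the first |lag| samples
    return samples[-lag:]

Exporting the shifted array as WAV (e.g. with scipy.io.wavfile.write) keeps the comparison at sample resolution while testing.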