This is my code:
def mask(mask_cols):""" This function masks specific columns of the pupil dataframe.""" masked_df = pd.read_csv(con.CLEANED_PUPIL_GAZE) masked_df = mask_pupil_first_derivative(masked_df, threshold=3.0, mask_cols=['diameter', 'diameter_3d']) return masked_dfdef smooth(columns_to_smooth):""" This function smooths two columns of the interpolated pupil dataframe. :param columns_to_smooth: diameter and diamete 3d columns.""" original_df = mask(mask_cols=['diameter', 'diameter_3d']) smooth_df = original_df logging.basicConfig(filename='filtering_errors.log', level=logging.ERROR) print(smooth_df) print("Data will be now be smoothed with a low-pass BW filter.") nyquist = 0.5 * con.sample_rate_ET cutoff_high = 4.0 # Example: Upper bound for smoothing cutoff_low = 0.01 # Example: Lower bound for smoothing # Calculate normalized cutoff frequencies normalized_cutoff_low = cutoff_low / nyquist normalized_cutoff_high = cutoff_high / nyquist for column in columns_to_smooth: try: # Check for NaNs before filtering if smooth_df[column].isnull().any(): raise ValueError(f"NaN values detected in column {column} before filtering") # Apply Butterworth filter to the selected column b, a = butter(N=3, Wn=[normalized_cutoff_low, normalized_cutoff_high], btype='bandpass', fs=con.sample_rate_ET) smooth_values = filtfilt(b, a, smooth_df[column]) # Check for NaNs after filtering if np.isnan(smooth_values).any(): raise ValueError(f"NaN values detected after filtering column {column}") # Update the column in smooth_df with the filtered values smooth_df[column] = smooth_values print(smooth_values) except ValueError as e: print(f"Error: {e}") # Handle the error, e.g., log the error, skip this column, or set NaNs as appropriate # Save the smoothed data to a new CSV file smooth_df.to_csv(con.SMOOTHED_PUPIL, index=False) # Plot the frequency response of the filter for the current column w, h = freqs(b, a) plt.semilogx(w, 20 * np.log10(abs(h))) plt.title(f'Butterworth filter frequency response for column {column}') plt.xlabel('Frequency [radians / second]') plt.ylabel('Amplitude [dB]') plt.margins(0, 0.1) plt.grid(which='both', axis='both') plt.axvline(100, color='green') # cutoff frequency plt.show() # Assuming 'original_df' contains the original data before smoothing and 'smooth_df' contains the smoothed data # Plot original and smoothed data for comparison plt.plot(original_df['pupil_timestamp'], original_df['diameter'], label='Original') plt.plot(smooth_df['pupil_timestamp'], smooth_df['diameter'], label='Smoothed') plt.xlabel('Timestamp') plt.ylabel('Diameter') plt.title('Comparison of Original and Smoothed Data') plt.legend() plt.show() # Calculate summary statistics original_mean = original_df['diameter'].mean() smoothed_mean = smooth_df['diameter'].mean() original_std = original_df['diameter'].std() smoothed_std = smooth_df['diameter'].std() # Print summary statistics print(f"Original Mean: {original_mean}, Smoothed Mean: {smoothed_mean}") print(f"Original Standard Deviation: {original_std}, Smoothed Standard Deviation: {smoothed_std}")
I am working with eye-tracking data with an average sampling rate of 120 Hz (but it could be different for each dataset). The original and smoothed dataset have the same mean and standard deviation and the graphs are overlaid.
There is no smoothing going on. What should I do?