I am working on my first full machine learning project. I am now trying to process the raw data and transform it into a form suitable for supervised learning. In one of the steps, in order to resample the data, I used the `resample` method followed by `apply`, passing a `sampling` dictionary that specifies the aggregation function for each column, as follows:
import pandas as pd
from glob import glob

# --------------------------------------------------------------
# Turn into function
# --------------------------------------------------------------

files = glob("../../data/raw/MetaMotion/MetaMotion/*.csv")
data_path = "../../data/raw/MetaMotion/MetaMotion"


def read_data_from_files(files):
    """Load the sensor CSV files into one accelerometer and one gyroscope frame.

    For every path in ``files`` the participant, label and category are parsed
    from the file name (split on ``"-"``) and stored as columns next to the
    CSV contents. Files whose name contains ``"Accelerometer"`` go into the
    accelerometer frame, files whose name contains ``"Gyroscope"`` go into the
    gyroscope frame; each file receives an increasing ``set`` number within
    its sensor type.

    Args:
        files: Iterable of CSV file paths.

    Returns:
        A tuple ``(acc_df, gyr_df)``. Both frames are indexed by the datetime
        derived from the ``"epoch (ms)"`` column; the ``"epoch (ms)"``,
        ``"time (01:00)"`` and ``"elapsed (s)"`` columns are removed.

    Raises:
        KeyError: If a frame ends up without an ``"epoch (ms)"`` column, for
            example when no file of that sensor type was found.
    """
    acc_frames = []
    gyr_frames = []
    acc_set = 1
    gyr_set = 1

    for f in files:
        name_parts = f.split("-")
        participant = name_parts[0].replace(data_path, "")[-1]
        label = name_parts[1]
        # NOTE: str.rstrip() removes any trailing characters from the given
        # set, not a literal suffix. Kept as in the original; verify it does
        # not eat the end of a category name for your file names.
        category = name_parts[2].rstrip("123").rstrip("_MetaWear_2019")

        df = pd.read_csv(f)
        df["participant"] = participant
        df["label"] = label
        df["category"] = category

        # assign() returns a copy, so each collected frame keeps its own
        # "set" value. The counters must use "+=": "=+ 1" just assigns +1.
        if "Accelerometer" in f:
            acc_frames.append(df.assign(set=acc_set))
            acc_set += 1

        if "Gyroscope" in f:
            gyr_frames.append(df.assign(set=gyr_set))
            gyr_set += 1

    # Concatenate once instead of growing a frame inside the loop.
    acc_df = pd.concat(acc_frames) if acc_frames else pd.DataFrame()
    gyr_df = pd.concat(gyr_frames) if gyr_frames else pd.DataFrame()

    for frame in (acc_df, gyr_df):
        frame.index = pd.to_datetime(frame["epoch (ms)"], unit="ms")
        for column in ("epoch (ms)", "time (01:00)", "elapsed (s)"):
            del frame[column]

    return acc_df, gyr_df


acc_df, gyr_df = read_data_from_files(files)

# --------------------------------------------------------------
# Merging datasets
# --------------------------------------------------------------

# Only the first three accelerometer columns are taken; the metadata columns
# come from the gyroscope frame.
data_merged = pd.concat([acc_df.iloc[:, :3], gyr_df], axis=1)

# NOTE: dropna() returns a new frame; the result is not stored, so
# data_merged itself is left unchanged by this call.
data_merged.dropna()
data_merged.info()

# Rename columns.
# This must be an ordered sequence: a set literal ({...}) has no defined
# order, so names would be attached to arbitrary columns. The metadata names
# follow the order in which read_data_from_files() creates the columns
# (participant, label, category, set).
data_merged.columns = [
    "acc_x",
    "acc_y",
    "acc_z",
    "gyr_x",
    "gyr_y",
    "gyr_z",
    "participant",
    "label",
    "category",
    "set",
]

# --------------------------------------------------------------
# Resample data (frequency conversion)
# --------------------------------------------------------------

# Accelerometer: 12.500HZ
# Gyroscope: 25.000Hz

# Per-column aggregation: average the sensor readings, keep the last
# metadata value seen in each window.
sampling = {
    "acc_x": "mean",
    "acc_y": "mean",
    "acc_z": "mean",
    "gyr_x": "mean",
    "gyr_y": "mean",
    "gyr_z": "mean",
    "label": "last",
    "category": "last",
    "participant": "last",
    "set": "last",
}

# Every column has an explicit aggregation, so no numeric_only argument is
# needed, and a single agg() call replaces the discarded apply() call.
resampled_data = data_merged[:1000].resample(rule="200ms").agg(sampling)