def _tag_columns(frame, tag):
    """Return a copy of ``frame`` with ``tag`` appended to every column name except 'time'."""
    return frame.rename(
        columns=lambda name: name if name == 'time' else name + tag)


def time_lag(data, lags, n_grid=10, time_step=4):
    """
    Transform the dataset into a time series of grid information and
    return the time-lagged predictors together with the surge series.

    The CSV is read with pandas and its columns are renamed to
    time, wind_u10, wind_v10, slp, weight, surge (so the file must have
    exactly six columns). Rows sharing a 'weight' value are handled as one
    group (presumably one grid point -- confirm against the data) and the
    groups are joined side by side on 'time'. The joined table, including
    the first group's surge column, is then shifted ``lags`` times and
    re-joined on 'time' so each row also carries the earlier values.

    Parameters
    ----------
    data : str
        The full name of the csv file.
    lags : int
        Number of lagged copies to append.
    n_grid : int, optional
        Number of distinct 'weight' groups to join. Defaults to 10, the
        value that used to be hard-coded. An IndexError is raised when the
        file holds fewer distinct 'weight' values.
    time_step : int, optional
        Amount added to 'time' for each lag. Defaults to 4, the value that
        used to be hard-coded.

    Returns
    -------
    df_new : pandas.DataFrame
        Predictor matrix with positional integer column labels. Column 0
        is the time; column 4 (the un-lagged surge) has been removed.
    surge_ts : pandas.DataFrame
        Columns 'time', 'surge' and 'date', restricted to the times that
        are present in ``df_new`` and have a non-missing surge. 'date' is
        1900-01-01 plus 'time' hours.
    """
    time_orig = pd.to_datetime('1900-01-01')

    df = pd.read_csv(data)
    df.columns = ['time', 'wind_u10', 'wind_v10', 'slp', 'weight', 'surge']

    weights = df['weight'].unique()
    first_grid = df.loc[df['weight'] == weights[0]]

    # reorganize the matrix: one row per time, one column set per group.
    # Every group gets its own column suffix so the merges never have to
    # resolve name clashes; recent pandas releases refuse merges whose
    # suffixes would create duplicate column names.
    df_new = first_grid.drop(columns=['weight'])
    for i in range(1, n_grid):
        df_sub = df.loc[df['weight'] == weights[i]]
        df_sub = df_sub.drop(columns=['weight', 'surge'])
        df_new = pd.merge(df_new, _tag_columns(df_sub, '_g{}'.format(i)),
                          on='time')

    # lag the time series data
    lagged_df = df_new.copy()  # to prevent modifying original matrix
    for j in range(lags):
        # shift the lagged copy forward by one step and remove its last
        # row, since there is no match for it in df_new
        lagged_df = lagged_df.assign(
            time=lagged_df['time'] + time_step).iloc[:-1]
        # remove the topmost row from df_new to match lagged_df
        df_new = df_new.iloc[1:]
        # merge lagged data with df_new (unique names per lag, see above)
        df_new = pd.merge(
            df_new, _tag_columns(lagged_df, '_lag{}'.format(j + 1)),
            on='time', how='outer')

    # Replace the column names by their positions 0..k-1. The double
    # transpose is kept as-is because it may also unify the column dtypes,
    # which the float-based time comparisons below rely on.
    df_new = df_new.T.reset_index(drop=True).T

    # keep only rows whose last (most lagged) column is present
    df_new = df_new.loc[~pd.isna(df_new[df_new.shape[1] - 1])]

    # storm surge time series data, with missing/NaN values removed
    surge_ts = first_grid[['time', 'surge']].reset_index(drop=True)
    surge_ts = surge_ts.loc[~pd.isna(surge_ts['surge'])]

    # filter surge according to df_new.
    # Rows of df_new whose surge is missing are removed here by matching on
    # time. They are deliberately NOT dropped by row label: after lagging,
    # the labels of df_new no longer line up with those of surge_ts, so a
    # label-based drop removes the wrong rows or raises KeyError.
    surge_times = {float(t) for t in surge_ts['time']}
    time_both = [int(t) for t in df_new[0] if t in surge_times]

    surge_ts = surge_ts[surge_ts['time'].isin(time_both)].copy()
    surge_ts['date'] = time_orig + pd.to_timedelta(
        surge_ts['time'].astype('int64'), unit='h')

    df_new = df_new[df_new[0].isin([t * 1.0 for t in time_both])]
    df_new = df_new.drop(columns=[4])  # remove the un-lagged surge data
    return df_new, surge_ts


data = 'stormdata.csv'
x, surge = time_lag(data, 3)