
LSTM model, using SHAP to explain a prediction

I am using an LSTM model to detect anomalies in the network, namely DDoS attacks. I have trained the model on a dataset; it works and returns correct predictions. But I can't get a SHAP explanation out of it.

Model training:

import numpy as np
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Change features with object or string values into numeric values
ord_feat = ['protocol_type', 'service', 'flag']
# nom_feat = columns whose values are 0 or 1
nom_feat = ['land', 'logged_in', 'is_host_login', 'is_guest_login']
num_feat = ['src_bytes', 'dst_bytes', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins',
            'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations',
            'num_shells', 'num_access_files', 'num_outbound_cmds', 'count', 'srv_count',
            'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate',
            'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count',
            'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
            'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate',
            'dst_host_rerror_rate', 'dst_host_srv_rerror_rate']

X_train, y_train = df.drop(columns=['class'], axis=1, inplace=False), df['class'].values

# Fit the encoders and scaler on the training data
ohe = OneHotEncoder(sparse=False)
oe = OrdinalEncoder()
ohe.fit(X_train[nom_feat].values)
oe.fit(X_train[ord_feat].values)
scalar = StandardScaler()
scalar.fit(X_train[num_feat].values)

X_train_nom = ohe.transform(X_train[nom_feat].values)
X_train_ord = oe.transform(X_train[ord_feat].values)
X_train_num = scalar.transform(X_train[num_feat].values)
X_train = np.concatenate([X_train_ord, X_train_num, X_train_nom], axis=1)

# SVM Approach -------------------------------------------------------------------
from sklearn import svm
classifier = svm.SVC(kernel="linear")
classifier.fit(X_train, y_train)
y_predict = classifier.predict(X_test)
from sklearn import metrics
print("SVM ACCURACY : ", metrics.accuracy_score(y_test, y_predict))
# SVM Approach Done --------------------------------------------------------------

# Reshape the train dataset into a 3D array
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
y_train = y_train.reshape((y_train.shape[0], 1, 1))

# Scale the num, ord, nom datasets
X_test, y_test = df_val.drop(columns=['class'], axis=1, inplace=False), df_val['class'].values
ohe.fit(X_test[nom_feat].values)
oe.fit(X_test[ord_feat].values)
scalar.fit(X_test[num_feat].values)
X_test_nom = ohe.transform(X_test[nom_feat].values)
X_test_ord = oe.transform(X_test[ord_feat].values)
X_test_num = scalar.transform(X_test[num_feat].values)
X_test = np.concatenate([X_test_ord, X_test_num, X_test_nom], axis=1)
X_test.shape

# Reshape the test dataset into a 3D array
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_test = y_test.reshape((y_test.shape[0], 1, 1))

# LSTM input: (batch size, time steps, features)
model = Sequential()
model.add(LSTM(units=44, input_shape=(1, 44), return_sequences=True))
model.add(Dense(1))
model.add(Dense(1))
model.compile(loss="mean_absolute_error", optimizer='adam', metrics=["accuracy"])
# model.summary()
history = model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test))

Next, in the main application where I use the model, I need to derive an explanation of why a packet is harmful.

Init shap.Explainer

def initEncodersAndScaler(self):
    df = pd.read_csv("attack_test.csv")
    # Data preparation
    X, Y = df.drop(columns=['class'], axis=1, inplace=False), df['class'].values
    # Fit the encoders and the scaler
    self.ohe.fit(X[nom_feat].values)
    self.oe.fit(X[ord_feat].values)
    self.scalar.fit(X[num_feat].values)
    X_test_nom = self.ohe.transform(X[nom_feat].values)
    X_test_ord = self.oe.transform(X[ord_feat].values)
    X_test_num = self.scalar.transform(X[num_feat].values)
    X = np.concatenate([X_test_ord, X_test_num, X_test_nom], axis=1)
    # Create the masker with the correct shape
    print(X.shape)
    self.masker = shap.maskers.Independent(data=X)
    self.explainer = shap.Explainer(self.model, self.masker)  # Create the explainer object

And this is where I use the explainer:

def run(self):
    for index, row in self.data.iterrows():
        if self.stop_flag:
            break  # If the stop flag is set, abort the loop
        # Build an object holding the packet's features
        packet_features = NetworkPacketFeatures(row.to_dict())
        # Prepare the data for the prediction
        features = packet_features.to_array()
        print(features.shape)
        explanation = packet_features.explainer(features)
        shap_values = explanation.values  # Get the SHAP values
        print(f"SHAP values for the packet: {shap_values}")
        features = features.reshape((features.shape[0], 1, features.shape[1]))  # Data preparation
        prediction = packet_features.model.predict(features)  # Prediction
        scale_pred = prediction[0][0][0]
        self.update_signal.emit(packet_features, scale_pred)  # Emit the signal

features contains only one packet from the whole list! Also, the model only accepts features after

features = features.reshape((features.shape[0], 1, features.shape[1]))  # Data preparation
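To make the two shapes concrete, here is a toy example (the zeros row is just a stand-in for what to_array() returns):

import numpy as np

# A stand-in for one preprocessed packet
features = np.zeros((1, 44))
print(features.shape)  # (1, 44) - a 2D row, like the masker's background data
features = features.reshape((features.shape[0], 1, features.shape[1]))
print(features.shape)  # (1, 1, 44) - the 3D shape that input_shape=(1, 44) expects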

But I tried doing the reshape before sending the data to the explainer, and it didn't help: I got errors about arrays not being the right size, and errors where the explainer was treated as a NumPy array for some reason. With the code as it stands now, the error is:

ValueError: Input 0 of layer "sequential_5" is incompatible with the layer: expected shape=(None, 1, 44), found shape=(3324, 44)
Aborted (core dumped)
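From the traceback, my understanding is that shap.Explainer calls the model on 2D batches of shape (n, 44) drawn from the masker's background data, while the network was compiled for (None, 1, 44). A sketch of the kind of wrapper I assume is needed (model_predict is my own name, not a SHAP API; model and X are the objects from the code above):

import numpy as np
import shap

def model_predict(data_2d):
    # SHAP hands the model 2D arrays of shape (n_samples, 44),
    # but the LSTM was built for (n_samples, 1, 44)
    data_3d = data_2d.reshape((data_2d.shape[0], 1, data_2d.shape[1]))
    # predict() returns (n_samples, 1, 1) because of return_sequences=True;
    # flatten to one scalar per sample so SHAP sees a 1D output
    return model.predict(data_3d).reshape(-1)

masker = shap.maskers.Independent(data=X)          # X stays 2D, e.g. (3324, 44)
explainer = shap.Explainer(model_predict, masker)  # explain the wrapper, not the raw model
explanation = explainer(features)                  # one packet as a 2D row: (1, 44)

With a wrapper like this, the reshape lives inside the prediction function, so the explainer and the masker only ever deal with 2D arrays, and a single packet would be passed as a (1, 44) row rather than (1, 1, 44).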
