Quantcast
Channel: Active questions tagged python - Stack Overflow
Viewing all articles
Browse latest Browse all 23131

Trying to use the multiprocessing library in Python but I am running into issues where it freezes but throws no error

$
0
0

So I've written this code after consulting chatgpt and it works for the most part:

import numpy as npfrom sklearn.datasets import make_classificationfrom sklearn.model_selection import train_test_splitfrom sklearn.ensemble import RandomForestClassifierfrom sklearn.metrics import accuracy_scorefrom multiprocessing import Pool, cpu_countdef evaluate_subset(model,scoring, X_in, y_in, subset = None):    #model = RandomForestClassifier(n_estimators=100, random_state=42)    list_scores = []    for train_index, test_index in skf.split(X_in, y_in):        X_train, y_train = X_in.values[train_index], y_in.values[train_index]        X_test,  y_test  = X_in.values[test_index],  y_in.values[test_index]        model.fit(X_train[:, subset], y_train)        y_pred = model.predict(X_test[:, subset])        list_scores.append(scoring(y_test, y_pred))    return np.mean(list_scores)def stepwise_add_selection(model, scoring, X_in, y_in, n_processes=None):    if n_processes is None:        n_processes = cpu_count()    pool = Pool(processes=n_processes)    remaining_features = set(range(X_in.shape[1]))    selected_features = []    best_accuracy = 0    while remaining_features:        results = []        for feature in remaining_features:            subset = selected_features + [feature]            results.append(pool.apply_async(evaluate_subset, args=(model, scoring, X_in, y_in, subset)))        accuracies = [res.get() for res in results]        best_index = np.argmax(accuracies)        print("Current Best")        print(max(accuracies))        print("Previous Best")        print(best_accuracy)        print(selected_features)        if best_accuracy < max(accuracies):            selected_features.append(list(remaining_features)[(best_index)])            best_accuracy = accuracies[best_index]        else:            break    pool.close()    pool.join()    return selected_features, best_accuracy

However, I am trying to create another greedy search which removes features:

def stepwise_feature_removal(model, scoring, X_in, y_in, n_processes=None):    remaining_features = set(range(X_train.shape[1]))    selected_features = list(remaining_features)    best_accuracy = evaluate_subset(model, scoring, X_in, y_in, selected_features)    print("Initial accuracy score:", best_accuracy)    while remaining_features:        results = []        worst_feature = None        pool = Pool(processes=n_processes)        for feature in remaining_features:            temp_features = selected_features[:]            temp_features.remove(feature)            results.append(pool.apply_async(evaluate_subset, args=(model, scoring, X_in, y_in, temp_features)))        pool.close()        pool.join()        accuracies = [res.get() for res in results]        best_index = np.argmax(accuracies)        if accuracies[best_index] > best_accuracy:            best_accuracy = accuracies[best_index]            worst_feature = temp_features[best_index]        print("Current Best")        print(accuracy)        print("Previous Best")        print(best_accuracy)        print("Feature removed:")        print(worst_feature)        if worst_feature is not None:            selected_features.remove(worst_feature)            remaining_features.remove(worst_feature)        else:            break    return selected_features, best_accuracy

In the feature removal approach the issue I am running into is that the program just stops running. It does not give an indication that there is an error or anything. I added both pool.close() and pool.join() but its not fixing the issue.

Thanks ahead of time.

I am trying to write a greedy feature reduction function that works similar to the greedy feature addition function. Not sure why it is freezing so that would be helpful as well.

Edit: I should've clarified the issue occurs when I run this code with imblearn packages. Without imblearn the multiprocessing can occur and the program runs.

def use_pipeline(clf, resample = False):    if resample == False:        pipe = make_pipeline(MinMaxScaler(), clf)    else:        pipe = make_pipeline(resample, MinMaxScaler(), clf)    return pipesm  = SMOTE (random_state=38)pipe_clf = use_pipeline(clf1, sm)stepwise_feature_removal(pipe_clf, matthews_corrcoef, X_train, y_train, 15)

Viewing all articles
Browse latest Browse all 23131

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>