I have four trained models. 2 VotingClassifier models and 2 StackingClassifier models. They are already trained and saved in joblib format. I can even load them and use them for prediction (in prod, using their predict methods). But when I try to stack all of them to create a new StackingClassifier I got not fitted error.
Code:
class JoblibModelWrapper(BaseEstimator, ClassifierMixin): def __init__(self, model): self.model = model def fit(self, X, y=None): # dont fit because self.model is already fitted return self def predict(self, X): return self.model.predict(X) def predict_proba(self, X): if hasattr(self.model, "predict_proba"): return self.model.predict_proba(X) else: raise RuntimeError("Le modèle sous-jacent ne supporte pas predict_proba")import osimport joblibimport pandas as pdmodels_directory = '/content/drive/MyDrive/X/preprod_models'joblib_models = {}files = os.listdir(models_directory)total_models_count = sum(1 for filename in files if filename.endswith('.joblib'))current_model_index = 0for filename in files: model_path = os.path.join(models_directory, filename) model_name = filename[:-7] # remove .joblib # load trained models already saved in joblib format (they are used in prod today, and works perfectly when we call .predict method) model = joblib.load(model_path) joblib_models[model_name] = model current_model_index += 1 print(f'Modèle {current_model_index} / {total_models_count} processed: {model_name}')print('Total processed: {total_models_count}')wrapped_joblib_models = [ (name.replace('', '_').replace('__', '_'), JoblibModelWrapper(model)) for name, model in joblib_models.items()]cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)stacking_models = StackingClassifier( estimators=wrapped_joblib_models , final_estimator=LogisticRegression(solver='saga', max_iter=10000), cv=cv, verbose=3, passthrough=True, stack_method='predict', # n_jobs=-1)def evaluate_model(model_name, X_train, y_train, X_test, y_test, X_train_no_encoded, model = None): print(f'======================================{model_name}=============================') print(mapping) threshold = 0.0005 model_path = f'/content/drive/MyDrive/X/prod_models/{model_name}.joblib' model.fit(X_train, y_train) dump(model, f'/content/drive/MyDrive/X/prod_models/{model_name}.joblib') print('modèle sauvegardé') plot_confusion_matrix_normalized(model, X_test, y_test) plot_confusion_matrix_normalized(model, X_train, y_train) # Courbe ROC n_classes = len(np.unique(y_train)) # Nombre de classes uniques dans y_train if hasattr(model, "predict_proba"): plot_multiclass_roc_curve(model, X_test, y_test, n_classes) plot_multiclass_roc_curve(model, X_train, y_train, n_classes)evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models)I got this error
---------------------------------------------------------------------------======================================PROD_ALL_TRAINED_MODELS_STACKED============================={'X': 0, 'Y': 1, 'Z': 2}modèle sauvegardé---------------------------------------------------------------------------NotFittedError Traceback (most recent call last)<ipython-input-19-89e6497ba9b5> in <cell line: 30>() 28 ) 29 ---> 30 evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models) 31 32 # all_models[0]8 frames/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any) 1620 1621 if not _is_fitted(estimator, attributes, all_or_any):-> 1622 raise NotFittedError(msg % {"name": type(estimator).__name__}) 1623 1624 NotFittedError: This StackingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.