I've trained a NER model and saved it as a .keras file; when I try to load the model, I get this error:
TypeError: <class 'modeling.NERModel'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.config={'module': 'modeling', 'class_name': 'NERModel', 'config': {'trainable': True, 'dtype': 'float32'}, 'registered_name': 'NERModel', 'build_config': {'input_shape': [None, None]}, 'compile_config': {'optimizer': 'adam', 'loss': {'module': 'modeling', 'class_name': 'CustomNonPaddingTokenLoss', 'config': {'name': 'custom_ner_loss', 'reduction': 'sum'}, 'registered_name': 'CustomNonPaddingTokenLoss'}, 'loss_weights': None, 'metrics': None, 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': False}}.Exception encountered: Unable to revive model from config. When overriding the `get_config()` method, make sure that the returned config contains all items used as arguments in the constructor to <class 'modeling.NERModel'>, which is the default behavior. You can override this default behavior by defining a `from_config(cls, config)` class method to specify how to create an instance of NERModel from its config.Received config={'trainable': True, 'dtype': 'float32'}Error encountered during deserialization: NERModel.__init__() got an unexpected keyword argument 'trainable'

Below is my NER model code; I had no problems during training.
import osfrom tensorflow import kerasimport tensorflow as tffrom keras import layersclass TransformerBlock(layers.Layer): def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): super().__init__() self.att = keras.layers.MultiHeadAttention( num_heads=num_heads, key_dim=embed_dim ) self.ffn = keras.Sequential( [ keras.layers.Dense(ff_dim, activation="relu"), keras.layers.Dense(embed_dim), ] ) self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6) self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6) self.dropout1 = keras.layers.Dropout(rate) self.dropout2 = keras.layers.Dropout(rate) def call(self, inputs, training=False): attn_output = self.att(inputs, inputs) attn_output = self.dropout1(attn_output, training=training) out1 = self.layernorm1(inputs + attn_output) ffn_output = self.ffn(out1) ffn_output = self.dropout2(ffn_output, training=training) return self.layernorm2(out1 + ffn_output)class TokenAndPositionEmbedding(layers.Layer): def __init__(self, maxlen, vocab_size, embed_dim): super().__init__() self.token_emb = keras.layers.Embedding( input_dim=vocab_size, output_dim=embed_dim ) self.pos_emb = keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim) def call(self, inputs): maxlen = tf.shape(inputs)[-1] positions = tf.range(start=0, limit=maxlen, delta=1) position_embeddings = self.pos_emb(positions) token_embeddings = self.token_emb(inputs) return token_embeddings + position_embeddingsclass NERModel(keras.Model): def __init__( self, num_tags, vocab_size, maxlen=1000, embed_dim=32, num_heads=2, ff_dim=32 ): super().__init__() self.embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim) self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim) self.dropout1 = layers.Dropout(0.1) self.ff = layers.Dense(ff_dim, activation="relu") self.dropout2 = layers.Dropout(0.1) self.ff_final = layers.Dense(num_tags, activation="softmax") def call(self, inputs, training=False): x = self.embedding_layer(inputs) x = 
self.transformer_block(x) x = self.dropout1(x, training=training) x = self.ff(x) x = self.dropout2(x, training=training) x = self.ff_final(x) return xclass CustomNonPaddingTokenLoss(keras.losses.Loss): def __init__(self, reduction='sum', name="custom_ner_loss"): super().__init__(reduction=reduction, name=name) def call(self, y_true, y_pred): loss_fn = keras.losses.SparseCategoricalCrossentropy( from_logits=False, reduction=self.reduction # Pass the reduction argument here ) loss = loss_fn(y_true, y_pred) mask = tf.cast((y_true > 0), dtype=tf.float32) loss = loss * mask return tf.reduce_sum(loss) / tf.reduce_sum(mask)def save_model(model, filepath): if os.path.exists(filepath): filepath = filepath[:-6] +"1" + filepath[-6:] model.save(filepath)When I try to run predict and load the trained model, the error above shows up. I tried to change the save type (h5) but there are a different error, therefore I change back to .keras file and try to solve this first.
import re
import pickle

import keras
import tensorflow as tf
import numpy as np

from data_preprocess import map_record_to_training_data
from modeling import CustomNonPaddingTokenLoss


def lookup(tokens):
    """Convert string tokens to integer token ids using the saved vocabulary."""
    # Load the vocabulary list from the pickle file.
    with open('./resources/vocabulary.pkl', 'rb') as f:
        loaded_list = pickle.load(f)
    # The StringLookup class will convert tokens to token IDs.
    lookup_layer = keras.layers.StringLookup(vocabulary=loaded_list)
    return lookup_layer(tokens)


def format_datatype(data):
    """Turn a raw sentence into the padded tf.data batch the model expects."""
    # FIX: data.split('') raises ValueError (empty separator) — split on
    # whitespace instead.
    tokens = [re.sub(r'[;,]', '', d) for d in data.split(' ')]
    # Default tag is 0, since this is for prediction.
    ner_tags = [0 for _ in tokens]
    # Tab-separated record: <token count>\t<tokens...>\t<tags...>
    string_input = (
        str(len(tokens)) + "\t" + "\t".join(tokens) + "\t" + "\t".join(map(str, ner_tags))
    )
    string_input = tf.data.Dataset.from_tensor_slices([string_input])
    finalize_input = (
        string_input.map(map_record_to_training_data)
        .map(lambda x, y: (lookup(x), y))
        .padded_batch(1)
    )
    return finalize_input


def prediction(data):
    """Load the trained NER model, run it over `data`, and return tag names."""
    loaded_model = tf.keras.models.load_model(
        "./resources/trained_model/ner_model.keras",
        custom_objects={'CustomNonPaddingTokenLoss': CustomNonPaddingTokenLoss},
    )
    # summary() prints itself; wrapping it in print() just adds "None".
    loaded_model.summary()

    all_predicted_tag_ids = []
    for x, _ in data:
        print("Input Tensor Info:")
        print("Data Type:", x.dtype)
        print("Shape:", x.shape)
        output = loaded_model(x, training=False)
        predictions = np.argmax(output, axis=-1)
        predictions = np.reshape(predictions, [-1])
        all_predicted_tag_ids.append(predictions)

    all_predicted_tag_ids = np.concatenate(all_predicted_tag_ids)
    ner_labels = ["[PAD]", "N", "M", "other"]
    mapping = dict(zip(range(len(ner_labels)), ner_labels))
    predicted_tags = [mapping[tag] for tag in all_predicted_tag_ids]
    return predicted_tags


sample_input = "Hi, my name is David"
# FIX: `result` was printed but never assigned in the original script.
result = prediction(format_datatype(sample_input))
print(result)
print(sample_input.split(' '))
print(len(result))