I'm trying to implement the paper:https://arxiv.org/pdf/2206.01161.pdfto see how it affects models.
I implemented gradcam in tensorflow:
def compute_gradients_and_capture(img, gradients_last_conv, last_conv_layer_output): # This is a vector where each entry is the mean intensity of the gradient # over a specific feature map channel pooled_grads = tf.reduce_mean(gradients_last_conv, axis=(0, 1, 2)) # Each channel in the feature map array is multiplied by "how important this channel is" # with regard to the top predicted class. The channels are then summed to obtain the heatmap. heatmap = tf.reduce_sum(tf.multiply(pooled_grads, last_conv_layer_output), axis=-1) # Normalizing the heatmap between 0 & 1 for visualization heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap) # Resizing heatmap to match the input image dimensions heatmap_resized = tf.image.resize(heatmap[..., tf.newaxis], (img.shape[1], img.shape[2])) heatmap_resized = tf.squeeze(heatmap_resized) # Remove the last dimension after resize return heatmap_resized.numpy()
And I'm trying the following way to train my model:
def run_step(image_ten, seg_map, class_name, model, optimizer, is_train): last_conv_layer_name = find_last_conv_layer(model) last_conv_layer = model.get_layer(last_conv_layer_name) grad_model = Model(model.inputs, [last_conv_layer.output, model.output]) with tf.GradientTape(persistent=True) as tape: last_conv_layer_output, preds = grad_model(image_ten) max_score = tf.reduce_max(preds[0], axis=1) # This is the gradient of the output neuron (top predicted or chosen) # with regard to the output feature map of the last conv layer grads = tape.gradient(max_score, last_conv_layer_output) heatmap = compute_gradients_and_capture(image_ten, grads, last_conv_layer_output) # Adapted for TF segmentation_loss = calculate_segmentation_loss(heatmap, seg_map, 2, 0.3) if is_train: gradients = tape.gradient(segmentation_loss, grad_model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) del tape return segmentation_loss
But for reason I keep getting that the gradients is a list of None.Any ideas what the problem may be?
I'm thinking that the problem with my calculation of the heatmap (differentiability maybe), as when changing the segmentation loss to just class CrossEntropy loss the code worked