import os

import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import models
from torchvision import transforms

from utils import GradCAM, show_cam_on_image, center_crop_img
from resnet1 import Mymodel

def main():
    # Load your own model; set num_classes to the number of classes in your dataset
    model = Mymodel(num_classes=7)
    # Load your own trained weights
    weights_dict = torch.load("path-to-your-trained-weights", map_location='cpu')
    model.load_state_dict(weights_dict, strict=False)

    # Define the target layer
    # target_layers = [model.backbone.layer4]
    target_layers = [model.backbone.layer4[-1]]

    # model = models.mobilenet_v3_large(pretrained=True)
    # target_layers = [model.features[-1]]

    # model = models.vgg16(pretrained=True)
    # target_layers = [model.features]

    # model = models.resnet34(pretrained=True)
    # target_layers = [model.layer4]

    # model = models.regnet_y_800mf(pretrained=True)
    # target_layers = [model.trunk_output]

    # model = models.efficientnet_b0(pretrained=True)
    # target_layers = [model.features]

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "path-to-your-image"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path).convert('RGB')
    img = np.array(img, dtype=np.uint8)
    # Resize and center-crop the input image to the desired size
    img = center_crop_img(img, 448)

    # [C, H, W]
    img_tensor = data_transform(img)
    # expand batch dimension
    # [C, H, W] -> [N, C, H, W]
    input_tensor = torch.unsqueeze(img_tensor, dim=0)

    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False)
    # target_category = 281  # tabby, tabby cat
    # target_category = 254  # pug, pug-dog
    target_category = 4

    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)
    grayscale_cam = grayscale_cam[0, :]
    visualization = show_cam_on_image(img.astype(dtype=np.float32) / 255.,
                                      grayscale_cam,
                                      use_rgb=True)
    plt.imshow(visualization)
    plt.show()


if __name__ == '__main__':
    main()
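One practical note: target_category = 4 above hard-codes which of the 7 classes the heatmap explains. If you would rather explain the model's own prediction, you can compute it first. A minimal sketch reusing the names from the script above, and assuming (as get_loss in the code below does) that the classification logits are the third element of the model's output:

# Hypothetical: explain the predicted class instead of a fixed index.
# Assumes model(x) returns a tuple whose element [2] holds the logits,
# matching the get_loss() implementation further below.
with torch.no_grad():
    logits = model(input_tensor)[2]
predicted = int(logits.argmax(dim=1)[0])
grayscale_cam = cam(input_tensor=input_tensor, target_category=predicted)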

Below is the grad_cam code. Note: if your model has multiple outputs, you must select the specific output you want from the model.
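For concreteness, here is a hypothetical three-output model of the shape that the get_loss method below (with its output = output[2] line) expects; only the element holding the classification logits matters:

import torch.nn as nn

class MultiOutputNet(nn.Module):
    """Hypothetical model with three outputs; the classification
    logits sit at index 2, matching output[2] in get_loss below."""
    def __init__(self, num_classes=7):
        super().__init__()
        self.features = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU())
        self.fc = nn.Linear(8, num_classes)

    def forward(self, x):
        feat = self.features(x)          # intermediate feature map
        pooled = feat.mean(dim=(2, 3))   # globally pooled features
        logits = self.fc(pooled)         # classification logits
        return feat, pooled, logits      # logits are output[2]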

import cv2
import numpy as np


class ActivationsAndGradients:
    """ Class for extracting activations and
    registering gradients from targeted intermediate layers """

    def __init__(self, model, target_layers, reshape_transform):
        self.model = model
        self.gradients = []
        self.activations = []
        self.reshape_transform = reshape_transform
        self.handles = []
        for target_layer in target_layers:
            self.handles.append(
                target_layer.register_forward_hook(
                    self.save_activation))
            # Backward compatibility with older pytorch versions:
            if hasattr(target_layer, 'register_full_backward_hook'):
                self.handles.append(
                    target_layer.register_full_backward_hook(
                        self.save_gradient))
            else:
                self.handles.append(
                    target_layer.register_backward_hook(
                        self.save_gradient))

    def save_activation(self, module, input, output):
        activation = output
        if self.reshape_transform is not None:
            activation = self.reshape_transform(activation)
        self.activations.append(activation.cpu().detach())

    def save_gradient(self, module, grad_input, grad_output):
        # Gradients are computed in reverse order
        grad = grad_output[0]
        if self.reshape_transform is not None:
            grad = self.reshape_transform(grad)
        self.gradients = [grad.cpu().detach()] + self.gradients

    def __call__(self, x):
        self.gradients = []
        self.activations = []
        return self.model(x)

    def release(self):
        for handle in self.handles:
            handle.remove()
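A quick way to see what this wrapper does is to hook a small torchvision model and run one forward/backward pass. A minimal sanity-check sketch (assuming the file above is saved as utils.py, as the training script's imports suggest):

import torch
from torchvision import models
from utils import ActivationsAndGradients

net = models.resnet18(pretrained=True).eval()
wrapper = ActivationsAndGradients(net, [net.layer4[-1]], reshape_transform=None)
logits = wrapper(torch.randn(1, 3, 224, 224))  # forward hook stores activations
logits[0, logits.argmax()].backward()          # backward hook stores gradients
print(wrapper.activations[0].shape)  # torch.Size([1, 512, 7, 7])
print(wrapper.gradients[0].shape)    # torch.Size([1, 512, 7, 7])
wrapper.release()  # always remove hooks when done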

class GradCAM:
    def __init__(self,
                 model,
                 target_layers,
                 reshape_transform=None,
                 use_cuda=False):
        self.model = model.eval()
        self.target_layers = target_layers
        self.reshape_transform = reshape_transform
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)

    """ Get a vector of weights for every channel in the target layer.
        Methods that return weights channels,
        will typically need to only implement this function. """

    @staticmethod
    def get_cam_weights(grads):
        return np.mean(grads, axis=(2, 3), keepdims=True)

    @staticmethod
    def get_loss(output, target_category):
        loss = 0
        # Note: if the model has multiple outputs, select the one you want here
        output = output[2]
        for i in range(len(target_category)):
            loss = loss + output[i, target_category[i]]
        return loss

    def get_cam_image(self, activations, grads):
        weights = self.get_cam_weights(grads)
        weighted_activations = weights * activations
        cam = weighted_activations.sum(axis=1)
        return cam

    @staticmethod
    def get_target_width_height(input_tensor):
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(self, input_tensor):
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer
        for layer_activations, layer_grads in zip(activations_list, grads_list):
            cam = self.get_cam_image(layer_activations, layer_grads)
            cam[cam < 0] = 0  # ReLU: keep only positive contributions before min-max scaling
            scaled = self.scale_cam_image(cam, target_size)
            cam_per_target_layer.append(scaled[:, None, :])
        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer):
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return self.scale_cam_image(result)

    @staticmethod
    def scale_cam_image(cam, target_size=None):
        result = []
        for img in cam:
            img = img - np.min(img)
            img = img / (1e-7 + np.max(img))
            if target_size is not None:
                img = cv2.resize(img, target_size)
            result.append(img)
        result = np.float32(result)
        return result

    def __call__(self, input_tensor, target_category=None):
        if self.cuda:
            input_tensor = input_tensor.cuda()

        # Forward pass to get the network's output logits (before softmax)
        output = self.activations_and_grads(input_tensor)
        if isinstance(target_category, int):
            target_category = [target_category] * input_tensor.size(0)

        if target_category is None:
            target_category = np.argmax(output.cpu().data.numpy(), axis=-1)
            print(f"category id: {target_category}")
        else:
            assert (len(target_category) == input_tensor.size(0))

        self.model.zero_grad()
        loss = self.get_loss(output, target_category)
        loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(input_tensor)
        return self.aggregate_multi_layers(cam_per_layer)

    def __del__(self):
        self.activations_and_grads.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True
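For reference, get_cam_weights and get_cam_image together implement the original Grad-CAM formulation (Selvaraju et al.): the weight of channel k for class c is the spatial average of the gradients, and the map is the ReLU of the weighted channel sum:

$$\alpha_k^c = \frac{1}{Z}\sum_{i}\sum_{j}\frac{\partial y^c}{\partial A_{ij}^k}, \qquad L_{\text{Grad-CAM}}^c = \mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)$$

Here $y^c$ is the class-c logit (summed in get_loss), $A^k$ is the activation map of channel k in the target layer, and $Z$ is the number of spatial positions; the ReLU corresponds to the cam[cam < 0] = 0 line in compute_cam_per_layer.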

def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """ This function overlays the cam mask on the image as a heatmap.
    By default the heatmap is in BGR format.

    :param img: The base image in RGB or BGR format.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The default image with the cam overlay.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should be np.float32 in the range [0, 1]")

    cam = heatmap + img
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)

def center_crop_img(img: np.ndarray, size: int):
    h, w, c = img.shape

    if w == h == size:
        return img

    if w < h:
        ratio = size / w
        new_w = size
        new_h = int(h * ratio)
    else:
        ratio = size / h
        new_h = size
        new_w = int(w * ratio)

    img = cv2.resize(img, dsize=(new_w, new_h))

    if new_w == size:
        h = (new_h - size) // 2
        img = img[h: h + size]
    else:
        w = (new_w - size) // 2
        img = img[:, w: w + size]

    return img
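A quick worked example of the crop helper: the short side is scaled to size, then the long side is center-cropped. A sketch with a hypothetical 600x800 input:

import numpy as np
from utils import center_crop_img

dummy = np.zeros((600, 800, 3), dtype=np.uint8)  # H=600, W=800
out = center_crop_img(dummy, 448)
# H is the short side: 600 -> 448; W scales to int(800 * 448 / 600) = 597,
# then the width is center-cropped to 448.
print(out.shape)  # (448, 448, 3)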
