diff --git a/docs/source/cams.rst b/docs/source/cams.rst index c4898b80..6b7a569d 100644 --- a/docs/source/cams.rst +++ b/docs/source/cams.rst @@ -7,7 +7,7 @@ torchcam.cams CAM -------- -Related to activation-based class activation maps. +Methods related to activation-based class activation maps. .. autoclass:: CAM @@ -17,12 +17,11 @@ Related to activation-based class activation maps. Grad-CAM -------- -Related to gradient-based class activation maps. +Methods related to gradient-based class activation maps. .. autoclass:: GradCAM - .. autoclass:: GradCAMpp .. autoclass:: SmoothGradCAMpp diff --git a/docs/source/index.rst b/docs/source/index.rst index ac35da8b..d44e71d0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,9 +3,15 @@ Torchcam documentation The :mod:`torchcam` package gives PyTorch users the possibility to visualize the spatial influence on classification outputs. +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + + installing + .. toctree:: :maxdepth: 1 - :caption: Package Reference + :caption: Package Documentation cams utils diff --git a/docs/source/installing.rst b/docs/source/installing.rst new file mode 100644 index 00000000..180d2965 --- /dev/null +++ b/docs/source/installing.rst @@ -0,0 +1,36 @@ + +************ +Installation +************ + +This library requires Python 3.6 or newer. + +Via Python Package +================== + +Install the latest stable release of the package using pip: + +.. code:: bash + + pip install torchcam + + +Via Conda +========= + +Install the latest stable release of the package using conda: + +.. code:: bash + + conda install -c frgfm torchcam + + +Via Git +======= + +Install the library in developer mode: + +.. code:: bash + + git clone https://github.com/frgfm/torch-cam.git + pip install -e torch-cam/. 
diff --git a/torchcam/cams/cam.py b/torchcam/cams/cam.py index 7ee39e47..682297b3 100644 --- a/torchcam/cams/cam.py +++ b/torchcam/cams/cam.py @@ -117,7 +117,18 @@ def __repr__(self): class CAM(_CAM): - """Implements a class activation map extractor as described in https://arxiv.org/abs/1512.04150 + """Implements a class activation map extractor as described in `"Learning Deep Features for Discriminative + Localization" <https://arxiv.org/abs/1512.04150>`_. + + The Class Activation Map (CAM) is defined for image classification models that have global pooling at the end + of the visual feature extraction block. The localization map is computed as follows: + + .. math:: + L^{(c)}_{CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big) + + where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + position :math:`(x, y)`, + and :math:`w_k^{(c)}` is the weight corresponding to class :math:`c` for unit :math:`k`. Example:: >>> from torchvision.models import resnet18 @@ -150,7 +161,29 @@ def _get_weights(self, class_idx, scores=None): class ScoreCAM(_CAM): - """Implements a class activation map extractor as described in https://arxiv.org/abs/1910.01279 + """Implements a class activation map extractor as described in `"Score-CAM: + Score-Weighted Visual Explanations for Convolutional Neural Networks" <https://arxiv.org/abs/1910.01279>`_. + + The localization map is computed as follows: + + .. math:: + L^{(c)}_{Score-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big) + + with the coefficient :math:`w_k^{(c)}` being defined as: + + .. math:: + w_k^{(c)} = softmax(Y^{(c)}(M) - Y^{(c)}(X_b)) + + where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + position :math:`(x, y)`, :math:`Y^{(c)}(X)` is the model output score for class :math:`c` before softmax + for input :math:`X`, :math:`X_b` is a baseline image, + and :math:`M` is defined as follows: + + .. 
math:: + M = \\Big(\\frac{M^{(d)} - \\min M^{(d)}}{\\max M^{(d)} - \\min M^{(d)}} \\odot X \\Big)_{1 \\leq d \\leq D} + + where :math:`\\odot` refers to the element-wise multiplication, :math:`M^{(d)}` is the upsampled version of + :math:`A_d` on node :math:`d`, and :math:`D` is the number of channels on the target convolutional layer. Example:: >>> from torchvision.models import resnet18 diff --git a/torchcam/cams/gradcam.py b/torchcam/cams/gradcam.py index b28ebfda..33ea1c76 100644 --- a/torchcam/cams/gradcam.py +++ b/torchcam/cams/gradcam.py @@ -55,7 +55,23 @@ def _get_weights(self, class_idx, scores): class GradCAM(_GradCAM): - """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf + """Implements a class activation map extractor as described in `"Grad-CAM: Visual Explanations from Deep Networks + via Gradient-based Localization" <https://arxiv.org/pdf/1610.02391.pdf>`_. + + The localization map is computed as follows: + + .. math:: + L^{(c)}_{Grad-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big) + + with the coefficient :math:`w_k^{(c)}` being defined as: + + .. math:: + w_k^{(c)} = \\frac{1}{H \\cdot W} \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W + \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} + + where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + position :math:`(x, y)`, + and :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax. Example:: >>> from torchvision.models import resnet18 @@ -86,7 +102,32 @@ def _get_weights(self, class_idx, scores): class GradCAMpp(_GradCAM): - """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf + """Implements a class activation map extractor as described in `"Grad-CAM++: Improved Visual Explanations for + Deep Convolutional Networks" <https://arxiv.org/pdf/1710.11063.pdf>`_. + + The localization map is computed as follows: + + .. 
math:: + L^{(c)}_{Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y) + + with the coefficient :math:`w_k^{(c)}` being defined as: + + .. math:: + w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot + ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big) + + where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + position :math:`(x, y)`, + :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax, + and :math:`\\alpha_k^{(c)}(i, j)` is defined as: + + .. math:: + \\alpha_k^{(c)}(i, j) = \\frac{1}{\\sum\\limits_{i, j} \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}} + = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot + \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot + \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}} + + if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`. Example:: >>> from torchvision.models import resnet18 @@ -122,8 +163,41 @@ def _get_weights(self, class_idx, scores): class SmoothGradCAMpp(_GradCAM): - """Implements a class activation map extractor as described in https://arxiv.org/pdf/1908.01224.pdf - with a personal correction to the paper (alpha coefficient numerator) + """Implements a class activation map extractor as described in `"Smooth Grad-CAM++: An Enhanced Inference Level + Visualization Technique for Deep Convolutional Neural Network Models" <https://arxiv.org/pdf/1908.01224.pdf>`_ + with a personal correction to the paper (alpha coefficient numerator). + + The localization map is computed as follows: + + .. math:: + L^{(c)}_{Smooth Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y) + + with the coefficient :math:`w_k^{(c)}` being defined as: + + .. 
math:: + w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot + ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big) + + where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + position :math:`(x, y)`, + :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax, + and :math:`\\alpha_k^{(c)}(i, j)` is defined as: + + .. math:: + \\alpha_k^{(c)}(i, j) + = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot + \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot + \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}} + = \\frac{\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j)}{ + \\frac{2}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j) + \\sum\\limits_{a,b} A_k (a,b) \\cdot + \\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 3)}_k(i, j)} + + if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`. Here :math:`D^{(c, p)}_k(i, j)` + refers to the p-th partial derivative of the class score of class :math:`c` with respect to the activation in layer + :math:`k` at position :math:`(i, j)`, and :math:`n` is the number of samples used to get the gradient estimate. + + Please note the difference in the numerator of :math:`\\alpha_k^{(c)}(i, j)`, + which is actually :math:`\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 1)}_k(i,j)` in the paper. Example:: >>> from torchvision.models import resnet18