From e34fc4249ff230ffac561d1c22d43dbc8900db09 Mon Sep 17 00:00:00 2001
From: F-G Fernandez
Date: Sun, 31 Oct 2021 18:51:32 +0100
Subject: [PATCH] docs: Fixed LaTeX syntax in docstrings (#107)

---
 torchcam/methods/activation.py | 38 ++++++++++----------
 torchcam/methods/gradient.py   | 66 +++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/torchcam/methods/activation.py b/torchcam/methods/activation.py
index 111f188d..457ab1eb 100644
--- a/torchcam/methods/activation.py
+++ b/torchcam/methods/activation.py
@@ -25,7 +25,7 @@ class CAM(_CAM):
    of the visual feature extraction block. The localization map is computed as follows:

    .. math::
-        L^{(c)}_{CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
@@ -96,7 +96,7 @@ class ScoreCAM(_CAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{Score-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{Score-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}` being defined as:

@@ -109,10 +109,10 @@ class ScoreCAM(_CAM):
    and :math:`M_k` is defined as follows:

    .. math::
-        M_k = \\frac{U(A_k) - \\min\\limits_m U(A_m)}{\\max\\limits_m U(A_m) - \\min\\limits_m U(A_m)})
-        \\odot X
+        M_k = \frac{U(A_k) - \min\limits_m U(A_m)}{\max\limits_m U(A_m) - \min\limits_m U(A_m)})
+        \odot X

-    where :math:`\\odot` refers to the element-wise multiplication and :math:`U` is the upsampling operation.
+    where :math:`\odot` refers to the element-wise multiplication and :math:`U` is the upsampling operation.

    Example::
        >>> from torchvision.models import resnet18
@@ -216,12 +216,12 @@ class SSCAM(ScoreCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{SS-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{SS-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\frac{1}{N} \\sum\\limits_1^N softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b))
+        w_k^{(c)} = \frac{1}{N} \sum\limits_1^N softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b))

    where :math:`N` is the number of samples used to smooth the weights,
    :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
@@ -230,12 +230,12 @@ class SSCAM(ScoreCAM):
    and :math:`M_k` is defined as follows:

    .. math::
-        M_k = \\Bigg(\\frac{U(A_k) - \\min\\limits_m U(A_m)}{\\max\\limits_m U(A_m) - \\min\\limits_m U(A_m)} +
-        \\delta\\Bigg) \\odot X
+        M_k = \Bigg(\frac{U(A_k) - \min\limits_m U(A_m)}{\max\limits_m U(A_m) - \min\limits_m U(A_m)} +
+        \delta\Bigg) \odot X

-    where :math:`\\odot` refers to the element-wise multiplication, :math:`U` is the upsampling operation,
-    :math:`\\delta \\sim \\mathcal{N}(0, \\sigma^2)` is the random noise that follows a 0-mean gaussian distribution
-    with a standard deviation of :math:`\\sigma`.
+    where :math:`\odot` refers to the element-wise multiplication, :math:`U` is the upsampling operation,
+    :math:`\delta \sim \mathcal{N}(0, \sigma^2)` is the random noise that follows a 0-mean gaussian distribution
+    with a standard deviation of :math:`\sigma`.

    Example::
        >>> from torchvision.models import resnet18
@@ -306,12 +306,12 @@ class ISCAM(ScoreCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{ISS-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{ISS-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\sum\\limits_{i=1}^N \\frac{i}{N} softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b))
+        w_k^{(c)} = \sum\limits_{i=1}^N \frac{i}{N} softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b))

    where :math:`N` is the number of samples used to smooth the weights,
    :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
@@ -320,12 +320,12 @@ class ISCAM(ScoreCAM):
    and :math:`M_k` is defined as follows:

    .. math::
-        M_k = \\Bigg(\\frac{U(A_k) - \\min\\limits_m U(A_m)}{\\max\\limits_m U(A_m) - \\min\\limits_m U(A_m)} +
-        \\delta\\Bigg) \\odot X
+        M_k = \Bigg(\frac{U(A_k) - \min\limits_m U(A_m)}{\max\limits_m U(A_m) - \min\limits_m U(A_m)} +
+        \delta\Bigg) \odot X

-    where :math:`\\odot` refers to the element-wise multiplication, :math:`U` is the upsampling operation,
-    :math:`\\delta \\sim \\mathcal{N}(0, \\sigma^2)` is the random noise that follows a 0-mean gaussian distribution
-    with a standard deviation of :math:`\\sigma`.
+    where :math:`\odot` refers to the element-wise multiplication, :math:`U` is the upsampling operation,
+    :math:`\delta \sim \mathcal{N}(0, \sigma^2)` is the random noise that follows a 0-mean gaussian distribution
+    with a standard deviation of :math:`\sigma`.

    Example::
        >>> from torchvision.models import resnet18
diff --git a/torchcam/methods/gradient.py b/torchcam/methods/gradient.py
index 729906d6..32d70451 100644
--- a/torchcam/methods/gradient.py
+++ b/torchcam/methods/gradient.py
@@ -72,13 +72,13 @@ class GradCAM(_GradCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{Grad-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{Grad-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\frac{1}{H \\cdot W} \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W
-        \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}
+        w_k^{(c)} = \frac{1}{H \cdot W} \sum\limits_{i=1}^H \sum\limits_{j=1}^W
+        \frac{\partial Y^{(c)}}{\partial A_k(i, j)}

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
@@ -114,26 +114,26 @@ class GradCAMpp(_GradCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+        L^{(c)}_{Grad-CAM++}(x, y) = \sum\limits_k w_k^{(c)} A_k(x, y)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
-        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+        w_k^{(c)} = \sum\limits_{i=1}^H \sum\limits_{j=1}^W \alpha_k^{(c)}(i, j) \cdot
+        ReLU\Big(\frac{\partial Y^{(c)}}{\partial A_k(i, j)}\Big)

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
-    and :math:`\\alpha_k^{(c)}(i, j)` being defined as:
+    and :math:`\alpha_k^{(c)}(i, j)` being defined as:

    .. math::
-        \\alpha_k^{(c)}(i, j) = \\frac{1}{\\sum\\limits_{i, j} \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}}
-        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
-        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
-        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
+        \alpha_k^{(c)}(i, j) = \frac{1}{\sum\limits_{i, j} \frac{\partial Y^{(c)}}{\partial A_k(i, j)}}
+        = \frac{\frac{\partial^2 Y^{(c)}}{(\partial A_k(i,j))^2}}{2 \cdot
+        \frac{\partial^2 Y^{(c)}}{(\partial A_k(i,j))^2} + \sum\limits_{a,b} A_k (a,b) \cdot
+        \frac{\partial^3 Y^{(c)}}{(\partial A_k(i,j))^3}}

-    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`.
+    if :math:`\frac{\partial Y^{(c)}}{\partial A_k(i, j)} = 1` else :math:`0`.

    Example::
        >>> from torchvision.models import resnet18
@@ -183,34 +183,34 @@ class SmoothGradCAMpp(_GradCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{Smooth Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+        L^{(c)}_{Smooth Grad-CAM++}(x, y) = \sum\limits_k w_k^{(c)} A_k(x, y)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
-        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+        w_k^{(c)} = \sum\limits_{i=1}^H \sum\limits_{j=1}^W \alpha_k^{(c)}(i, j) \cdot
+        ReLU\Big(\frac{\partial Y^{(c)}}{\partial A_k(i, j)}\Big)

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
-    and :math:`\\alpha_k^{(c)}(i, j)` being defined as:
+    and :math:`\alpha_k^{(c)}(i, j)` being defined as:

    .. math::
-        \\alpha_k^{(c)}(i, j)
-        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
-        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
-        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
-        = \\frac{\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j)}{
-        \\frac{2}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j) + \\sum\\limits_{a,b} A_k (a,b) \\cdot
-        \\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 3)}_k(i, j)}
-
-    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`. Here :math:`D^{(c, p)}_k(i, j)`
+        \alpha_k^{(c)}(i, j)
+        = \frac{\frac{\partial^2 Y^{(c)}}{(\partial A_k(i,j))^2}}{2 \cdot
+        \frac{\partial^2 Y^{(c)}}{(\partial A_k(i,j))^2} + \sum\limits_{a,b} A_k (a,b) \cdot
+        \frac{\partial^3 Y^{(c)}}{(\partial A_k(i,j))^3}}
+        = \frac{\frac{1}{n} \sum\limits_{m=1}^n D^{(c, 2)}_k(i, j)}{
+        \frac{2}{n} \sum\limits_{m=1}^n D^{(c, 2)}_k(i, j) + \sum\limits_{a,b} A_k (a,b) \cdot
+        \frac{1}{n} \sum\limits_{m=1}^n D^{(c, 3)}_k(i, j)}
+
+    if :math:`\frac{\partial Y^{(c)}}{\partial A_k(i, j)} = 1` else :math:`0`. Here :math:`D^{(c, p)}_k(i, j)`
    refers to the p-th partial derivative of the class score of class :math:`c` relatively to the activation in
    layer :math:`k` at position :math:`(i, j)`, and :math:`n` is the number of samples used to get the gradient
    estimate.
-    Please note the difference in the numerator of :math:`\\alpha_k^{(c)}(i, j)`,
-    which is actually :math:`\\frac{1}{n} \\sum\\limits_{k=1}^n D^{(c, 1)}_k(i,j)` in the paper.
+    Please note the difference in the numerator of :math:`\alpha_k^{(c)}(i, j)`,
+    which is actually :math:`\frac{1}{n} \sum\limits_{k=1}^n D^{(c, 1)}_k(i,j)` in the paper.

    Example::
        >>> from torchvision.models import resnet18
@@ -311,14 +311,14 @@ class XGradCAM(_GradCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{XGrad-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+        L^{(c)}_{XGrad-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)} A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}` being defined as:

    .. math::
-        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W
-        \\Big( \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} \\cdot
-        \\frac{A_k(i, j)}{\\sum\\limits_{m=1}^H \\sum\\limits_{n=1}^W A_k(m, n)} \\Big)
+        w_k^{(c)} = \sum\limits_{i=1}^H \sum\limits_{j=1}^W
+        \Big( \frac{\partial Y^{(c)}}{\partial A_k(i, j)} \cdot
+        \frac{A_k(i, j)}{\sum\limits_{m=1}^H \sum\limits_{n=1}^W A_k(m, n)} \Big)

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
@@ -357,12 +357,12 @@ class LayerCAM(_GradCAM):
    The localization map is computed as follows:

    .. math::
-        L^{(c)}_{Layer-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)}(x, y) \\cdot A_k(x, y)\\Big)
+        L^{(c)}_{Layer-CAM}(x, y) = ReLU\Big(\sum\limits_k w_k^{(c)}(x, y) \cdot A_k(x, y)\Big)

    with the coefficient :math:`w_k^{(c)}(x, y)` being defined as:

    .. math::
-        w_k^{(c)}(x, y) = ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}(x, y)\\Big)
+        w_k^{(c)}(x, y) = ReLU\Big(\frac{\partial Y^{(c)}}{\partial A_k(i, j)}(x, y)\Big)

    where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at
    position :math:`(x, y)`,
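
For readers who want to see how the Grad-CAM formula documented above maps to tensor operations, the
following sketch computes the localization map of resnet18 by hand with PyTorch hooks. It is only an
illustration of the math, not torchcam's implementation; the choice of ``layer4`` as target layer, the
random input tensor and the hook bookkeeping are assumptions made for the example::

    # Illustrative sketch of the Grad-CAM formula above (not torchcam's implementation).
    import torch
    from torchvision.models import resnet18

    model = resnet18(pretrained=True).eval()

    # Record the target layer's activations A_k and the gradients dY^(c)/dA_k
    activations, gradients = [], []
    target_layer = model.layer4  # assumed target layer for this example
    target_layer.register_forward_hook(lambda mod, inp, out: activations.append(out))
    target_layer.register_full_backward_hook(lambda mod, gin, gout: gradients.append(gout[0]))

    img = torch.rand(1, 3, 224, 224)          # stand-in for a preprocessed image
    scores = model(img)
    scores[0, scores[0].argmax()].backward()  # backprop the top class score Y^(c)

    A = activations[0].squeeze(0)                         # A_k(x, y), shape (K, H, W)
    w = gradients[0].squeeze(0).mean(dim=(1, 2))          # w_k^(c): spatial mean of the gradients
    cam = torch.relu((w[:, None, None] * A).sum(dim=0))   # ReLU(sum_k w_k^(c) A_k(x, y))

The classes patched above (e.g. ``GradCAM`` in torchcam/methods/gradient.py) wrap this same kind of
computation behind forward and backward hooks, which is why only their docstrings change in this diff.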