This repository contains an implementation of many attention mechanism models.
- Initial release of the attention models published on 2024-08-12.
pip install AttentionMechanism
git clone https://github.com/gongyan1/Attention-Mechanism-Pytorch
python demo.py
Beyond Self-attention: External Attention using Two Linear Layers for Visual Tasks (TPAMI 2022)
from AttentionMechanism.model.attention.ExternalAttention import ExternalAttention
import torch
input=torch.randn(50,49,512)
ea = ExternalAttention(d_model=512,S=8)
output=ea(input)
print(output.shape)
Attention Is All You Need (arXiv 2017.06.03)
from AttentionMechanism.model.attention.SelfAttention import ScaledDotProductAttention
import torch
input=torch.randn(50,49,512)
sa = ScaledDotProductAttention(d_model=512, d_k=512, d_v=512, h=8)
output=sa(input,input,input)
print(output.shape)
Simplified Scaled Dot-Product Attention (simplified variant of Attention Is All You Need, arXiv 2017.06.03)
from AttentionMechanism.model.attention.SimplifiedSelfAttention import SimplifiedScaledDotProductAttention
import torch
input=torch.randn(50,49,512)
ssa = SimplifiedScaledDotProductAttention(d_model=512, h=8)
output=ssa(input,input,input)
print(output.shape)
Squeeze-and-Excitation Networks (CVPR 2018)
from AttentionMechanism.model.attention.SEAttention import SEAttention
import torch
input=torch.randn(50,512,7,7)
se = SEAttention(channel=512,reduction=8)
output=se(input)
print(output.shape)
Selective Kernel Networks (CVPR 2019)
from AttentionMechanism.model.attention.SKAttention import SKAttention
import torch
input=torch.randn(50,512,7,7)
se = SKAttention(channel=512,reduction=8)
output=se(input)
print(output.shape)
CBAM: Convolutional Block Attention Module (ECCV 2018)
from AttentionMechanism.model.attention.CBAM import CBAMBlock
import torch
input=torch.randn(50,512,7,7)
kernel_size=input.shape[2]
cbam = CBAMBlock(channel=512,reduction=16,kernel_size=kernel_size)
output=cbam(input)
print(output.shape)
BAM: Bottleneck Attention Module (arXiv 2018.07.18)
from AttentionMechanism.model.attention.BAM import BAMBlock
import torch
input=torch.randn(50,512,7,7)
bam = BAMBlock(channel=512,reduction=16,dia_val=2)
output=bam(input)
print(output.shape)
ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks (CVPR 2020)
from AttentionMechanism.model.attention.ECAAttention import ECAAttention
import torch
input=torch.randn(50,512,7,7)
eca = ECAAttention(kernel_size=3)
output=eca(input)
print(output.shape)
Dual Attention Network for Scene Segmentation (CVPR 2019)
from AttentionMechanism.model.attention.DANet import DAModule
import torch
input=torch.randn(50,512,7,7)
danet=DAModule(d_model=512,kernel_size=3,H=7,W=7)
print(danet(input).shape)
EPSANet: An Efficient Pyramid Split Attention Block on Convolutional Neural Network (ACCV 2022)
from AttentionMechanism.model.attention.PSA import PSA
import torch
input=torch.randn(50,512,7,7)
psa = PSA(channel=512,reduction=8)
output=psa(input)
print(output.shape)
ResT: An Efficient Transformer for Visual Recognition (NeurIPS 2021)
from AttentionMechanism.model.attention.EMSA import EMSA
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,64,512)
emsa = EMSA(d_model=512, d_k=512, d_v=512, h=8,H=8,W=8,ratio=2,apply_transform=True)
output=emsa(input,input,input)
print(output.shape)
SA-NET: SHUFFLE ATTENTION FOR DEEP CONVOLUTIONAL NEURAL NETWORKS (ICASSP 2021)
from AttentionMechanism.model.attention.ShuffleAttention import ShuffleAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
se = ShuffleAttention(channel=512,G=8)
output=se(input)
print(output.shape)
MUSE: Parallel Multi-Scale Attention for Sequence to Sequence Learning (arXiv 2019.10.17)
from AttentionMechanism.model.attention.MUSEAttention import MUSEAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,49,512)
sa = MUSEAttention(d_model=512, d_k=512, d_v=512, h=8)
output=sa(input,input,input)
print(output.shape)
Spatial Group-wise Enhance: Improving Semantic Feature Learning in Convolutional Networks (arXiv 2019.05.23)
from AttentionMechanism.model.attention.SGE import SpatialGroupEnhance
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
sge = SpatialGroupEnhance(groups=8)
output=sge(input)
print(output.shape)
A2-Nets: Double Attention Networks (NeurIPS 2018)
from AttentionMechanism.model.attention.A2Atttention import DoubleAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
a2 = DoubleAttention(512,128,128,True)
output=a2(input)
print(output.shape)
An Attention Free Transformer (arXiv 2021.09.21)
from AttentionMechanism.model.attention.AFT import AFT_FULL
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,49,512)
aft_full = AFT_FULL(d_model=512, n=49)
output=aft_full(input)
print(output.shape)
VOLO: Vision Outlooker for Visual Recognition (TPAMI 2022)
from AttentionMechanism.model.attention.OutlookAttention import OutlookAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,28,28,512)
outlook = OutlookAttention(dim=512)
output=outlook(input)
print(output.shape)
Vision Permutator: A Permutable MLP-Like Architecture for Visual Recognition (TPAMI 2022)
from AttentionMechanism.model.attention.ViP import WeightedPermuteMLP
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(64,8,8,512)
seg_dim=8
vip=WeightedPermuteMLP(512,seg_dim)
out=vip(input)
print(out.shape)
CoAtNet: Marrying Convolution and Attention for All Data Sizes (NeurIPS 2021)
Usage Code
from AttentionMechanism.model.attention.CoAtNet import CoAtNet
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(1,3,224,224)
mbconv=CoAtNet(in_ch=3,image_size=224)
out=mbconv(input)
print(out.shape)
Scaling Local Self-Attention for Parameter Efficient Visual Backbones (CVPR 2021)
from AttentionMechanism.model.attention.HaloAttention import HaloAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(1,512,8,8)
halo = HaloAttention(dim=512,
block_size=2,
halo_size=1,)
output=halo(input)
print(output.shape)
Polarized Self-Attention: Towards High-quality Pixel-wise Regression (arXiv 2021.07.08)
from AttentionMechanism.model.attention.PolarizedSelfAttention import ParallelPolarizedSelfAttention,SequentialPolarizedSelfAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(1,512,7,7)
psa = SequentialPolarizedSelfAttention(channel=512)
output=psa(input)
print(output.shape)
Contextual Transformer Networks for Visual Recognition (TPAMI 2022)
from AttentionMechanism.model.attention.CoTAttention import CoTAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
cot = CoTAttention(dim=512,kernel_size=3)
output=cot(input)
print(output.shape)
Residual Attention: A Simple but Effective Method for Multi-Label Recognition (ICCV2021)
from AttentionMechanism.model.attention.ResidualAttention import ResidualAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
resatt = ResidualAttention(channel=512,num_class=1000,la=0.2)
output=resatt(input)
print(output.shape)
S²-MLPv2: Improved Spatial-Shift MLP Architecture for Vision (arXiv 2021.08.02)
from AttentionMechanism.model.attention.S2Attention import S2Attention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
s2att = S2Attention(channels=512)
output=s2att(input)
print(output.shape)
Global Filter Networks for Image Classification (NeurIPS 2021)
25.3. Usage Code - Implemented by Wenliang Zhao (Author)
from AttentionMechanism.model.attention.gfnet import GFNet
import torch
from torch import nn
from torch.nn import functional as F
x = torch.randn(1, 3, 224, 224)
gfnet = GFNet(embed_dim=384, img_size=224, patch_size=16, num_classes=1000)
out = gfnet(x)
print(out.shape)
Rotate to Attend: Convolutional Triplet Attention Module (CVPR 2021)
26.3. Usage Code - Implemented by digantamisra98
from AttentionMechanism.model.attention.TripletAttention import TripletAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
triplet = TripletAttention()
output=triplet(input)
print(output.shape)
Coordinate Attention for Efficient Mobile Network Design (CVPR 2021)
27.3. Usage Code - Implemented by Andrew-Qibin
from AttentionMechanism.model.attention.CoordAttention import CoordAtt
import torch
from torch import nn
from torch.nn import functional as F
inp=torch.rand([2, 96, 56, 56])
inp_dim, oup_dim = 96, 96
reduction=32
coord_attention = CoordAtt(inp_dim, oup_dim, reduction=reduction)
output=coord_attention(inp)
print(output.shape)
MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer (arXiv 2021.10.05)
from AttentionMechanism.model.attention.MobileViTAttention import MobileViTAttention
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
m=MobileViTAttention()
input=torch.randn(1,3,49,49)
output=m(input)
print(output.shape) #output:(1,3,49,49)
Non-deep Networks (NeurIPS 2022)
from AttentionMechanism.model.attention.ParNetAttention import *
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
input=torch.randn(50,512,7,7)
pna = ParNetAttention(channel=512)
output=pna(input)
print(output.shape) #50,512,7,7
UFO-ViT: High Performance Linear Vision Transformer without Softmax (ECCV 2022)
from AttentionMechanism.model.attention.UFOAttention import *
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
input=torch.randn(50,49,512)
ufo = UFOAttention(d_model=512, d_k=512, d_v=512, h=8)
output=ufo(input,input,input)
print(output.shape) #[50, 49, 512]
On the Integration of Self-Attention and Convolution (CVPR 2022)
from AttentionMechanism.model.attention.ACmix import ACmix
import torch
if __name__ == '__main__':
input=torch.randn(50,256,7,7)
acmix = ACmix(in_planes=256, out_planes=256)
output=acmix(input)
print(output.shape)
Separable Self-attention for Mobile Vision Transformers (arXiv 2022.06.06)
from AttentionMechanism.model.attention.MobileViTv2Attention import MobileViTv2Attention
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
input=torch.randn(50,49,512)
sa = MobileViTv2Attention(d_model=512)
output=sa(input)
print(output.shape)
Vision Transformer with Deformable Attention (CVPR2022)
from AttentionMechanism.model.attention.DAT import DAT
import torch
if __name__ == '__main__':
input=torch.randn(1,3,224,224)
model = DAT(
img_size=224,
patch_size=4,
num_classes=1000,
expansion=4,
dim_stem=96,
dims=[96, 192, 384, 768],
depths=[2, 2, 6, 2],
stage_spec=[['L', 'S'], ['L', 'S'], ['L', 'D', 'L', 'D', 'L', 'D'], ['L', 'D']],
heads=[3, 6, 12, 24],
window_sizes=[7, 7, 7, 7] ,
groups=[-1, -1, 3, 6],
use_pes=[False, False, True, True],
dwc_pes=[False, False, False, False],
strides=[-1, -1, 1, 1],
sr_ratios=[-1, -1, -1, -1],
offset_range_factor=[-1, -1, 2, 2],
no_offs=[False, False, False, False],
fixed_pes=[False, False, False, False],
use_dwc_mlps=[False, False, False, False],
use_conv_patches=False,
drop_rate=0.0,
attn_drop_rate=0.0,
drop_path_rate=0.2,
)
output=model(input)
print(output[0].shape)
CROSSFORMER: A VERSATILE VISION TRANSFORMER HINGING ON CROSS-SCALE ATTENTION (ICLR 2022)
from AttentionMechanism.model.attention.Crossformer import CrossFormer
import torch
if __name__ == '__main__':
input=torch.randn(1,3,224,224)
model = CrossFormer(img_size=224,
patch_size=[4, 8, 16, 32],
in_chans= 3,
num_classes=1000,
embed_dim=48,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
group_size=[7, 7, 7, 7],
mlp_ratio=4.,
qkv_bias=True,
qk_scale=None,
drop_rate=0.0,
drop_path_rate=0.1,
ape=False,
patch_norm=True,
use_checkpoint=False,
merge_size=[[2, 4], [2,4], [2, 4]]
)
output=model(input)
print(output.shape)
Aggregating Global Features into Local Vision Transformer (ICPR 2022)
from AttentionMechanism.model.attention.MOATransformer import MOATransformer
import torch
if __name__ == '__main__':
input=torch.randn(1,3,224,224)
model = MOATransformer(
img_size=224,
patch_size=4,
in_chans=3,
num_classes=1000,
embed_dim=96,
depths=[2, 2, 6],
num_heads=[3, 6, 12],
window_size=14,
mlp_ratio=4.,
qkv_bias=True,
qk_scale=None,
drop_rate=0.0,
drop_path_rate=0.1,
ape=False,
patch_norm=True,
use_checkpoint=False
)
output=model(input)
print(output.shape)
CCNet: Criss-Cross Attention for Semantic Segmentation (ICCV 2019)
from AttentionMechanism.model.attention.CrissCrossAttention import CrissCrossAttention
import torch
if __name__ == '__main__':
input=torch.randn(3, 64, 7, 7)
model = CrissCrossAttention(64)
outputs = model(input)
print(outputs.shape)
Axial Attention in Multidimensional Transformers (arXiv 2019.12.20)
from AttentionMechanism.model.attention.Axial_attention import AxialImageTransformer
import torch
if __name__ == '__main__':
input=torch.randn(3, 128, 7, 7)
model = AxialImageTransformer(
dim = 128,
depth = 12,
reversible = True
)
outputs = model(input)
print(outputs.shape)
FcaNet: Frequency Channel Attention Networks (ICCV 2021)
from AttentionMechanism.model.attention.FCA import MultiSpectralAttentionLayer
import torch
if __name__ == "__main__":
input = torch.randn(32, 128, 64, 64) # (b, c, h, w)
fca_layer = MultiSpectralAttentionLayer(channel = 128, dct_h = 64, dct_w = 64, reduction = 16, freq_sel_method = 'top16')
output = fca_layer(input)
print(output.shape)
Attention Augmented Convolutional Networks (ICCV 2019)
from AttentionMechanism.model.attention.AAAttention import AugmentedConv
import torch
if __name__ == "__main__":
input = torch.randn((16, 3, 32, 32))
augmented_conv = AugmentedConv(in_channels=3, out_channels=64, kernel_size=3, dk=40, dv=4, Nh=4, relative=True, stride=2, shape=16)
output = augmented_conv(input)
print(output.shape)
GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond (ICCVW 2019 Best Paper)
Global Context Networks (TPAMI 2020)
from AttentionMechanism.model.attention.GCAttention import GCModule
import torch
if __name__ == "__main__":
input = torch.randn(16, 64, 32, 32)
gc_layer = GCModule(64)
output = gc_layer(input)
print(output.shape)
Linear Context Transform Block (AAAI 2020)
from AttentionMechanism.model.attention.LCTAttention import LCT
import torch
if __name__ == "__main__":
x = torch.randn(16, 64, 32, 32)
attn = LCT(64, 8)
y = attn(x)
print(y.shape)
Gated Channel Transformation for Visual Recognition (CVPR 2020)
from AttentionMechanism.model.attention.GCTAttention import GCT
import torch
if __name__ == "__main__":
input = torch.randn(16, 64, 32, 32)
gct_layer = GCT(64)
output = gct_layer(input)
print(output.shape)
Gaussian Context Transformer (CVPR 2021)
from AttentionMechanism.model.attention.GaussianAttention import GCA
import torch
if __name__ == "__main__":
input = torch.randn(16, 64, 32, 32)
gca_layer = GCA(64)
output = gca_layer(input)
print(output.shape)
-
Pytorch implementation of RepMLP: Re-parameterizing Convolutions into Fully-connected Layers for Image Recognition (arXiv 2021.05.05)
-
Pytorch implementation of MLP-Mixer: An all-MLP Architecture for Vision (NeurIPS 2021)
-
Pytorch implementation of ResMLP: Feedforward networks for image classification with data-efficient training (TPAMI 2022)
-
Pytorch implementation of Pay Attention to MLPs (NeurIPS 2021)
-
Pytorch implementation of Sparse MLP for Image Recognition: Is Self-Attention Really Necessary? (AAAI 2022)
from fightingcv_attention.mlp.repmlp import RepMLP
import torch
from torch import nn
N=4 #batch size
C=512 #input dim
O=1024 #output dim
H=14 #image height
W=14 #image width
h=7 #patch height
w=7 #patch width
fc1_fc2_reduction=1 #reduction ratio
fc3_groups=8 # groups
repconv_kernels=[1,3,5,7] #kernel list
repmlp=RepMLP(C,O,H,W,h,w,fc1_fc2_reduction,fc3_groups,repconv_kernels=repconv_kernels)
x=torch.randn(N,C,H,W)
repmlp.eval()
for module in repmlp.modules():
if isinstance(module, nn.BatchNorm2d) or isinstance(module, nn.BatchNorm1d):
nn.init.uniform_(module.running_mean, 0, 0.1)
nn.init.uniform_(module.running_var, 0, 0.1)
nn.init.uniform_(module.weight, 0, 0.1)
nn.init.uniform_(module.bias, 0, 0.1)
#training result
out=repmlp(x)
#inference result
repmlp.switch_to_deploy()
deployout = repmlp(x)
print(((deployout-out)**2).sum())
MLP-Mixer: An all-MLP Architecture for Vision (NeurIPS 2021)
from fightingcv_attention.mlp.mlp_mixer import MlpMixer
import torch
mlp_mixer=MlpMixer(num_classes=1000,num_blocks=10,patch_size=10,tokens_hidden_dim=32,channels_hidden_dim=1024,tokens_mlp_dim=16,channels_mlp_dim=1024)
input=torch.randn(50,3,40,40)
output=mlp_mixer(input)
print(output.shape)
ResMLP: Feedforward networks for image classification with data-efficient training (TPAMI 2022)
from fightingcv_attention.mlp.resmlp import ResMLP
import torch
input=torch.randn(50,3,14,14)
resmlp=ResMLP(dim=128,image_size=14,patch_size=7,class_num=1000)
out=resmlp(input)
print(out.shape) #the last dimension is class_num
Pay Attention to MLPs (NeurIPS 2021)
from fightingcv_attention.mlp.g_mlp import gMLP
import torch
num_tokens=10000
bs=50
len_sen=49
num_layers=6
input=torch.randint(num_tokens,(bs,len_sen)) #bs,len_sen
gmlp = gMLP(num_tokens=num_tokens,len_sen=len_sen,dim=512,d_ff=1024)
output=gmlp(input)
print(output.shape)
Sparse MLP for Image Recognition: Is Self-Attention Really Necessary? (AAAI 2022)
from fightingcv_attention.mlp.sMLP_block import sMLPBlock
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
input=torch.randn(50,3,224,224)
smlp=sMLPBlock(h=224,w=224)
out=smlp(input)
print(out.shape)
Vision Permutator: A Permutable MLP-Like Architecture for Visual Recognition (TPAMI 2022)
from fightingcv_attention.mlp.vip_mlp import VisionPermutator, WeightedPermuteMLP
import torch
from torch import nn
from torch.nn import functional as F
if __name__ == '__main__':
input=torch.randn(1,3,224,224)
model = VisionPermutator(
layers=[4, 3, 8, 3],
embed_dims=[384, 384, 384, 384],
patch_size=14,
transitions=[False, False, False, False],
segment_dim=[16, 16, 16, 16],
mlp_ratios=[3, 3, 3, 3],
mlp_fn=WeightedPermuteMLP
)
output=model(input)
print(output.shape)
During the development of this project, the following open-source projects provided significant help and support. We hereby express our sincere gratitude: