diff --git a/.gitignore b/.gitignore
index b6e4761..4923756 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,22 @@ dmypy.json
 # Pyre type checker
 .pyre/
+
+# Dataset
+toDataset/
+/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/
+/content/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/toDataset/
+/content/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/resnet50-19c8e357.pth
+/content/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log
+/content/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/
+*.pth
+feature-pickle.pkl
+*/feature-pickle.pkl
+*.pkl
+*.png
+**/market1501/
+**/test_image/
+**/writer/
+**/log
+log/*
+log/
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..cc67606
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python.linting.pylintEnabled": true,
+    "python.linting.enabled": true
+}
\ No newline at end of file
diff --git a/Experiment-AGW-market.sh b/Experiment-AGW-market.sh
index dcb3304..2c886a2 100644
--- a/Experiment-AGW-market.sh
+++ b/Experiment-AGW-market.sh
@@ -8,5 +8,5 @@
 # weight regularized triplet loss
 # generalized mean pooling
 # non local blocks
-python3 tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('3')" \
-DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/Experiment-AGW-baseline')"
\ No newline at end of file
+python3 tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('0')" \
+DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/AGW-new-conf-1')"
\ No newline at end of file
diff --git a/Experiment-arcface_baseline-market.sh b/Experiment-arcface_baseline-market.sh
new file mode 100644
index 0000000..a6c395a
--- /dev/null
+++ b/Experiment-arcface_baseline-market.sh
@@ -0,0 +1,12 @@
+# Dataset: market1501
+# imagesize: 256x128
+# batchsize: 16x4
+# warmup_step 10
+# random erase prob 0.5
+# last stride 1
+# without center loss (arcface_baseline.yml sets CENTER_LOSS.USE: False)
+# weight regularized triplet loss
+# generalized mean pooling
+# non local blocks
+python3 tools/main.py --config_file='configs/arcface_baseline.yml' MODEL.DEVICE_ID "('0')" \
+DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/arcface_baseline-1')"
\ No newline at end of file
diff --git a/Experiment-arcface_center-market.sh b/Experiment-arcface_center-market.sh
new file mode 100644
index 0000000..3ce439a
--- /dev/null
+++ b/Experiment-arcface_center-market.sh
@@ -0,0 +1,12 @@
+# Dataset: market1501
+# imagesize: 256x128
+# batchsize: 16x4
+# warmup_step 10
+# random erase prob 0.5
+# last stride 1
+# with center loss
+# weight regularized triplet loss
+# generalized mean pooling
+# non local blocks
+python3 tools/main.py --config_file='configs/arcface_center.yml' MODEL.DEVICE_ID "('0')" \
+DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/arcface_center-1')"
\ No newline at end of file
diff --git a/Experiment-cosface_center-market.sh b/Experiment-cosface_center-market.sh
new file mode 100644
index 0000000..591fd2a
--- /dev/null
+++ b/Experiment-cosface_center-market.sh
@@ -0,0 +1,12 @@
+# Dataset: market1501
+# imagesize: 256x128
+# batchsize: 16x4
+# warmup_step 10
+# random erase prob 0.5
+# last stride 1
+# with center loss
+# weight regularized triplet loss
+# generalized mean pooling
+# non local blocks
+python3 tools/main.py --config_file='configs/cosface_center.yml' MODEL.DEVICE_ID "('0')" \
+DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/cosface_center-completed-1')"
\ No newline at end of file
diff --git
a/Test-Oxygen-Transfer.sh b/Test-Oxygen-Transfer.sh new file mode 100644 index 0000000..8393bb0 --- /dev/null +++ b/Test-Oxygen-Transfer.sh @@ -0,0 +1,7 @@ +python tools/main.py --config_file='configs/AGW_baseline.yml' \ + MODEL.DEVICE_ID "('0')" \ + DATASETS.NAMES "('oxygen')" \ + MODEL.PRETRAIN_CHOICE "('self')" \ + TEST.WEIGHT "('./log/oxygen1/Experiment-AGW-baseline/resnet50_nl_model_120.pth')" \ + TEST.EVALUATE_ONLY "('on')" \ + OUTPUT_DIR "('./log/Oxygen_Test')" \ No newline at end of file diff --git a/Train-Oxygen-Transfer.sh b/Train-Oxygen-Transfer.sh new file mode 100644 index 0000000..22d6fce --- /dev/null +++ b/Train-Oxygen-Transfer.sh @@ -0,0 +1,6 @@ +python ./tools/main.py --config_file='configs/AGW_baseline.yml' \ + MODEL.DEVICE_ID "('0')" \ + DATASETS.NAMES "('oxygen')" \ + MODEL.TRANSFER_MODE "('on')" \ + MODEL.PRETRAIN_CHOICE "('self')" \ + MODEL.PRETRAIN_PATH "('./log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth')" \ No newline at end of file diff --git a/Visalize-no-label.sh b/Visalize-no-label.sh new file mode 100644 index 0000000..efdaff3 --- /dev/null +++ b/Visalize-no-label.sh @@ -0,0 +1,23 @@ +# GO TO ROOT DIRECTORY +# rm -rf ./toDataset/oxygen1 +# rm -rf ./toDataset/oxygen1/gallery +# mkdir ./toDataset/oxygen1 +# mkdir ./toDataset/oxygen1/gallery +# Then you put oxygen folder in gallery folder +# And the structure must be like /gallery/mbk-12-4/*.jpg +# rm -rf ./log/oxygen1 +# rm -rf ./log/oxygen1/query_image +# mkdir ./log/oxygen1 +# mkdir ./log/oxygen1/query_image + +# and run this command in single line +python ./tools/main.py --config_file='configs/AGW_baseline.yml' \ + MODEL.DEVICE_ID "('0')" \ + DATASETS.NAMES "('oxygen1')" \ + MODEL.PRETRAIN_CHOICE "('self')" \ + TEST.WEIGHT "('/home/thanit456/capstone/ReID-Survey/log/market1501/local-AGW-baseline-2/resnet50_nl_model_120.pth')" \ + VISUALIZE.OPTION "('on_no_label')" \ + VISUALIZE.INDEX "(-1)" \ + VISUALIZE.NEED_NEW_FEAT_EMBED "('off')" \ + VISUALIZE.TOP_RANK "(20)" \ + VISUALIZE.RE_RANK "('on')" diff --git a/Visualize-Oxygen-label.sh b/Visualize-Oxygen-label.sh new file mode 100644 index 0000000..92ac7a5 --- /dev/null +++ b/Visualize-Oxygen-label.sh @@ -0,0 +1,9 @@ +python ./tools/main.py --config_file='configs/AGW_baseline.yml' \ + VISUALIZE.INDEX "(-1)" \ + MODEL.DEVICE_ID "('0')" \ + DATASETS.NAMES "('oxygen')" \ + MODEL.PRETRAIN_CHOICE "('self')" \ + TEST.WEIGHT "('./log/oxygen/Experiment-AGW-baseline/resnet50_nl_model_120.pth')" \ + VISUALIZE.OPTION "('on')" \ + OUTPUT_DIR "('./log/Oxygen/visualize_label_log.txt')" + \ No newline at end of file diff --git a/command.sh b/command.sh new file mode 100644 index 0000000..d693a26 --- /dev/null +++ b/command.sh @@ -0,0 +1,11 @@ +conda create -n reid python=3.6 +conda activate reid +pip install yacs +pip install -U scipy==1.2.0 +conda install -c pytorch pytorch torchvision ignite==0.1.2 + +conda install ipykernel + + +# source activate reid +# python -m ipykernel install --user --name REID --display-name "Python (REID)" \ No newline at end of file diff --git a/config/defaults.py b/config/defaults.py index e64bdb3..d62dec6 100644 --- a/config/defaults.py +++ b/config/defaults.py @@ -22,22 +22,30 @@ # ID number of GPU _C.MODEL.DEVICE_ID = '0' # Name of backbone -_C.MODEL.NAME = 'resnet50' +_C.MODEL.BACKBONE = 'resnet50' # Last stride of backbone _C.MODEL.LAST_STRIDE = 1 # Path to pretrained model of backbone _C.MODEL.PRETRAIN_PATH = '' +_C.MODEL.TRANSFER_MODE ="off" # Use ImageNet pretrained model to initialize backbone or use self trained model 
to initialize the whole model # Options: 'imagenet' or 'self' _C.MODEL.PRETRAIN_CHOICE = 'imagenet' -# If train with center loss, options: 'bnneck' or 'no' -_C.MODEL.CENTER_LOSS = 'on' -_C.MODEL.CENTER_FEAT_DIM = 2048 # If train with weighted regularized triplet loss, options: 'on', 'off' _C.MODEL.WEIGHT_REGULARIZED_TRIPLET = 'off' -# If train with generalized mean pooling, options: 'on', 'off' -_C.MODEL.GENERALIZED_MEAN_POOL = 'off' - +# If train with cos +_C.MODEL.USE_COS = False +# If train with dropout +_C.MODEL.USE_DROPOUT = False + +# for baseline +_C.MODEL.BASELINE = CN() +_C.MODEL.BASELINE.POOL_TYPE = 'avg' +_C.MODEL.BASELINE.COSINE_LOSS_TYPE = '' +_C.MODEL.BASELINE.SCALING_FACTOR = 60.0 +_C.MODEL.BASELINE.MARGIN = 0.35 +_C.MODEL.BASELINE.USE_BNBIAS = False +_C.MODEL.BASELINE.USE_SESTN = False # ----------------------------------------------------------------------------- # INPUT @@ -49,6 +57,7 @@ _C.INPUT.PROB = 0.5 # Random probability for random erasing _C.INPUT.RE_PROB = 0.5 +_C.INPUT.RE_MAX_RATIO = 0.4 # Values to be used for image normalization _C.INPUT.PIXEL_MEAN = [0.485, 0.456, 0.406] # Values to be used for image normalization @@ -90,10 +99,20 @@ _C.SOLVER.MOMENTUM = 0.9 # Margin of triplet loss _C.SOLVER.MARGIN = 0.3 -# Learning rate of SGD to learn the centers of center loss -_C.SOLVER.CENTER_LR = 0.5 -# Balanced weight of center loss -_C.SOLVER.CENTER_LOSS_WEIGHT = 0.0005 + +###### Center loss ###### + +_C.SOLVER.CENTER_LOSS = CN() + +_C.SOLVER.CENTER_LOSS.USE = False +# # Learning rate of SGD to learn the centers of center loss +_C.SOLVER.CENTER_LOSS.LR = 0.5 +_C.SOLVER.CENTER_LOSS.WEIGHT = 0.0005 +# _C.SOLVER.CENTER_LOSS.ALPHA = 1.0 + +_C.SOLVER.CENTER_LOSS.NUM_FEATS = 2048 + +########################## # Settings of weight decay _C.SOLVER.WEIGHT_DECAY = 0.0005 @@ -136,6 +155,25 @@ _C.TEST.FEAT_NORM = 'on' _C.TEST.EVALUATE_ONLY = 'off' +# ---------------------------------------------------------------------------- # +# Visualize +# ---------------------------------------------------------------------------- # +_C.VISUALIZE = CN() +# option +_C.VISUALIZE.OPTION = "off" +_C.VISUALIZE.CAM_OPTION = "allow_other" +_C.VISUALIZE.IMS_PER_BATCH = 256 +_C.VISUALIZE.NEED_NEW_FEAT_EMBED = "off" +_C.VISUALIZE.INDEX = 0 +_C.VISUALIZE.TOP_RANK = 10 +_C.VISUALIZE.RE_RANK = "off" +# ---------------------------------------------------------------------------- # +# Embedding projector +# ---------------------------------------------------------------------------- # +_C.EMBEDDING_PROJECTOR = CN() +# option +_C.EMBEDDING_PROJECTOR.OPTION = "off" + # ---------------------------------------------------------------------------- # # Misc options # ---------------------------------------------------------------------------- # diff --git a/configs/AGW_baseline.yml b/configs/AGW_baseline.yml index ff81dd6..fc3b8e9 100644 --- a/configs/AGW_baseline.yml +++ b/configs/AGW_baseline.yml @@ -1,16 +1,23 @@ MODEL: PRETRAIN_CHOICE: 'imagenet' - PRETRAIN_PATH: '/home/cgv841/.cache/torch/checkpoints/resnet50-19c8e357.pth' - CENTER_LOSS: 'on' - CENTER_FEAT_DIM: 2048 - NAME: 'resnet50_nl' + # PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth" + PRETRAIN_PATH: '/home/thanit456/torch/weights/resnet50-19c8e357.pth' + TRANSFER_MODE : 'off' + BACKBONE: 'resnet50_nl' WEIGHT_REGULARIZED_TRIPLET: 'on' - GENERALIZED_MEAN_POOL: 'on' + USE_COS: False # unused config + USE_DROPOUT: False + BASELINE: + POOL_TYPE: 'gem' + 
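# '' presumably disables the cosine-margin (ArcFace/CosFace) head; the arcface/cosface configs below set it
+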
COSINE_LOSS_TYPE: '' + SCALING_FACTOR: 60.0 + MARGIN: 0.35 INPUT: IMG_SIZE: [256, 128] PROB: 0.5 # random horizontal flip RE_PROB: 0.5 # random erasing + RE_MAX_RATIO: 0.4 # correspond to actual baseline PADDING: 10 DATASETS: @@ -25,12 +32,19 @@ SOLVER: OPTIMIZER_NAME: 'Adam' MAX_EPOCHS: 120 BASE_LR: 0.00035 + MARGIN: 0.3 - CENTER_LR: 0.5 - CENTER_LOSS_WEIGHT: 0.0005 + CENTER_LOSS: + USE: True + LR: 0.5 + # ALPHA: 0.5 + WEIGHT: 0.0005 + NUM_FEATS: 2048 - WEIGHT_DECAY: 0.0005 + WEIGHT_DECAY: 5e-4 IMS_PER_BATCH: 64 + # IMS_PER_BATCH: 32 + STEPS: [40, 70] GAMMA: 0.1 @@ -39,16 +53,26 @@ SOLVER: WARMUP_ITERS: 10 WARMUP_METHOD: 'linear' - CHECKPOINT_PERIOD: 40 + CHECKPOINT_PERIOD: 20 LOG_PERIOD: 20 EVAL_PERIOD: 40 TEST: - IMS_PER_BATCH: 128 + IMS_PER_BATCH: 64 RE_RANKING: 'off' WEIGHT: "path" FEAT_NORM: 'on' EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + IMS_PER_BATCH : 256 + NEED_NEW_FEAT_EMBED : "off" + INDEX : 0 + TOP_RANK : 10 + RE_RANK : "off" + CAM_OPTION : "none" +EMBEDDING_PROJECTOR: + OPTION: "off" OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" diff --git a/configs/arcface_baseline.yml b/configs/arcface_baseline.yml new file mode 100644 index 0000000..ad7e1c6 --- /dev/null +++ b/configs/arcface_baseline.yml @@ -0,0 +1,83 @@ +MODEL: + PRETRAIN_CHOICE: 'imagenet' + # PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth" + PRETRAIN_PATH: '/home/thanit456/torch/weights/resnet50-19c8e357.pth' + TRANSFER_MODE : 'off' + BACKBONE: 'resnet50_nl' + WEIGHT_REGULARIZED_TRIPLET: 'on' + USE_COS: True + USE_DROPOUT: False + BASELINE: + POOL_TYPE: 'gem' + COSINE_LOSS_TYPE: 'ArcFace' + SCALING_FACTOR: 30.0 + MARGIN: 0.5 + # ? Lack of SEPNORM + +INPUT: + IMG_SIZE: [256, 128] + PROB: 0.5 # random horizontal flip + RE_PROB: 0.5 # random erasing + RE_MAX_RATIO: 0.4 # correspond to actual baseline + PADDING: 10 + +DATASETS: + NAMES: ('market1501') + +DATALOADER: + PK_SAMPLER: 'on' + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + +SOLVER: + OPTIMIZER_NAME: 'Adam' + MAX_EPOCHS: 120 + BASE_LR: 0.00035 + MARGIN: 0.3 + MOMENTUM: 0.9 + + # Center loss will always force using SGD optimizer + CENTER_LOSS: + USE: False + LR: 0.5 + # ALPHA: 0.5 + WEIGHT: 0.0005 + NUM_FEATS: 2048 + + WEIGHT_DECAY: 5e-4 + IMS_PER_BATCH: 64 + # IMS_PER_BATCH: 32 + + + STEPS: [40, 70] + GAMMA: 0.1 + + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: 'linear' + + CHECKPOINT_PERIOD: 20 + LOG_PERIOD: 20 + EVAL_PERIOD: 20 + +TEST: + IMS_PER_BATCH: 64 + RE_RANKING: 'off' + WEIGHT: "path" + FEAT_NORM: 'on' + EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + IMS_PER_BATCH : 256 + NEED_NEW_FEAT_EMBED : "off" + INDEX : 0 + TOP_RANK : 10 + RE_RANK : "off" + CAM_OPTION : "none" +EMBEDDING_PROJECTOR: + OPTION: "off" + +OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" + + + \ No newline at end of file diff --git a/configs/arcface_center.yml b/configs/arcface_center.yml new file mode 100644 index 0000000..fad8d68 --- /dev/null +++ b/configs/arcface_center.yml @@ -0,0 +1,83 @@ +MODEL: + PRETRAIN_CHOICE: 'imagenet' + # PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth" + PRETRAIN_PATH: '/home/thanit456/torch/weights/resnet50-19c8e357.pth' + TRANSFER_MODE : 'off' + BACKBONE: 'resnet50_nl' + WEIGHT_REGULARIZED_TRIPLET: 'on' + USE_COS: True + USE_DROPOUT: False + BASELINE: + POOL_TYPE: 'gem' + COSINE_LOSS_TYPE: 'ArcFace' + SCALING_FACTOR: 30.0 + MARGIN: 0.5 + # ? 
Lack of SEPNORM + +INPUT: + IMG_SIZE: [256, 128] + PROB: 0.5 # random horizontal flip + RE_PROB: 0.5 # random erasing + RE_MAX_RATIO: 0.4 # correspond to actual baseline + PADDING: 10 + +DATASETS: + NAMES: ('market1501') + +DATALOADER: + PK_SAMPLER: 'on' + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + +SOLVER: + OPTIMIZER_NAME: 'Adam' + MAX_EPOCHS: 120 + BASE_LR: 0.00035 + MARGIN: 0.3 + MOMENTUM: 0.9 + + # Center loss will always force using SGD optimizer + CENTER_LOSS: + USE: True + LR: 0.5 + # ALPHA: 0.5 + WEIGHT: 0.0005 + NUM_FEATS: 2048 + + WEIGHT_DECAY: 5e-4 + IMS_PER_BATCH: 64 + # IMS_PER_BATCH: 32 + + + STEPS: [40, 70] + GAMMA: 0.1 + + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: 'linear' + + CHECKPOINT_PERIOD: 20 + LOG_PERIOD: 20 + EVAL_PERIOD: 20 + +TEST: + IMS_PER_BATCH: 64 + RE_RANKING: 'off' + WEIGHT: "path" + FEAT_NORM: 'on' + EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + IMS_PER_BATCH : 256 + NEED_NEW_FEAT_EMBED : "off" + INDEX : 0 + TOP_RANK : 10 + RE_RANK : "off" + CAM_OPTION : "none" +EMBEDDING_PROJECTOR: + OPTION: "off" + +OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" + + + \ No newline at end of file diff --git a/configs/cosface_center.yml b/configs/cosface_center.yml new file mode 100644 index 0000000..de8d9f6 --- /dev/null +++ b/configs/cosface_center.yml @@ -0,0 +1,82 @@ +MODEL: + PRETRAIN_CHOICE: 'imagenet' + # PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth" + PRETRAIN_PATH: '/home/thanit456/torch/weights/resnet50-19c8e357.pth' + TRANSFER_MODE : 'off' + BACKBONE: 'resnet50_nl' + WEIGHT_REGULARIZED_TRIPLET: 'on' + USE_COS: True + USE_DROPOUT: False + BASELINE: + POOL_TYPE: 'avg' + COSINE_LOSS_TYPE: 'CosFace' + SCALING_FACTOR: 60.0 + MARGIN: 0.35 + +INPUT: + IMG_SIZE: [256, 128] + PROB: 0.5 # random horizontal flip + RE_PROB: 0.5 # random erasing + RE_MAX_RATIO: 0.2 + PADDING: 10 + +DATASETS: + NAMES: ('market1501') + +DATALOADER: + PK_SAMPLER: 'on' + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + +SOLVER: + OPTIMIZER_NAME: 'Adam' + MAX_EPOCHS: 180 + BASE_LR: 3e-4 + MARGIN: 0.3 + MOMENTUM: 0.0 + + # Center loss will always force using SGD optimizer + CENTER_LOSS: + USE: True + LR: 0.5 + # ALPHA: 0.5 + WEIGHT: 0.0005 + NUM_FEATS: 2048 + + WEIGHT_DECAY: 0.0 + IMS_PER_BATCH: 64 + # IMS_PER_BATCH: 32 + + + STEPS: [40, 70] + GAMMA: 0.1 + + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: 'linear' + + CHECKPOINT_PERIOD: 20 + LOG_PERIOD: 180 + EVAL_PERIOD: 20 + +TEST: + IMS_PER_BATCH: 64 + RE_RANKING: 'off' + WEIGHT: "path" + FEAT_NORM: 'on' + EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + IMS_PER_BATCH : 256 + NEED_NEW_FEAT_EMBED : "off" + INDEX : 0 + TOP_RANK : 10 + RE_RANK : "off" + CAM_OPTION : "none" +EMBEDDING_PROJECTOR: + OPTION: "off" + +OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" + + + \ No newline at end of file diff --git a/data/build.py b/data/build.py index c6406ae..e76e077 100644 --- a/data/build.py +++ b/data/build.py @@ -3,30 +3,42 @@ import torch from torch.utils.data import DataLoader -from .datasets import init_dataset, ImageDataset +from .datasets import init_dataset, ImageDataset, ImageNoLabelDataset from .triplet_sampler import RandomIdentitySampler from .transforms import build_transforms - +# ASK : def train_collate_fn(batch): imgs, pids, _, _, = zip(*batch) pids = torch.tensor(pids, dtype=torch.int64) return torch.stack(imgs, dim=0), pids - def val_collate_fn(batch): imgs, pids, camids, _ = zip(*batch) return torch.stack(imgs, 
dim=0), pids, camids
+def val_no_label_collate_fn(batch):
+    imgs, camids, dates, _ = zip(*batch)
+    return torch.stack(imgs, dim=0), camids, dates

 def make_data_loader(cfg):
     transforms = build_transforms(cfg)
     dataset = init_dataset(cfg.DATASETS.NAMES, root=cfg.DATASETS.ROOT_DIR)
-
-    num_classes = dataset.num_train_pids
     num_workers = cfg.DATALOADER.NUM_WORKERS
+    if cfg.VISUALIZE.OPTION == "on_no_label":
+        gallery_set = ImageNoLabelDataset(dataset.gallery, transforms['eval'])
+        # print(gallery_set.dataset[0])  # debug: inspect the first gallery record
+        data_loader = {}
+        data_loader['gallery'] = DataLoader(
+            gallery_set, batch_size=cfg.VISUALIZE.IMS_PER_BATCH, shuffle=False, num_workers=num_workers,
+            collate_fn=val_no_label_collate_fn
+        )
+        return data_loader
+    # number of identities
+    num_classes = dataset.num_train_pids
     train_set = ImageDataset(dataset.train, transforms['train'])
     data_loader = {}
+    # ASK: what is PK_SAMPLER, collate_fn
     if cfg.DATALOADER.PK_SAMPLER == 'on':
         data_loader['train'] = DataLoader(
             train_set, batch_size=cfg.SOLVER.IMS_PER_BATCH,
@@ -44,4 +56,16 @@ def make_data_loader(cfg):
         eval_set, batch_size=cfg.TEST.IMS_PER_BATCH, shuffle=False, num_workers=num_workers,
         collate_fn=val_collate_fn
     )
+    if cfg.VISUALIZE.OPTION == "on" and cfg.EMBEDDING_PROJECTOR.OPTION == "off":
+        query_set = ImageDataset(dataset.query, transforms['eval'])
+        gallery_set = ImageDataset(dataset.gallery, transforms['eval'])
+        data_loader['query'] = DataLoader(
+            query_set, batch_size=cfg.VISUALIZE.IMS_PER_BATCH, shuffle=False, num_workers=num_workers,
+            collate_fn=val_collate_fn
+        )
+        data_loader['gallery'] = DataLoader(
+            gallery_set, batch_size=cfg.VISUALIZE.IMS_PER_BATCH, shuffle=False, num_workers=num_workers,
+            collate_fn=val_collate_fn
+        )
+
     return data_loader, len(dataset.query), num_classes
diff --git a/data/datasets/__init__.py b/data/datasets/__init__.py
index 2ef2f0e..0e24fc5 100644
--- a/data/datasets/__init__.py
+++ b/data/datasets/__init__.py
@@ -5,7 +5,9 @@
 from .market1501 import Market1501
 from .msmt17 import MSMT17
 from .veri import VeRi
-from .dataset_loader import ImageDataset
+from .oxygen1 import Oxygen_1
+from .oxygen import Oxygen
+from .dataset_loader import ImageDataset, ImageNoLabelDataset

 __factory = {
     'market1501': Market1501,
@@ -13,6 +15,8 @@
     'dukemtmc': DukeMTMCreID,
     'msmt17': MSMT17,
     'veri': VeRi,
+    'oxygen1': Oxygen_1,
+    'oxygen': Oxygen
 }
diff --git a/data/datasets/bases.py b/data/datasets/bases.py
index e225319..268c870 100644
--- a/data/datasets/bases.py
+++ b/data/datasets/bases.py
@@ -67,14 +67,12 @@ class BaseVideoDataset(BaseDataset):
     def print_dataset_statistics(self, train, query, gallery):
         num_train_pids, num_train_tracklets, num_train_cams, train_tracklet_stats = \
             self.get_videodata_info(train, return_tracklet_stats=True)
-
         num_query_pids, num_query_tracklets, num_query_cams, query_tracklet_stats = \
             self.get_videodata_info(query, return_tracklet_stats=True)
-
         num_gallery_pids, num_gallery_tracklets, num_gallery_cams, gallery_tracklet_stats = \
             self.get_videodata_info(gallery, return_tracklet_stats=True)
-
         tracklet_stats = train_tracklet_stats + query_tracklet_stats + gallery_tracklet_stats
+
         min_num = np.min(tracklet_stats)
         max_num = np.max(tracklet_stats)
         avg_num = np.mean(tracklet_stats)
diff --git a/data/datasets/cuhk03.py b/data/datasets/cuhk03.py
index 57bf8e3..2316451 100644
--- a/data/datasets/cuhk03.py
+++ b/data/datasets/cuhk03.py
@@ -3,7 +3,9 @@
 import h5py
 import os.path as osp
 from scipy.io import loadmat
-from scipy.misc import imsave
+# from scipy.misc import imsave
+from imageio import imwrite as imsave

 from utils.iotools import mkdir_if_missing, write_json, read_json
 from .bases import BaseImageDataset
diff --git a/data/datasets/dataset_loader.py b/data/datasets/dataset_loader.py
index c988165..96dbfc7 100644
--- a/data/datasets/dataset_loader.py
+++ b/data/datasets/dataset_loader.py
@@ -36,7 +36,31 @@ def __getitem__(self, index):
         img_path, pid, camid = self.dataset[index]
         img = read_image(img_path)

+        # the transform is re-applied on every __getitem__ call
         if self.transform is not None:
             img = self.transform(img)

         return img, pid, camid, img_path
+
+class ImageNoLabelDataset(Dataset):
+    """Image person ReID dataset without identity labels."""
+
+    def __init__(self, dataset, transform=None):
+        self.dataset = dataset
+        # TODO compute new transform
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, index):
+        img_path, camid, date = self.dataset[index]
+        img = read_image(img_path)
+
+        # the transform is re-applied on every __getitem__ call
+        if self.transform is not None:
+            img = self.transform(img)
+
+        return img, camid, date, img_path
diff --git a/data/datasets/market1501.py b/data/datasets/market1501.py
index e2149cd..7655717 100644
--- a/data/datasets/market1501.py
+++ b/data/datasets/market1501.py
@@ -60,7 +60,6 @@ def _check_before_run(self):
     def _process_dir(self, dir_path, relabel=False):
         img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
         pattern = re.compile(r'([-\d]+)_c(\d)')
-
         pid_container = set()
         for img_path in img_paths:
             pid, _ = map(int, pattern.search(img_path).groups())
diff --git a/data/datasets/msmt17.py b/data/datasets/msmt17.py
index 6e5ee63..b6328e7 100644
--- a/data/datasets/msmt17.py
+++ b/data/datasets/msmt17.py
@@ -1,85 +1,85 @@
-# encoding: utf-8
-import glob
-import re
-
-import os.path as osp
-
-from .bases import BaseImageDataset
-
-
-class MSMT17(BaseImageDataset):
-    """
-    MSMT17
-
-    Reference:
-    Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018.
- - URL: http://www.pkuvmc.com/publications/msmt17.html - - Dataset statistics: - # identities: 4101 - # images: 32621 (train) + 11659 (query) + 82161 (gallery) - # cameras: 15 - """ - dataset_dir = 'msmt17' - - def __init__(self,root='./toDataset', verbose=True, **kwargs): - super(MSMT17, self).__init__() - self.dataset_dir = osp.join(root, self.dataset_dir) - self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train') - self.query_dir = osp.join(self.dataset_dir, 'query') - self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test') - - self._check_before_run() - - train = self._process_dir(self.train_dir, relabel=True) - query = self._process_dir(self.query_dir, relabel=False) - gallery = self._process_dir(self.gallery_dir, relabel=False) - - if verbose: - print("=> MSMT17 loaded") - self.print_dataset_statistics(train, query, gallery) - - self.train = train - self.query = query - self.gallery = gallery - - self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train) - self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) - self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - - def _check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError("'{}' is not available".format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError("'{}' is not available".format(self.train_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError("'{}' is not available".format(self.query_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError("'{}' is not available".format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError("'{}' is not available".format(self.gallery_dir)) - - def _process_dir(self, dir_path, relabel=False): - img_paths = glob.glob(osp.join(dir_path, '*.jpg')) - pattern = re.compile(r'([-\d]+)_c(\d)') - - pid_container = set() - for img_path in img_paths: - pid, _ = map(int, pattern.search(img_path).groups()) - if pid == -1: continue # junk images are just ignored - pid_container.add(pid) - pid2label = {pid: label for label, pid in enumerate(pid_container)} - - dataset = [] - for img_path in img_paths: - pid, camid = map(int, pattern.search(img_path).groups()) - # if pid == -1: continue # junk images are just ignored - # assert 0 <= pid <= 1501 # pid == 0 means background - # assert 1 <= camid <= 6 - camid -= 1 # index starts from 0 - if relabel: pid = pid2label[pid] - dataset.append((img_path, pid, camid)) - +# encoding: utf-8 +import glob +import re + +import os.path as osp + +from .bases import BaseImageDataset + + +class MSMT17(BaseImageDataset): + """ + MSMT17 + + Reference: + Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018. 
+ + URL: http://www.pkuvmc.com/publications/msmt17.html + + Dataset statistics: + # identities: 4101 + # images: 32621 (train) + 11659 (query) + 82161 (gallery) + # cameras: 15 + """ + dataset_dir = 'msmt17' + + def __init__(self,root='./toDataset', verbose=True, **kwargs): + super(MSMT17, self).__init__() + self.dataset_dir = osp.join(root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'query') + self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test') + + self._check_before_run() + + train = self._process_dir(self.train_dir, relabel=True) + query = self._process_dir(self.query_dir, relabel=False) + gallery = self._process_dir(self.gallery_dir, relabel=False) + + if verbose: + print("=> MSMT17 loaded") + self.print_dataset_statistics(train, query, gallery) + + self.train = train + self.query = query + self.gallery = gallery + + self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train) + self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) + self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) + + def _check_before_run(self): + """Check if all files are available before going deeper""" + if not osp.exists(self.dataset_dir): + raise RuntimeError("'{}' is not available".format(self.dataset_dir)) + if not osp.exists(self.train_dir): + raise RuntimeError("'{}' is not available".format(self.train_dir)) + if not osp.exists(self.query_dir): + raise RuntimeError("'{}' is not available".format(self.query_dir)) + if not osp.exists(self.query_dir): + raise RuntimeError("'{}' is not available".format(self.query_dir)) + if not osp.exists(self.gallery_dir): + raise RuntimeError("'{}' is not available".format(self.gallery_dir)) + + def _process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_c(\d)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + if pid == -1: continue # junk images are just ignored + pid_container.add(pid) + pid2label = {pid: label for label, pid in enumerate(pid_container)} + + dataset = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + # if pid == -1: continue # junk images are just ignored + # assert 0 <= pid <= 1501 # pid == 0 means background + # assert 1 <= camid <= 6 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + dataset.append((img_path, pid, camid)) + return dataset \ No newline at end of file diff --git a/data/datasets/oxygen.py b/data/datasets/oxygen.py new file mode 100644 index 0000000..4dfda2d --- /dev/null +++ b/data/datasets/oxygen.py @@ -0,0 +1,76 @@ +# encoding: utf-8 + +import glob +import re +import os.path as osp +from .bases import BaseImageDataset + +class Oxygen(BaseImageDataset): + """ + Oxygen + Dataset statistics: + # identities: 1501 (+1 for background) + # images: 12936 (train) + 3368 (query) + 15913 (gallery) + """ + dataset_dir = 'oxygen' + + def __init__(self, root='./toDataset', verbose=True, **kwargs): + super(Oxygen, self).__init__() + self.dataset_dir = osp.join(root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train') + self.query_dir = osp.join(self.dataset_dir, 'query') + self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test') + + self._check_before_run() + 
print(self.train_dir)
+        train = self._process_dir(self.train_dir, relabel=True)
+        query = self._process_dir(self.query_dir, relabel=False)
+        gallery = self._process_dir(self.gallery_dir, relabel=False)
+
+        if verbose:
+            print("=> Oxygen loaded")
+            self.print_dataset_statistics(train, query, gallery)
+
+        self.train = train
+        self.query = query
+        self.gallery = gallery
+
+        self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train)
+        self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
+        self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)
+
+    def _check_before_run(self):
+        """Check if all files are available before going deeper"""
+        if not osp.exists(self.dataset_dir):
+            raise RuntimeError("'{}' is not available".format(self.dataset_dir))
+        if not osp.exists(self.train_dir):
+            raise RuntimeError("'{}' is not available".format(self.train_dir))
+        if not osp.exists(self.query_dir):
+            raise RuntimeError("'{}' is not available".format(self.query_dir))
+        if not osp.exists(self.gallery_dir):
+            raise RuntimeError("'{}' is not available".format(self.gallery_dir))
+
+    def _process_dir(self, dir_path, relabel=False):
+        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
+        pattern = re.compile(r'([-\d]+)_(.*)_')
+
+        pid_container = set()
+        for img_path in img_paths:
+            pid = int(pattern.search(img_path).groups()[0])
+            if pid == -1: continue  # junk images are just ignored
+            pid_container.add(pid)
+        pid2label = {pid: label for label, pid in enumerate(pid_container)}
+        dataset = []
+        for img_path in img_paths:
+            grp = pattern.search(img_path).groups()
+            pid, camid = (int(grp[0]), grp[1])  # camid stays a string here, not a numeric index
+            if pid == -1: continue  # junk images are just ignored
+            if relabel: pid = pid2label[pid]
+            dataset.append((img_path, pid, camid))
+        return dataset
diff --git a/data/datasets/oxygen1.py b/data/datasets/oxygen1.py
new file mode 100644
index 0000000..1303a56
--- /dev/null
+++ b/data/datasets/oxygen1.py
@@ -0,0 +1,70 @@
+# encoding: utf-8
+
+import glob
+import re
+import os
+import os.path as osp
+from os.path import join
+
+from .bases import BaseImageDataset
+
+class Oxygen_1(BaseImageDataset):
+    """
+    Oxygen (gallery only, no identity labels)
+    Loader structure adapted from the Market1501 loader
+    (Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015).
+
+    Images are expected under gallery/<camera-folder>/*.jpg, e.g. gallery/mbk-12-4/;
+    items are (img_path, camid, date) tuples and there are no pids.
+    """
+    dataset_dir = 'oxygen1'
+
+    def __init__(self, root='./toDataset', verbose=True, **kwargs):
+        super(Oxygen_1, self).__init__()
+        self.dataset_dir = osp.join(root, self.dataset_dir)
+        self.gallery_dir = osp.join(self.dataset_dir, 'gallery/')
+
+        self._check_before_run()
+
+        gallery = self._process_dir(self.gallery_dir, relabel=False)
+
+        if verbose:
+            print("=> Oxygen Gallery loaded")
+
+        self.gallery = gallery
+        self.num_gallery_imgs, self.num_gallery_cams, self.num_gallery_dates = self._get_imagedata_info(self.gallery)
+
+    def _get_imagedata_info(self, data):
+        cams, dates = [], []
+        for _, cam, date in data:
+            cams += [cam]
+            dates += [date]
+        cams = set(cams)
+        dates = set(dates)
+        num_dates = len(dates)
+        num_cams = len(cams)
+        num_imgs = len(data)
+        return num_imgs, num_cams, num_dates
+
+    def _check_before_run(self):
+        """Check if all files are available before going deeper"""
+        if not osp.exists(self.dataset_dir):
+            raise RuntimeError("'{}' is not available".format(self.dataset_dir))
+        if not osp.exists(self.gallery_dir):
+            raise RuntimeError("'{}' is not available".format(self.gallery_dir))
+
+    # TODO : 1 mbk-(/d+)-(/d+)
+    # data loader without label or pid
+    def _process_dir(self, dir_path, relabel=False):
+        all_folder = os.listdir(dir_path)
+        img_paths = [fs for files in [glob.glob(osp.join(dir_path + folder, '*.jpg')) for folder in all_folder] for fs in files]
+
+        # (path, cam, date); camera id and date format are not validated here
+        dataset = [(img_path, img_path.split("/")[-2], img_path.split("/")[-1][:10]) for img_path in img_paths]
+        return dataset
diff --git a/data/datasets/veri.py b/data/datasets/veri.py
index 047ec46..b976e24 100644
--- a/data/datasets/veri.py
+++ b/data/datasets/veri.py
@@ -1,84 +1,83 @@
-# encoding: utf-8
-import glob
-import re
-
-import os.path as osp
-
-from .bases import BaseImageDataset
-
-
-class VeRi(BaseImageDataset):
-    """
-    VeRi-776
-    Reference:
-    Liu, Xinchen, et al. "Large-scale vehicle re-identification in urban surveillance videos." ICME 2016.
- - URL:https://vehiclereid.github.io/VeRi/ - - Dataset statistics: - # identities: 776 - # images: 37778 (train) + 1678 (query) + 11579 (gallery) - # cameras: 20 - """ - - dataset_dir = 'veri' - - def __init__(self, root='./toDataset', verbose=True, **kwargs): - super(VeRi, self).__init__() - self.dataset_dir = osp.join(root, self.dataset_dir) - self.train_dir = osp.join(self.dataset_dir, 'image_train') - self.query_dir = osp.join(self.dataset_dir, 'image_query') - self.gallery_dir = osp.join(self.dataset_dir, 'image_test') - - self._check_before_run() - - train = self._process_dir(self.train_dir, relabel=True) - query = self._process_dir(self.query_dir, relabel=False) - gallery = self._process_dir(self.gallery_dir, relabel=False) - - if verbose: - print("=> VeRi-776 loaded") - self.print_dataset_statistics(train, query, gallery) - - self.train = train - self.query = query - self.gallery = gallery - - self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train) - self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) - self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) - - def _check_before_run(self): - """Check if all files are available before going deeper""" - if not osp.exists(self.dataset_dir): - raise RuntimeError("'{}' is not available".format(self.dataset_dir)) - if not osp.exists(self.train_dir): - raise RuntimeError("'{}' is not available".format(self.train_dir)) - if not osp.exists(self.query_dir): - raise RuntimeError("'{}' is not available".format(self.query_dir)) - if not osp.exists(self.gallery_dir): - raise RuntimeError("'{}' is not available".format(self.gallery_dir)) - - def _process_dir(self, dir_path, relabel=False): - img_paths = glob.glob(osp.join(dir_path, '*.jpg')) - pattern = re.compile(r'([-\d]+)_c(\d+)') - - pid_container = set() - for img_path in img_paths: - pid, _ = map(int, pattern.search(img_path).groups()) - if pid == -1: continue # junk images are just ignored - pid_container.add(pid) - pid2label = {pid: label for label, pid in enumerate(pid_container)} - - dataset = [] - for img_path in img_paths: - pid, camid = map(int, pattern.search(img_path).groups()) - if pid == -1: continue # junk images are just ignored - assert 0 <= pid <= 776 # pid == 0 means background - assert 1 <= camid <= 20 - camid -= 1 # index starts from 0 - if relabel: pid = pid2label[pid] - dataset.append((img_path, pid, camid)) - - return dataset - +# encoding: utf-8 +import glob +import re + +import os.path as osp + +from .bases import BaseImageDataset + +class VeRi(BaseImageDataset): + """ + VeRi-776 + Reference: + Liu, Xinchen, et al. "Large-scale vehicle re-identification in urban surveillance videos." ICME 2016. 
+ + URL:https://vehiclereid.github.io/VeRi/ + + Dataset statistics: + # identities: 776 + # images: 37778 (train) + 1678 (query) + 11579 (gallery) + # cameras: 20 + """ + + dataset_dir = 'veri' + + def __init__(self, root='./toDataset', verbose=True, **kwargs): + super(VeRi, self).__init__() + self.dataset_dir = osp.join(root, self.dataset_dir) + self.train_dir = osp.join(self.dataset_dir, 'image_train') + self.query_dir = osp.join(self.dataset_dir, 'image_query') + self.gallery_dir = osp.join(self.dataset_dir, 'image_test') + + self._check_before_run() + + train = self._process_dir(self.train_dir, relabel=True) + query = self._process_dir(self.query_dir, relabel=False) + gallery = self._process_dir(self.gallery_dir, relabel=False) + + if verbose: + print("=> VeRi-776 loaded") + self.print_dataset_statistics(train, query, gallery) + + self.train = train + self.query = query + self.gallery = gallery + + self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train) + self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query) + self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery) + + def _check_before_run(self): + """Check if all files are available before going deeper""" + if not osp.exists(self.dataset_dir): + raise RuntimeError("'{}' is not available".format(self.dataset_dir)) + if not osp.exists(self.train_dir): + raise RuntimeError("'{}' is not available".format(self.train_dir)) + if not osp.exists(self.query_dir): + raise RuntimeError("'{}' is not available".format(self.query_dir)) + if not osp.exists(self.gallery_dir): + raise RuntimeError("'{}' is not available".format(self.gallery_dir)) + + def _process_dir(self, dir_path, relabel=False): + img_paths = glob.glob(osp.join(dir_path, '*.jpg')) + pattern = re.compile(r'([-\d]+)_c(\d+)') + + pid_container = set() + for img_path in img_paths: + pid, _ = map(int, pattern.search(img_path).groups()) + if pid == -1: continue # junk images are just ignored + pid_container.add(pid) + pid2label = {pid: label for label, pid in enumerate(pid_container)} + + dataset = [] + for img_path in img_paths: + pid, camid = map(int, pattern.search(img_path).groups()) + if pid == -1: continue # junk images are just ignored + assert 0 <= pid <= 776 # pid == 0 means background + assert 1 <= camid <= 20 + camid -= 1 # index starts from 0 + if relabel: pid = pid2label[pid] + dataset.append((img_path, pid, camid)) + + return dataset + diff --git a/data/transforms.py b/data/transforms.py index 8c40a6e..8ebb555 100644 --- a/data/transforms.py +++ b/data/transforms.py @@ -15,7 +15,7 @@ def build_transforms(cfg): T.RandomCrop(cfg.INPUT.IMG_SIZE), T.ToTensor(), normalize_transform, - RandomErasing(probability=cfg.INPUT.RE_PROB, mean=cfg.INPUT.PIXEL_MEAN) + RandomErasing(probability=cfg.INPUT.RE_PROB, mean=cfg.INPUT.PIXEL_MEAN, sh=cfg.INPUT.RE_MAX_RATIO) ]) transforms['eval'] = T.Compose([ diff --git a/embedding_projector.sh b/embedding_projector.sh new file mode 100644 index 0000000..7ab3d3e --- /dev/null +++ b/embedding_projector.sh @@ -0,0 +1,5 @@ +python tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('0')" \ + DATASETS.NAMES "('market1501')" MODEL.PRETRAIN_CHOICE "('self')" \ + TEST.WEIGHT "('./log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth')" TEST.EVALUATE_ONLY "('on')" \ + EMBEDDING_PROJECTOR.OPTION "('on')" \ + OUTPUT_DIR "('./log/market1501/local-AGW-baseline/test_embedding_projector')" \ No 
newline at end of file
diff --git a/engine/__init__.py b/engine/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/engine/inferencer.py b/engine/inferencer.py
new file mode 100644
index 0000000..e69de29
diff --git a/engine/trainer.py b/engine/trainer.py
new file mode 100644
index 0000000..e69de29
diff --git a/evaluate/__init__.py b/evaluate/__init__.py
new file mode 100644
index 0000000..f735821
--- /dev/null
+++ b/evaluate/__init__.py
@@ -0,0 +1,12 @@
+import torch
+
+from .eval_reid import eval_func
+
+def euclidean_dist(x, y):
+    m, n = x.size(0), y.size(0)
+    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)  # ||x_i||^2, broadcast to (m, n)
+    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()  # ||y_j||^2, broadcast to (m, n)
+    dist = xx + yy
+    dist.addmm_(1, -2, x, y.t())  # legacy signature: dist = 1 * dist + (-2) * x @ y.t()
+    dist = dist.clamp(min=1e-12).sqrt()  # clamp guards sqrt against rounding below zero
+    return dist
diff --git a/evaluate/eval_reid.py b/evaluate/eval_reid.py
new file mode 100644
index 0000000..682d689
--- /dev/null
+++ b/evaluate/eval_reid.py
@@ -0,0 +1,65 @@
+import numpy as np
+
+def eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=200):
+    """Evaluation with market1501 metric
+    Key: for each query identity, its gallery images from the same camera view are discarded.
+    """
+    num_q, num_g = distmat.shape
+    if num_g < max_rank:
+        max_rank = num_g
+        print("Note: number of gallery samples is quite small, got {}".format(num_g))
+    indices = np.argsort(distmat, axis=1)
+    matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
+
+    # compute cmc curve for each query
+    all_cmc = []
+    all_AP = []
+    num_valid_q = 0.  # number of valid query
+    for q_idx in range(num_q):
+        # get query pid and camid
+        q_pid = q_pids[q_idx]
+        q_camid = q_camids[q_idx]
+
+        # remove gallery samples that have the same pid and camid with query
+        order = indices[q_idx]
+        remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
+        keep = np.invert(remove)
+
+        # compute cmc curve
+        # binary vector, positions with value 1 are correct matches
+        orig_cmc = matches[q_idx][keep]
+        if not np.any(orig_cmc):
+            # this condition is true when query identity does not appear in gallery
+            # [update:20191029] divide by query
+            all_AP.append(0)
+            continue
+
+        cmc = orig_cmc.cumsum()
+        cmc[cmc > 1] = 1
+
+        all_cmc.append(cmc[:max_rank])
+        num_valid_q += 1.
+
+        # compute average precision
+        # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
+        # [update:20191029] support for map@max_rank
+        orig_cmc = orig_cmc[:max_rank]
+        if not np.any(orig_cmc):
+            all_AP.append(0)
+            continue
+        num_rel = orig_cmc.sum()
+        tmp_cmc = orig_cmc.cumsum()
+        tmp_cmc = [x / (i + 1.)
for i, x in enumerate(tmp_cmc)] + tmp_cmc = np.asarray(tmp_cmc) * orig_cmc + AP = tmp_cmc.sum() / num_rel + all_AP.append(AP) + + # assert num_valid_q > 0, "Error: all query identities do not appear in gallery" + + all_cmc = np.asarray(all_cmc).astype(np.float32) + # [update:20191029] divide by query + all_cmc = all_cmc.sum(0) / num_q + + mAP = np.mean(all_AP) + + return all_cmc, mAP,all_AP \ No newline at end of file diff --git a/local-AGW-market.sh b/local-AGW-market.sh new file mode 100644 index 0000000..e9e126b --- /dev/null +++ b/local-AGW-market.sh @@ -0,0 +1,12 @@ +# Dataset: market1501 +# imagesize: 256x128 +# batchsize: 16x4 +# warmup_step 10 +# random erase prob 0.5 +# last stride 1 +# with center loss +# weight regularized triplet loss +# generalized mean pooling +# non local blocks +python tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('0')" \ +DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/local-AGW-baseline')" \ No newline at end of file diff --git a/log/Test/log.txt b/log/Test/log.txt new file mode 100644 index 0000000..bd3ca93 --- /dev/null +++ b/log/Test/log.txt @@ -0,0 +1,117 @@ +2020-09-06 07:21:08,966 reid_baseline INFO: Using 1 GPUS +2020-09-06 07:21:08,966 reid_baseline INFO: Namespace(config_file='configs/AGW_baseline.yml', opts=['MODEL.DEVICE_ID', "('0')", 'DATASETS.NAMES', "('market1501')", 'MODEL.PRETRAIN_CHOICE', "('self')", 'TEST.WEIGHT', "('/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth')", 'VISUALIZE.OPTION', "('on')", 'TEST.EVALUATE_ONLY', "('on')", 'OUTPUT_DIR', "('./log/Test')"]) +2020-09-06 07:21:08,966 reid_baseline INFO: Loaded configuration file configs/AGW_baseline.yml +2020-09-06 07:21:08,967 reid_baseline INFO: +MODEL: + PRETRAIN_CHOICE: 'imagenet' + PRETRAIN_PATH: "/gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/resnet50-19c8e357.pth" + CENTER_LOSS: 'on' + CENTER_FEAT_DIM: 2048 + NAME: 'resnet50_nl' + WEIGHT_REGULARIZED_TRIPLET: 'on' + GENERALIZED_MEAN_POOL: 'on' + +INPUT: + IMG_SIZE: [256, 128] + PROB: 0.5 # random horizontal flip + RE_PROB: 0.5 # random erasing + PADDING: 10 + +DATASETS: + NAMES: ('market1501') + +DATALOADER: + PK_SAMPLER: 'on' + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + +SOLVER: + OPTIMIZER_NAME: 'Adam' + MAX_EPOCHS: 160 + BASE_LR: 0.00035 + + CENTER_LR: 0.5 + CENTER_LOSS_WEIGHT: 0.0005 + + WEIGHT_DECAY: 0.0005 + IMS_PER_BATCH: 64 + + STEPS: [40, 70] + GAMMA: 0.1 + + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: 'linear' + + CHECKPOINT_PERIOD: 40 + LOG_PERIOD: 60 + EVAL_PERIOD: 40 + +TEST: + IMS_PER_BATCH: 64 + RE_RANKING: 'off' + WEIGHT: "path" + FEAT_NORM: 'on' + EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + +OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" + + + +2020-09-06 07:21:08,968 reid_baseline INFO: Running with config: +DATALOADER: + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + PK_SAMPLER: on +DATASETS: + NAMES: market1501 + ROOT_DIR: ./toDataset +INPUT: + IMG_SIZE: [256, 128] + PADDING: 10 + PIXEL_MEAN: [0.485, 0.456, 0.406] + PIXEL_STD: [0.229, 0.224, 0.225] + PROB: 0.5 + RE_PROB: 0.5 +MODEL: + CENTER_FEAT_DIM: 2048 + CENTER_LOSS: on + DEVICE: cuda + DEVICE_ID: 0 + GENERALIZED_MEAN_POOL: on + LAST_STRIDE: 1 + NAME: resnet50_nl + PRETRAIN_CHOICE: self + PRETRAIN_PATH: /gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/resnet50-19c8e357.pth + WEIGHT_REGULARIZED_TRIPLET: on +OUTPUT_DIR: ./log/Test +SOLVER: + BASE_LR: 0.00035 + CENTER_LOSS_WEIGHT: 0.0005 + CENTER_LR: 0.5 + 
CHECKPOINT_PERIOD: 40 + EVAL_PERIOD: 40 + GAMMA: 0.1 + IMS_PER_BATCH: 64 + LOG_PERIOD: 60 + MARGIN: 0.3 + MAX_EPOCHS: 160 + MOMENTUM: 0.9 + OPTIMIZER_NAME: Adam + STEPS: (40, 70) + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: linear + WEIGHT_DECAY: 0.0005 +TEST: + EVALUATE_ONLY: on + FEAT_NORM: on + IMS_PER_BATCH: 64 + RE_RANKING: off + WEIGHT: /gdrive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth +VISUALIZE: + OPTION: on +2020-09-06 07:23:49,533 reid_baseline INFO: Eval and Visualize Only +2020-09-06 07:23:52,738 reid_baseline INFO: Enter inferencing to visualize diff --git a/log/market1501/Experiment-AGW-baseline/log.txt b/log/market1501/Experiment-AGW-baseline/log.txt new file mode 100644 index 0000000..bf039b6 --- /dev/null +++ b/log/market1501/Experiment-AGW-baseline/log.txt @@ -0,0 +1,169 @@ +<<<<<<< Updated upstream +2020-09-24 19:32:15,052 reid_baseline INFO: Using 1 GPUS +2020-09-24 19:32:15,053 reid_baseline INFO: Namespace(config_file='configs/AGW_baseline.yml', opts=['MODEL.DEVICE_ID', "('0')", 'DATASETS.NAMES', "('oxygen')", 'MODEL.TRANSFER_MODE', "('on')", 'MODEL.PRETRAIN_CHOICE', "('self')", 'MODEL.PRETRAIN_PATH', "('./log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth')"]) +2020-09-24 19:32:15,053 reid_baseline INFO: Loaded configuration file configs/AGW_baseline.yml +2020-09-24 19:32:15,053 reid_baseline INFO: +======= +2020-09-25 02:52:40,975 reid_baseline INFO: Using 1 GPUS +2020-09-25 02:52:40,976 reid_baseline INFO: Namespace(config_file='configs/AGW_baseline.yml', opts=['MODEL.DEVICE_ID', "('0')", 'DATASETS.NAMES', "('oxygen')", 'MODEL.TRANSFER_MODE', "('on')", 'MODEL.PRETRAIN_CHOICE', "('self')", 'MODEL.PRETRAIN_PATH', "('./log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth')"]) +2020-09-25 02:52:40,977 reid_baseline INFO: Loaded configuration file configs/AGW_baseline.yml +2020-09-25 02:52:40,978 reid_baseline INFO: +>>>>>>> Stashed changes +MODEL: + PRETRAIN_CHOICE: 'self' + PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth" + # PRETRAIN_PATH: 'C:/Users/60310249/.torch/models/resnet50-19c8e357.pth' + TRANSFER_MODE : 'off' + CENTER_LOSS: 'on' + CENTER_FEAT_DIM: 2048 + NAME: 'resnet50_nl' + WEIGHT_REGULARIZED_TRIPLET: 'on' + GENERALIZED_MEAN_POOL: 'on' + +INPUT: + IMG_SIZE: [256, 128] + PROB: 0.5 # random horizontal flip + RE_PROB: 0.5 # random erasing + PADDING: 10 + +DATASETS: + NAMES: ('market1501') + +DATALOADER: + PK_SAMPLER: 'on' + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + +SOLVER: + OPTIMIZER_NAME: 'Adam' + MAX_EPOCHS: 180 + BASE_LR: 0.00035 + + CENTER_LR: 0.5 + CENTER_LOSS_WEIGHT: 0.0005 + + WEIGHT_DECAY: 0.0005 + IMS_PER_BATCH: 64 + + STEPS: [40, 70] + GAMMA: 0.1 + + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: 'linear' + + CHECKPOINT_PERIOD: 20 + LOG_PERIOD: 180 + EVAL_PERIOD: 20 + +TEST: + IMS_PER_BATCH: 64 + RE_RANKING: 'off' + WEIGHT: "path" + FEAT_NORM: 'on' + EVALUATE_ONLY: 'off' +VISUALIZE : + OPTION : "off" + IMS_PER_BATCH : 256 + NEED_NEW_FEAT_EMBED : "off" + INDEX : 0 + TOP_RANK : 10 + RE_RANK : "off" +EMBEDDING_PROJECTOR: + OPTION: "off" + +OUTPUT_DIR: "./log/market1501/Experiment-AGW-baseline" + + + +<<<<<<< Updated upstream +2020-09-24 19:32:15,054 reid_baseline INFO: Running with config: +======= +2020-09-25 02:52:40,987 reid_baseline INFO: Running with config: +>>>>>>> Stashed changes +DATALOADER: + NUM_INSTANCE: 4 + NUM_WORKERS: 8 + 
PK_SAMPLER: on +DATASETS: + NAMES: oxygen + ROOT_DIR: ./toDataset +EMBEDDING_PROJECTOR: + OPTION: off +INPUT: + IMG_SIZE: [256, 128] + PADDING: 10 + PIXEL_MEAN: [0.485, 0.456, 0.406] + PIXEL_STD: [0.229, 0.224, 0.225] + PROB: 0.5 + RE_PROB: 0.5 +MODEL: + CENTER_FEAT_DIM: 2048 + CENTER_LOSS: on + DEVICE: cuda + DEVICE_ID: 0 + GENERALIZED_MEAN_POOL: on + LAST_STRIDE: 1 + NAME: resnet50_nl + PRETRAIN_CHOICE: self + PRETRAIN_PATH: ./log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth + TRANSFER_MODE: on + WEIGHT_REGULARIZED_TRIPLET: on +OUTPUT_DIR: ./log/market1501/Experiment-AGW-baseline +SOLVER: + BASE_LR: 0.00035 + CENTER_LOSS_WEIGHT: 0.0005 + CENTER_LR: 0.5 + CHECKPOINT_PERIOD: 20 + EVAL_PERIOD: 20 + GAMMA: 0.1 + IMS_PER_BATCH: 64 + LOG_PERIOD: 180 + MARGIN: 0.3 + MAX_EPOCHS: 180 + MOMENTUM: 0.9 + OPTIMIZER_NAME: Adam + STEPS: (40, 70) + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 10 + WARMUP_METHOD: linear + WEIGHT_DECAY: 0.0005 +TEST: + EVALUATE_ONLY: off + FEAT_NORM: on + IMS_PER_BATCH: 64 + RE_RANKING: off + WEIGHT: path +VISUALIZE: + IMS_PER_BATCH: 256 + INDEX: 0 + NEED_NEW_FEAT_EMBED: off + OPTION: off + RE_RANK: off + TOP_RANK: 10 +<<<<<<< Updated upstream +2020-09-24 19:32:21,094 reid_baseline INFO: Start training +2020-09-24 19:32:21,473 reid_baseline INFO: Epoch 1 done. Time per batch: 0.000[s] Speed: 171.6[samples/s] +2020-09-24 19:32:21,474 reid_baseline INFO: ---------- +2020-09-24 19:32:21,829 reid_baseline INFO: Epoch 2 done. Time per batch: 0.000[s] Speed: 180.2[samples/s] +2020-09-24 19:32:21,830 reid_baseline INFO: ---------- +2020-09-24 19:32:22,197 reid_baseline INFO: Epoch 3 done. Time per batch: 0.000[s] Speed: 174.2[samples/s] +2020-09-24 19:32:22,198 reid_baseline INFO: ---------- +2020-09-24 19:32:22,563 reid_baseline INFO: Epoch 4 done. Time per batch: 0.000[s] Speed: 175.2[samples/s] +2020-09-24 19:32:22,564 reid_baseline INFO: ---------- +2020-09-24 19:32:22,923 reid_baseline INFO: Epoch 5 done. Time per batch: 0.000[s] Speed: 178.2[samples/s] +2020-09-24 19:32:22,923 reid_baseline INFO: ---------- +2020-09-24 19:32:23,288 reid_baseline INFO: Epoch 6 done. Time per batch: 0.000[s] Speed: 175.7[samples/s] +2020-09-24 19:32:23,288 reid_baseline INFO: ---------- +2020-09-24 19:32:23,647 reid_baseline INFO: Epoch 7 done. Time per batch: 0.000[s] Speed: 178.5[samples/s] +2020-09-24 19:32:23,647 reid_baseline INFO: ---------- +2020-09-24 19:32:24,008 reid_baseline INFO: Epoch 8 done. Time per batch: 0.000[s] Speed: 177.7[samples/s] +2020-09-24 19:32:24,008 reid_baseline INFO: ---------- +2020-09-24 19:32:24,370 reid_baseline INFO: Epoch 9 done. Time per batch: 0.000[s] Speed: 176.9[samples/s] +2020-09-24 19:32:24,371 reid_baseline INFO: ---------- +2020-09-24 19:32:24,727 reid_baseline INFO: Epoch 10 done. Time per batch: 0.000[s] Speed: 179.7[samples/s] +2020-09-24 19:32:24,728 reid_baseline INFO: ---------- +2020-09-24 19:32:25,087 reid_baseline INFO: Epoch 11 done. 
Time per batch: 0.000[s] Speed: 178.3[samples/s] +2020-09-24 19:32:25,087 reid_baseline INFO: ---------- +======= +>>>>>>> Stashed changes diff --git a/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599139867.0c030843616c.5845.0 b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599139867.0c030843616c.5845.0 new file mode 100644 index 0000000..ce049f4 Binary files /dev/null and b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599139867.0c030843616c.5845.0 differ diff --git a/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599141753.0c030843616c.10259.0 b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599141753.0c030843616c.10259.0 new file mode 100644 index 0000000..d0f6980 Binary files /dev/null and b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599141753.0c030843616c.10259.0 differ diff --git a/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142030.0c030843616c.10963.0 b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142030.0c030843616c.10963.0 new file mode 100644 index 0000000..47da2bd Binary files /dev/null and b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142030.0c030843616c.10963.0 differ diff --git a/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142399.0c030843616c.11861.0 b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142399.0c030843616c.11861.0 new file mode 100644 index 0000000..1d84e00 Binary files /dev/null and b/log/market1501/Experiment-AGW-baseline/writer/events.out.tfevents.1599142399.0c030843616c.11861.0 differ diff --git a/losses/__init__.py b/losses/__init__.py new file mode 100644 index 0000000..3d43964 --- /dev/null +++ b/losses/__init__.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + +from .id_loss import CrossEntropyLabelSmooth +from .center_loss import CenterLoss +from .circle_loss import CircleLoss +from .cosine_loss import CosFace, AdaCos, ArcFace +from .triplet_loss import TripletLoss, WeightedRegularizedTriplet +from .smooth_ap_loss import SmoothAP + +def build_loss_fn(cfg, num_classes): + if cfg.MODEL.ID_LOSS_TYPE == 'none': + def id_loss_fn(score, target): + return 0 + else: + id_loss_fn = CrossEntropyLabelSmooth(num_classes=num_classes) + + if cfg.MODEL.METRIC_LOSS_TYPE == 'triplet': + metric_loss_fn = TripletLoss(margin=0.0) + elif cfg.MODEL.METRIC_LOSS_TYPE == 'circle': + metric_loss_fn = CircleLoss(m=cfg.MODEL.METRIC_LOSS.MARGIN, s=cfg.MODEL.METRIC_LOSS.SCALE) + elif cfg.MODEL.METRIC_LOSS_TYPE == 'smoothAP': + assert(cfg.SOLVER.BATCH_SIZE % cfg.DATALOADER.NUM_INSTANCE == 0) + metric_loss_fn = SmoothAP(anneal=0.01, batch_size=cfg.SOLVER.BATCH_SIZE, + num_id=cfg.SOLVER.BATCH_SIZE // cfg.DATALOADER.NUM_INSTANCE, feat_dims=2048 + ) + else: + def metric_loss_fn(feat, target, feat_t, target_t): + return 0 + def loss_func(score, feat, target, feat_t, target_t): + return id_loss_fn(score, target), metric_loss_fn(feat, target, feat_t, target_t) + + return loss_func diff --git a/losses/center_loss.py b/losses/center_loss.py new file mode 100644 index 0000000..0f5fd21 --- /dev/null +++ b/losses/center_loss.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import + +import torch +from torch import nn + + +class CenterLoss(nn.Module): + """Center loss. + + Reference: + Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. + + Args: + num_classes (int): number of classes. 
+ feat_dim (int): feature dimension. + """ + + def __init__(self, num_classes=751, feat_dim=2048, use_gpu=True): + super(CenterLoss, self).__init__() + self.num_classes = num_classes + self.feat_dim = feat_dim + self.use_gpu = use_gpu + + if self.use_gpu: + self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) + else: + self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) + + def forward(self, x, labels): + """ + Args: + x: feature matrix with shape (batch_size, feat_dim). + labels: ground truth labels with shape (batch_size). + """ + assert x.size(0) == labels.size(0), "features.size(0) is not equal to labels.size(0)" + + batch_size = x.size(0) + distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ + torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() + distmat.addmm_(1, -2, x, self.centers.t()) + + classes = torch.arange(self.num_classes).long() + if self.use_gpu: classes = classes.cuda() + labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) + mask = labels.eq(classes.expand(batch_size, self.num_classes)) + + dist = distmat * mask.float() + loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size + #dist = [] + #for i in range(batch_size): + # value = distmat[i][mask[i]] + # value = value.clamp(min=1e-12, max=1e+12) # for numerical stability + # dist.append(value) + #dist = torch.cat(dist) + #loss = dist.mean() + return loss + + +if __name__ == '__main__': + use_gpu = False + center_loss = CenterLoss(use_gpu=use_gpu) + features = torch.rand(16, 2048) + targets = torch.Tensor([0, 1, 2, 3, 2, 3, 1, 4, 5, 3, 2, 1, 0, 0, 5, 4]).long() + if use_gpu: + features = torch.rand(16, 2048).cuda() + targets = torch.Tensor([0, 1, 2, 3, 2, 3, 1, 4, 5, 3, 2, 1, 0, 0, 5, 4]).long().cuda() + + loss = center_loss(features, targets) + print(loss) diff --git a/losses/circle_loss.py b/losses/circle_loss.py new file mode 100644 index 0000000..2674d1e --- /dev/null +++ b/losses/circle_loss.py @@ -0,0 +1,38 @@ +import torch +import torch.nn as nn + +class CircleLoss(nn.Module): + def __init__(self, m=0.25, s=96): + super(CircleLoss, self).__init__() + self.m = m + self.s = s + self.soft_plus = nn.Softplus() + + def forward(self, feat1, label1, feat2_t, label2_t): + sim_mat = torch.mm(feat1, feat2_t) + + N, M = sim_mat.size() + + is_pos = label1.view(N, 1).expand(N, M).eq(label2_t.expand(N, M)).float() + + same_indx = torch.eye(N, N, device='cuda') + remain_indx = torch.zeros(N, M - N, device='cuda') + same_indx = torch.cat((same_indx, remain_indx), dim=1) + is_pos = is_pos - same_indx + + is_neg = label1.view(N, 1).expand(N, M).ne(label2_t.expand(N, M)).float() + + s_p = sim_mat * is_pos + s_n = sim_mat * is_neg + + alpha_p = torch.clamp_min(-s_p.detach() + 1 + self.m, min=0.) + alpha_n = torch.clamp_min(s_n.detach() + self.m, min=0.)
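+ # Circle loss (Sun et al., CVPR 2020): alpha_p / alpha_n above act as
+ # self-paced weights, and delta_p / delta_n below are the relaxed margins;
+ # with the default m=0.25, positive similarities are pushed above 1-m=0.75
+ # and negatives below m=0.25, so pairs that already satisfy their margin
+ # contribute almost nothing to the loss.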
+ delta_p = 1 - self.m + delta_n = self.m + + logit_p = -self.s * alpha_p * (s_p - delta_p) + logit_n = self.s * alpha_n * (s_n - delta_n) + + loss = nn.functional.softplus(torch.logsumexp(logit_p, dim=1) + torch.logsumexp(logit_n, dim=1)).mean() + + return loss \ No newline at end of file diff --git a/losses/cosine_loss.py b/losses/cosine_loss.py new file mode 100644 index 0000000..6e91343 --- /dev/null +++ b/losses/cosine_loss.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import Parameter +import math + +class ArcFace(nn.Module): + def __init__(self, num_features, num_classes, s=30.0, m=0.50): + super(ArcFace, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = s + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + + def forward(self, input, label=None): + # normalized features + x = F.normalize(input) + # normalized weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + # add margin + theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7)) + target_logits = torch.cos(theta + self.m) + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + output = logits * (1 - one_hot) + target_logits * one_hot + # feature re-scale + output *= self.s + + return output + +# class ArcCos(nn.Module): +# def __init__(self, in_features, out_features, s=30.0, m=0.50, bias=False): +# super(ArcCos, self).__init__() +# self.in_features = in_features +# self.out_features = out_features +# self.s = s +# self.m = m +# self.cos_m = math.cos(m) +# self.sin_m = math.sin(m) + +# self.th = math.cos(math.pi - m) +# self.mm = math.sin(math.pi - m) * m + +# self.weight = Parameter(torch.Tensor(out_features, in_features)) +# if bias: +# self.bias = Parameter(torch.Tensor(out_features)) +# else: +# self.register_parameter('bias', None) +# self.reset_parameters() + +# def reset_parameters(self): +# nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) +# if self.bias is not None: +# fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) +# bound = 1 / math.sqrt(fan_in) +# nn.init.uniform_(self.bias, -bound, bound) + +# def forward(self, input, label): +# # fix for + +class AdaCos(nn.Module): + def __init__(self, num_features, num_classes, m=0.50): + super(AdaCos,self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = math.sqrt(2) * math.log(num_classes - 1) + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + def forward(self, input, label=None): + # normalize features + x = F.normalize(input) + # normalize weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + # adaptively update the scale s, then re-scale the logits + theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7)) + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + with torch.no_grad(): + B_avg = torch.where(one_hot < 1, torch.exp(self.s * logits), torch.zeros_like(logits)) + B_avg = torch.sum(B_avg) / input.size(0) + theta_med = torch.median(theta[one_hot == 1]) + self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med)) + output = self.s * logits + return output + +class CosFace(nn.Module): + def __init__(self,
num_features, num_classes, s=30.0, m=0.35): + super(CosFace, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = s + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + + def forward(self, input, label=None): + # normalize feature + x = F.normalize(input) + # normalize weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + + # # * add margin version + # target_logits = logits - self.m + # one_hot = torch.zeros_like(logits) + # one_hot.scatter_(1, labels.view(-1, 1).long(), 1) + # output = logits - (1 - one_hot) + target_logits * one_hot + + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + output = logits - one_hot * self.m + # feature re-scale + output *= self.s + + return output + + def __repr__(self): + return self.__class__.__name__ +\ + '(' + 'num_features='+'{}'.format(self.num_features) + \ + ','+'num_classes=' + '{}'.format(self.num_classes) +\ + ', ' + 's=' + str(self.s) + \ + ', ' + 'm=' + str(self.m) +\ + ')' \ No newline at end of file diff --git a/losses/id_loss.py b/losses/id_loss.py new file mode 100644 index 0000000..f672bd8 --- /dev/null +++ b/losses/id_loss.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + +class CrossEntropyLabelSmooth(nn.Module): + """Cross entropy loss with label smoothing regularizer. + + Reference: + Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016. + Equation: y = (1 - epsilon) * y + epsilon / K. + + Args: + num_classes (int): number of classes. + epsilon (float): weight. + """ + def __init__(self, num_classes, epsilon=0.1, use_gpu=True): + super(CrossEntropyLabelSmooth, self).__init__() + self.num_classes = num_classes + self.epsilon = epsilon + self.use_gpu = use_gpu + self.logsoftmax = nn.LogSoftmax(dim=1) + + def forward(self, inputs, targets): + """ + Args: + inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) + targets: ground truth labels with shape (batch_size) + """ + log_probs = self.logsoftmax(inputs) + targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1) + if self.use_gpu: targets = targets.cuda() + targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes + loss = (- targets * log_probs).mean(0).sum() + return loss diff --git a/losses/smooth_ap_loss.py b/losses/smooth_ap_loss.py new file mode 100644 index 0000000..6cbc862 --- /dev/null +++ b/losses/smooth_ap_loss.py @@ -0,0 +1,111 @@ +import torch +import torch.nn as nn + +def sigmoid(tensor, temp=1.0): + """ temperature controlled sigmoid + takes as input a torch tensor (tensor) and passes it through a sigmoid, controlled by temperature: temp + """ + exponent = -tensor / temp + # clamp the input tensor for stability + exponent = torch.clamp(exponent, min=-50, max=50) + y = 1.0 / (1.0 + torch.exp(exponent)) + return y + +def compute_aff(x): + """computes the affinity matrix between an input vector and itself""" + return torch.mm(x, x.t()) + +class SmoothAP(nn.Module): + """PyTorch implementation of the Smooth-AP loss. + implementation of the Smooth-AP loss. Takes as input the mini-batch of CNN-produced feature embeddings and returns + the value of the Smooth-AP loss. The mini-batch must be formed of a defined number of classes. Each class must + have the same number of instances represented in the mini-batch and must be ordered sequentially by class. + e.g.
the labels for a mini-batch with batch size 9, and 3 represented classes (A,B,C) must look like: + labels = ( A, A, A, B, B, B, C, C, C) + (the order of the classes however does not matter) + For each instance in the mini-batch, the loss computes the Smooth-AP when it is used as the query and the rest of the + mini-batch is used as the retrieval set. The positive set is formed of the other instances in the batch from the + same class. The loss returns the average Smooth-AP across all instances in the mini-batch. + Args: + anneal : float + the temperature of the sigmoid that is used to smooth the ranking function. A low value of the temperature + results in a steep sigmoid, that tightly approximates the heaviside step function in the ranking function. + batch_size : int + the batch size being used during training. + num_id : int + the number of different classes that are represented in the batch. + feat_dims : int + the dimension of the input feature embeddings + Shape: + - Input (preds): (batch_size, feat_dims) (must be a cuda torch float tensor) + - Output: scalar + Examples:: + >>> loss = SmoothAP(0.01, 60, 6, 256) + >>> input = torch.randn(60, 256, requires_grad=True).cuda() + >>> output = loss(input) + >>> output.backward() + """ + + def __init__(self, anneal, batch_size, num_id, feat_dims): + """ + Parameters + ---------- + anneal : float + the temperature of the sigmoid that is used to smooth the ranking function + batch_size : int + the batch size being used + num_id : int + the number of different classes that are represented in the batch + feat_dims : int + the dimension of the input feature embeddings + """ + super(SmoothAP, self).__init__() + + assert(batch_size%num_id==0) + + self.anneal = anneal + self.batch_size = batch_size + self.num_id = num_id + self.feat_dims = feat_dims + + def forward(self, preds, targets=None, placeholder1=None, placeholder2=None): + """Forward pass for all input predictions: preds - (batch_size x feat_dims) """ + + + # ------ differentiable ranking of all retrieval set ------ + # compute the mask which ignores the relevance score of the query to itself + mask = 1.0 - torch.eye(self.batch_size, device='cuda') + mask = mask.unsqueeze(dim=0).repeat(self.batch_size, 1, 1) + # compute the relevance scores via cosine similarity of the CNN-produced embedding vectors + sim_all = compute_aff(preds) + sim_all_repeat = sim_all.unsqueeze(dim=1).repeat(1, self.batch_size, 1) + # compute the difference matrix + sim_diff = sim_all_repeat - sim_all_repeat.permute(0, 2, 1) + # pass through the sigmoid + sim_sg = sigmoid(sim_diff, temp=self.anneal) * mask.cuda() + # compute the rankings + sim_all_rk = torch.sum(sim_sg, dim=-1) + 1 + + # ------ differentiable ranking of only positive set in retrieval set ------ + # compute the mask which only gives non-zero weights to the positive set + xs = preds.view(self.num_id, int(self.batch_size / self.num_id), self.feat_dims) + pos_mask = 1.0 - torch.eye(int(self.batch_size / self.num_id), device='cuda') + pos_mask = pos_mask.unsqueeze(dim=0).unsqueeze(dim=0).repeat(self.num_id, int(self.batch_size / self.num_id), 1, 1) + # compute the relevance scores + sim_pos = torch.bmm(xs, xs.permute(0, 2, 1)) + sim_pos_repeat = sim_pos.unsqueeze(dim=2).repeat(1, 1, int(self.batch_size / self.num_id), 1) + # compute the difference matrix + sim_pos_diff = sim_pos_repeat - sim_pos_repeat.permute(0, 1, 3, 2) + # pass through the sigmoid + sim_pos_sg = sigmoid(sim_pos_diff, temp=self.anneal) * pos_mask.cuda() + # compute the rankings of the 
positive set + sim_pos_rk = torch.sum(sim_pos_sg, dim=-1) + 1 + + # sum the values of the Smooth-AP for all instances in the mini-batch + ap = torch.zeros(1).cuda() + group = int(self.batch_size / self.num_id) + for ind in range(self.num_id): + pos_divide = torch.sum(sim_pos_rk[ind] / (sim_all_rk[(ind * group):((ind + 1) * group), (ind * group):((ind + 1) * group)])) + ap = ap + ((pos_divide / group) / self.batch_size) + + return (1-ap) \ No newline at end of file diff --git a/losses/triplet_loss.py b/losses/triplet_loss.py new file mode 100644 index 0000000..8d8684b --- /dev/null +++ b/losses/triplet_loss.py @@ -0,0 +1,151 @@ +# encoding: utf-8 +import torch +from torch import nn + + +def normalize(x, axis=-1): + """Normalizing to unit length along the specified dimension. + Args: + x: pytorch Variable + Returns: + x: pytorch Variable, same shape as input + """ + x = 1. * x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) + return x + + +def euclidean_dist(x, y): + """ + Args: + x: pytorch Variable, with shape [m, d] + y: pytorch Variable, with shape [n, d] + Returns: + dist: pytorch Variable, with shape [m, n] + """ + m, n = x.size(0), y.size(0) + xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) + yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() + dist = xx + yy + dist.addmm_(1, -2, x, y.t()) + dist = dist.clamp(min=1e-12).sqrt() # for numerical stability + return dist + + +def hard_example_mining(dist_mat, labels, return_inds=False): + """For each anchor, find the hardest positive and negative sample. + Args: + dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N] + labels: pytorch LongTensor, with shape [N] + return_inds: whether to return the indices. Save time if `False`(?) + Returns: + dist_ap: pytorch Variable, distance(anchor, positive); shape [N] + dist_an: pytorch Variable, distance(anchor, negative); shape [N] + p_inds: pytorch LongTensor, with shape [N]; + indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 + n_inds: pytorch LongTensor, with shape [N]; + indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 + NOTE: Only consider the case in which all labels have same num of samples, + thus we can cope with all anchors in parallel. 
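+ A minimal usage sketch (illustrative, assuming the P x K batch layout described above):
+ >>> feats = torch.randn(8, 128) # 2 ids x 4 instances
+ >>> labels = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])
+ >>> dist_mat = euclidean_dist(feats, feats)
+ >>> dist_ap, dist_an = hard_example_mining(dist_mat, labels)
+ dist_ap[i] / dist_an[i] is then the hardest positive / negative distance for anchor i.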
+ """ + + assert len(dist_mat.size()) == 2 + assert dist_mat.size(0) == dist_mat.size(1) + N = dist_mat.size(0) + + # shape [N, N] + is_pos = labels.expand(N, N).eq(labels.expand(N, N).t()) + is_neg = labels.expand(N, N).ne(labels.expand(N, N).t()) + + # `dist_ap` means distance(anchor, positive) + # both `dist_ap` and `relative_p_inds` with shape [N, 1] + dist_ap, relative_p_inds = torch.max( + dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True) + # `dist_an` means distance(anchor, negative) + # both `dist_an` and `relative_n_inds` with shape [N, 1] + dist_an, relative_n_inds = torch.min( + dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True) + # shape [N] + dist_ap = dist_ap.squeeze(1) + dist_an = dist_an.squeeze(1) + + if return_inds: + # shape [N, N] + ind = (labels.new().resize_as_(labels) + .copy_(torch.arange(0, N).long()) + .unsqueeze(0).expand(N, N)) + # shape [N, 1] + p_inds = torch.gather( + ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data) + n_inds = torch.gather( + ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data) + # shape [N] + p_inds = p_inds.squeeze(1) + n_inds = n_inds.squeeze(1) + return dist_ap, dist_an, p_inds, n_inds + + return dist_ap, dist_an + + +class TripletLoss(object): + """Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid). + Related Triplet Loss theory can be found in paper 'In Defense of the Triplet + Loss for Person Re-Identification'.""" + + def __init__(self, margin=None): + self.margin = margin + if margin is not None: + self.ranking_loss = nn.MarginRankingLoss(margin=margin) + else: + self.ranking_loss = nn.SoftMarginLoss() + + def __call__(self, global_feat, labels, normalize_feature=False): + if normalize_feature: + global_feat = normalize(global_feat, axis=-1) + dist_mat = euclidean_dist(global_feat, global_feat) + dist_ap, dist_an = hard_example_mining( + dist_mat, labels) + y = dist_an.new().resize_as_(dist_an).fill_(1) + if self.margin is not None: + loss = self.ranking_loss(dist_an, dist_ap, y) + else: + loss = self.ranking_loss(dist_an - dist_ap, y) + return loss, dist_ap, dist_an + + +def softmax_weights(dist, mask): + max_v = torch.max(dist * mask, dim=1, keepdim=True)[0] + diff = dist - max_v + Z = torch.sum(torch.exp(diff) * mask, dim=1, keepdim=True) + 1e-6 # avoid division by zero + W = torch.exp(diff) * mask / Z + return W + + +class WeightedRegularizedTriplet(object): + + def __init__(self): + self.ranking_loss = nn.SoftMarginLoss() + + def __call__(self, global_feat, labels, normalize_feature=False): + if normalize_feature: + global_feat = normalize(global_feat, axis=-1) + dist_mat = euclidean_dist(global_feat, global_feat) + + N = dist_mat.size(0) + # shape [N, N] + is_pos = labels.expand(N, N).eq(labels.expand(N, N).t()).float() + is_neg = labels.expand(N, N).ne(labels.expand(N, N).t()).float() + + # `dist_ap` means distance(anchor, positive) + # both `dist_ap` and `relative_p_inds` with shape [N, 1] + dist_ap = dist_mat * is_pos + dist_an = dist_mat * is_neg + + weights_ap = softmax_weights(dist_ap, is_pos) + weights_an = softmax_weights(-dist_an, is_neg) + furthest_positive = torch.sum(dist_ap * weights_ap, dim=1) + closest_negative = torch.sum(dist_an * weights_an, dim=1) + + y = furthest_positive.new().resize_as_(furthest_positive).fill_(1) + loss = self.ranking_loss(closest_negative - furthest_positive, y) + + return loss, furthest_positive, closest_negative \ No newline at end of file diff --git a/modeling/__init__.py b/modeling/__init__.py index 0bfc57d..966d9ed 
100644 --- a/modeling/__init__.py +++ b/modeling/__init__.py @@ -3,6 +3,16 @@ from .baseline import Baseline def build_model(cfg, num_classes): - model = Baseline(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.PRETRAIN_PATH, cfg.MODEL.NAME, - cfg.MODEL.GENERALIZED_MEAN_POOL, cfg.MODEL.PRETRAIN_CHOICE) + model = Baseline(num_classes=num_classes, + last_stride=cfg.MODEL.LAST_STRIDE, + model_path=cfg.MODEL.PRETRAIN_PATH, + backbone=cfg.MODEL.BACKBONE, + pool_type=cfg.MODEL.BASELINE.POOL_TYPE, + use_dropout=cfg.MODEL.USE_DROPOUT, + cosine_loss_type=cfg.MODEL.BASELINE.COSINE_LOSS_TYPE, + s=cfg.MODEL.BASELINE.SCALING_FACTOR, + m=cfg.MODEL.BASELINE.MARGIN, + use_bnbias=cfg.MODEL.BASELINE.USE_BNBIAS, + use_sestn=cfg.MODEL.BASELINE.USE_SESTN, + pretrain_choice=cfg.MODEL.PRETRAIN_CHOICE) return model \ No newline at end of file diff --git a/modeling/backbones/efficientnet.py b/modeling/backbones/efficientnet.py new file mode 100644 index 0000000..f90def9 --- /dev/null +++ b/modeling/backbones/efficientnet.py @@ -0,0 +1,301 @@ +"""EfficientNet architecture. + +See: +- https://arxiv.org/abs/1905.11946 - EfficientNet +- https://arxiv.org/abs/1801.04381 - MobileNet V2 +- https://arxiv.org/abs/1905.02244 - MobileNet V3 +- https://arxiv.org/abs/1709.01507 - Squeeze-and-Excitation +- https://arxiv.org/abs/1803.02579 - Concurrent spatial and channel squeeze-and-excitation +- https://arxiv.org/abs/1812.01187 - Bag of Tricks for Image Classification with Convolutional Neural Networks + +Known issues: + +- Not using swish activation function: unclear where, if, and how + much it helps. Needs more experimentation. See also MobileNet V3. + +- Not using squeeze and excitation blocks: I had significantly worse + results with scse blocks, and cse blocks alone did not help, too. + Needs more experimentation as it was done on small datasets only. + +- Not using DropConnect: no efficient native implementation in PyTorch. + Unclear if and how much it helps over Dropout. + + +Todo: + +- See MobileNetV3 paper for tricks + - Figure 5, re-do last stages + - Use only 16 filters in head + +- Pre-train on ImageNet; at least B0, B4 + +- Provide tools for progressive resizing: initialize model n+1 with + weights from model n, and initialize new blocks and channels. 
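+ Scaling note: each Bx constructor below applies the compound scaling encoded in
+ EfficientNetParams: `width` stretches channel counts (snapped to multiples of 8
+ by scaled_width), `depth` stretches block repeat counts (scaled_depth),
+ `resolution` is the intended input size, and `dropout` is the classifier
+ dropout rate.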
+ +""" + +import math +import collections + +import torch +import torch.nn as nn + + +EfficientNetParam = collections.namedtuple("EfficientNetParam", [ + "width", "depth", "resolution", "dropout"]) + +EfficientNetParams = { + "B0": EfficientNetParam(1.0, 1.0, 224, 0.2), + "B1": EfficientNetParam(1.0, 1.1, 240, 0.2), + "B2": EfficientNetParam(1.1, 1.2, 260, 0.3), + "B3": EfficientNetParam(1.2, 1.4, 300, 0.3), + "B4": EfficientNetParam(1.4, 1.8, 380, 0.4), + "B5": EfficientNetParam(1.6, 2.2, 456, 0.4), + "B6": EfficientNetParam(1.8, 2.6, 528, 0.5), + "B7": EfficientNetParam(2.0, 3.1, 600, 0.5)} + + +def efficientnet0(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B0"], num_classes=num_classes) + +def efficientnet1(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B1"], num_classes=num_classes) + +def efficientnet2(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B2"], num_classes=num_classes) + +def efficientnet3(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B3"], num_classes=num_classes) + +def efficientnet4(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B4"], num_classes=num_classes) + +def efficientnet5(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B5"], num_classes=num_classes) + +def efficientnet6(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B6"], num_classes=num_classes) + +def efficientnet7(pretrained=False, progress=False, num_classes=1000): + return EfficientNet(param=EfficientNetParams["B7"], num_classes=num_classes) + + +class EfficientNet(nn.Module): + def __init__(self, param, num_classes=1000): + super().__init__() + + # For the exact scaling technique we follow the official implementation as the paper does not tell us + # https://github.com/tensorflow/tpu/blob/01574500090fa9c011cb8418c61d442286720211/models/official/efficientnet/efficientnet_model.py#L101-L125 + + def scaled_depth(n): + return int(math.ceil(n * param.depth)) + + # Snap number of channels to multiple of 8 for optimized implementations + def scaled_width(n): + n = n * param.width + m = max(8, int(n + 8 / 2) // 8 * 8) + + if m < 0.9 * n: + m = m + 8 + + return int(m) + + self.conv1 = nn.Conv2d(3, scaled_width(32), kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(scaled_width(32)) + self.relu = nn.ReLU6(inplace=True) + + self.layer1 = self._make_layer(n=scaled_depth(1), expansion=1, cin=scaled_width(32), cout=scaled_width(16), kernel_size=3, stride=1) + self.layer2 = self._make_layer(n=scaled_depth(2), expansion=6, cin=scaled_width(16), cout=scaled_width(24), kernel_size=3, stride=2) + self.layer3 = self._make_layer(n=scaled_depth(2), expansion=6, cin=scaled_width(24), cout=scaled_width(40), kernel_size=5, stride=2) + self.layer4 = self._make_layer(n=scaled_depth(3), expansion=6, cin=scaled_width(40), cout=scaled_width(80), kernel_size=3, stride=2) + self.layer5 = self._make_layer(n=scaled_depth(3), expansion=6, cin=scaled_width(80), cout=scaled_width(112), kernel_size=5, stride=1) + self.layer6 = self._make_layer(n=scaled_depth(4), expansion=6, cin=scaled_width(112), cout=scaled_width(192), kernel_size=5, stride=2) + self.layer7 = self._make_layer(n=scaled_depth(1), expansion=6, cin=scaled_width(192), 
cout=scaled_width(320), kernel_size=3, stride=1) + + self.features = nn.Conv2d(scaled_width(320), scaled_width(1280), kernel_size=1, bias=False) + + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.dropout = nn.Dropout(param.dropout, inplace=True) + self.fc = nn.Linear(scaled_width(1280), num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + # Zero BatchNorm weight at end of res-blocks: identity by default + # See https://arxiv.org/abs/1812.01187 Section 3.1 + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.zeros_(m.linear[1].weight) + + + def _make_layer(self, n, expansion, cin, cout, kernel_size=3, stride=1): + layers = [] + + for i in range(n): + if i == 0: + planes = cin + expand = cin * expansion + squeeze = cout + stride = stride + else: + planes = cout + expand = cout * expansion + squeeze = cout + stride = 1 + + layers += [Bottleneck(planes, expand, squeeze, kernel_size=kernel_size, stride=stride)] + + return nn.Sequential(*layers) + + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.layer5(x) + x = self.layer6(x) + x = self.layer7(x) + + x = self.features(x) + + x = self.avgpool(x) + x = x.reshape(x.size(0), -1) + x = self.dropout(x) + x = self.fc(x) + + return x + + +class Bottleneck(nn.Module): + def __init__(self, planes, expand, squeeze, kernel_size, stride): + super().__init__() + + self.expand = nn.Identity() if planes == expand else nn.Sequential( + nn.Conv2d(planes, expand, kernel_size=1, bias=False), + nn.BatchNorm2d(expand), + nn.ReLU6(inplace=True)) + + self.depthwise = nn.Sequential( + nn.Conv2d(expand, expand, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, groups=expand, bias=False), + nn.BatchNorm2d(expand), + nn.ReLU6(inplace=True)) + + #self.scse = scSE(expand, r=0.25) + + self.linear = nn.Sequential( + nn.Conv2d(expand, squeeze, kernel_size=1, bias=False), + nn.BatchNorm2d(squeeze)) + + # Make all blocks skip-able via AvgPool + 1x1 Conv + # See https://arxiv.org/abs/1812.01187 Figure 2 c + + downsample = [] + + if stride != 1: + downsample += [nn.AvgPool2d(kernel_size=stride, stride=stride)] + + if planes != squeeze: + downsample += [ + nn.Conv2d(planes, squeeze, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(squeeze)] + + self.downsample = nn.Identity() if not downsample else nn.Sequential(*downsample) + + + def forward(self, x): + xx = self.expand(x) + xx = self.depthwise(xx) + #xx = self.scse(xx) + xx = self.linear(xx) + + x = self.downsample(x) + xx.add_(x) + + return xx + + +class cSE(nn.Module): + def __init__(self, planes, r): + super().__init__() + + self.squeeze = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(planes, int(planes * r), kernel_size=1, bias=True), + nn.ReLU6(inplace=True)) + + self.expand = nn.Sequential( + nn.Conv2d(int(planes * r), planes, kernel_size=1, bias=True), + nn.Sigmoid()) + + + def forward(self, x): + xx = self.squeeze(x) + xx = self.expand(xx) + + return x * xx + + +class sSE(nn.Module): + def __init__(self, planes): + super().__init__() + + self.block = nn.Sequential( + nn.Conv2d(planes, 1, kernel_size=1, bias=True), + nn.Sigmoid()) + + + def forward(self, x): + xx = 
self.block(x) + + return x * xx + + +class scSE(nn.Module): + def __init__(self, planes, r=0.25): + super().__init__() + + self.cse = cSE(planes=planes, r=r) + self.sse = sSE(planes=planes) + + + def forward(self, x): + return self.cse(x) + self.sse(x) + + +def swish(x, inplace=False): + return x * torch.sigmoid(x) + + +class Swish(nn.Module): + def forward(self, x): + return swish(x) + + +def hardsigmoid(x): + return nn.functional.relu6(x + 3) / 6 + + +class Hardsigmoid(nn.Module): + def forward(self, x): + return hardsigmoid(x) + + +def hardswish(x): + return x * hardsigmoid(x) + + +class Hardswish(nn.Module): + def forward(self, x): + return hardswish(x) diff --git a/modeling/backbones/resnet_ibn_a.py b/modeling/backbones/resnet_ibn_a.py index d65cd54..41f60b9 100644 --- a/modeling/backbones/resnet_ibn_a.py +++ b/modeling/backbones/resnet_ibn_a.py @@ -3,6 +3,7 @@ import math import torch.utils.model_zoo as model_zoo +from ..layer.attention import SESTNLayer __all__ = ['ResNet_IBN', 'resnet50_ibn_a', 'resnet101_ibn_a', 'resnet152_ibn_a'] @@ -76,7 +77,7 @@ def forward(self, x): class ResNet_IBN(nn.Module): - def __init__(self, last_stride, block, layers, num_classes=1000): + def __init__(self, last_stride, block, layers, num_classes=1000, use_sestn=False): scale = 64 self.inplanes = scale super(ResNet_IBN, self).__init__() @@ -92,6 +93,11 @@ def __init__(self, last_stride, block, layers, num_classes=1000): self.avgpool = nn.AvgPool2d(7) self.fc = nn.Linear(scale * 8 * block.expansion, num_classes) + self.use_sestn = use_sestn + if use_sestn: + self.sestn1 = SESTNLayer(256, 16) + self.sestn2 = SESTNLayer(512, 32) + for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels @@ -130,7 +136,15 @@ def forward(self, x): x = self.maxpool(x) x = self.layer1(x) + + if self.use_sestn: + x = self.sestn1(x) + x = self.layer2(x) + + if self.use_sestn: + x = self.sestn2(x) + x = self.layer3(x) x = self.layer4(x) diff --git a/modeling/baseline.py b/modeling/baseline.py index bb6c19d..7766ccc 100644 --- a/modeling/baseline.py +++ b/modeling/baseline.py @@ -3,12 +3,13 @@ import torch from torch import nn import collections +from .backbones.efficientnet import efficientnet4, Bottleneck as BottleneckEff from .backbones.resnet import ResNet, Bottleneck from .backbones.senet import SENet, SEResNetBottleneck, SEBottleneck, SEResNeXtBottleneck from .backbones.resnet_ibn_a import resnet50_ibn_a from .backbones.resnet_nl import ResNetNL -from .layer import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet, CenterLoss, GeneralizedMeanPooling, GeneralizedMeanPoolingP - +from .layer import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet, CenterLoss, GeM +from .layer.cosine_loss import AdaCos, CosFace, ArcFace def weights_init_kaiming(m): classname = m.__class__.__name__ @@ -25,7 +26,6 @@ def weights_init_kaiming(m): nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0.0) - def weights_init_classifier(m): classname = m.__class__.__name__ if classname.find('Linear') != -1: @@ -37,27 +37,40 @@ def weights_init_classifier(m): class Baseline(nn.Module): in_planes = 2048 - def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, pretrain_choice): + def __init__(self, + num_classes, + last_stride, + model_path, + backbone="resnet50", + pool_type="avg", + use_dropout=True, + cosine_loss_type='', + s=30.0, + m=0.35, + use_bnbias=False, + use_sestn=False, + pretrain_choice=None, + training=True): super(Baseline, 
self).__init__() - if model_name == 'resnet50': + if backbone == 'resnet50': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3]) - elif model_name == 'resnet50_nl': + elif backbone == 'resnet50_nl': self.base = ResNetNL(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3], non_layers=[0, 2, 3, 0]) - elif model_name == 'resnet101': + elif backbone == 'resnet101': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 23, 3]) - elif model_name == 'resnet152': + elif backbone == 'resnet152': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 8, 36, 3]) - elif model_name == 'se_resnet50': + elif backbone == 'se_resnet50': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], groups=1, @@ -68,7 +81,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnet101': + elif backbone == 'se_resnet101': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], groups=1, @@ -79,7 +92,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnet152': + elif backbone == 'se_resnet152': self.base = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], groups=1, @@ -90,7 +103,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnext50': + elif backbone == 'se_resnext50': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32, @@ -101,7 +114,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnext101': + elif backbone == 'se_resnext101': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], groups=32, @@ -112,49 +125,93 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'senet154': + elif backbone == 'senet154': self.base = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], groups=64, reduction=16, dropout_p=0.2, last_stride=last_stride) - elif model_name == 'resnet50_ibn_a': - self.base = resnet50_ibn_a(last_stride) - + elif backbone == 'resnet50_ibn_a': + self.base = resnet50_ibn_a(last_stride, use_sestn=use_sestn) + if pretrain_choice == 'imagenet': self.base.load_param(model_path) print('Loading pretrained ImageNet model......') self.num_classes = num_classes + in_features = self.in_planes - if gem_pool == 'on': - print("Generalized Mean Pooling") - self.global_pool = GeneralizedMeanPoolingP() + if pool_type == "avg": + self.gap = nn.AdaptiveAvgPool2d(1) + elif "gem" in pool_type: + if pool_type != "gem": + p = pool_type.split()[-1] + p = float(p) + self.gap = GeM(p=p, eps=1e-6, freeze_p=True) + else: + self.gap = GeM(eps=1e-6, freeze_p=False) + elif pool_type == "max": + self.gap = nn.AdaptiveMaxPool2d(1) + elif "Att" in pool_type: + self.gap = eval(pool_type)(in_features = in_features) + in_features = self.gap.out_features(in_features) else: - print("Global Adaptive Pooling") - self.global_pool = nn.AdaptiveAvgPool2d(1) - - self.bottleneck = nn.BatchNorm1d(self.in_planes) - self.bottleneck.bias.requires_grad_(False) # no shift 
- self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) - + self.gap = eval(pool_type) + in_features = self.gap.out_features(in_features) + + # ? legacy code + # if gem_pool: + # print("Generalized Mean Pooling") + # self.global_pool = GeneralizedMeanPoolingP() + # else: + # print("Global Adaptive Pooling") + # self.global_pool = nn.AdaptiveAvgPool2d(1) + + # bnneck + self.bottleneck = nn.BatchNorm1d(in_features) + if not use_bnbias: + self.bottleneck.bias.requires_grad = False + print("==> remove bnneck bias") + else: + print("==> using bnneck bias") self.bottleneck.apply(weights_init_kaiming) - self.classifier.apply(weights_init_classifier) - - def forward(self, x): - x = self.base(x) + + if cosine_loss_type == '': + self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) + self.classifier.apply(weights_init_classifier) + else: + if cosine_loss_type == 'AdaCos': + self.classifier = eval(cosine_loss_type)(in_features, self.num_classes, m) + # CosFace + else: + self.classifier = eval(cosine_loss_type)(in_features, self.num_classes, s, m) + self.cosine_loss_type = cosine_loss_type + self.use_dropout = use_dropout - global_feat = self.global_pool(x) # (b, 2048, 1, 1) + def forward(self, x, label=None): + global_feat = self.gap(self.base(x)) # (b, 2048, 1, 1) global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) feat = self.bottleneck(global_feat) # normalize for angular softmax - if not self.training: + if self.training: + if self.use_dropout: + feat = self.gap(self.base(x)) + # FIXME: dropout is never actually applied in this branch; the line above recomputes backbone + pooling and leaves `feat` as a 4-d tensor, so the use_dropout path looks broken as committed + if self.cosine_loss_type == '': + cls_score = self.classifier(feat) + else: + # assert label is not None + cls_score = self.classifier(feat, label) + return cls_score, global_feat # global feature for triplet loss + else: return feat - cls_score = self.classifier(feat) - return cls_score, global_feat + # ?
legacy code + # if not self.training: + # return feat + + # cls_score = self.classifier(feat) + # return cls_score, global_feat def load_param(self, trained_path): param_dict = torch.load(trained_path) @@ -178,8 +235,8 @@ def get_optimizer(self, cfg, criterion): optimizer['model'] = getattr(torch.optim, cfg.SOLVER.OPTIMIZER_NAME)(params, momentum=cfg.SOLVER.MOMENTUM) else: optimizer['model'] = getattr(torch.optim, cfg.SOLVER.OPTIMIZER_NAME)(params) - if cfg.MODEL.CENTER_LOSS == 'on': - optimizer['center'] = torch.optim.SGD(criterion['center'].parameters(), lr=cfg.SOLVER.CENTER_LR) + if cfg.SOLVER.CENTER_LOSS.USE: + optimizer['center'] = torch.optim.SGD(criterion['center'].parameters(), lr=cfg.SOLVER.CENTER_LOSS.LR, momentum=cfg.SOLVER.MOMENTUM) return optimizer def get_creterion(self, cfg, num_classes): @@ -192,17 +249,16 @@ def get_creterion(self, cfg, num_classes): else: criterion['triplet'] = TripletLoss(cfg.SOLVER.MARGIN) # triplet loss - if cfg.MODEL.CENTER_LOSS == 'on': - criterion['center'] = CenterLoss(num_classes=num_classes, feat_dim=cfg.MODEL.CENTER_FEAT_DIM, + if cfg.SOLVER.CENTER_LOSS.USE: + criterion['center'] = CenterLoss(num_classes=num_classes, feat_dim=cfg.SOLVER.CENTER_LOSS.NUM_FEATS, use_gpu=True) def criterion_total(score, feat, target): loss = criterion['xent'](score, target) + criterion['triplet'](feat, target)[0] - if cfg.MODEL.CENTER_LOSS == 'on': - loss = loss + cfg.SOLVER.CENTER_LOSS_WEIGHT * criterion['center'](feat, target) + if cfg.SOLVER.CENTER_LOSS.USE: + loss = loss + cfg.SOLVER.CENTER_LOSS.WEIGHT * criterion['center'](feat, target) return loss criterion['total'] = criterion_total - return criterion - + return criterion \ No newline at end of file diff --git a/modeling/layer/__init__.py b/modeling/layer/__init__.py index 8635ca9..43b0579 100644 --- a/modeling/layer/__init__.py +++ b/modeling/layer/__init__.py @@ -3,4 +3,4 @@ from .center_loss import CenterLoss from .triplet_loss import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet from .non_local import Non_local -from .gem_pool import GeneralizedMeanPooling, GeneralizedMeanPoolingP \ No newline at end of file +from .pooling import GeM \ No newline at end of file diff --git a/modeling/layer/attention.py b/modeling/layer/attention.py new file mode 100644 index 0000000..8fa37af --- /dev/null +++ b/modeling/layer/attention.py @@ -0,0 +1,98 @@ +import torch +from torch import nn + +class SELayer(nn.Module): + def __init__(self, channel, reduction=64, multiply=True): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid() + ) + self.multiply = multiply + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + if self.multiply == True: + return x * y + else: + return y + + +class STNLayer(nn.Module): + def __init__(self, channel_in, multiply=True): + super(STNLayer, self).__init__() + c = channel_in + C = c//32 + self.multiply = multiply + self.conv_in = nn.Conv2d(c, C, kernel_size=1) + self.conv_out = nn.Conv2d(C, 1, kernel_size=1) + # Encoder + self.conv1 = nn.Conv2d(C, 2*C, kernel_size=3) + self.bn1 = nn.BatchNorm2d(2*C) + self.ReLU1 = nn.ReLU(True) + self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True) + self.conv2 = nn.Conv2d(2*C, 4*C, kernel_size=3) + self.bn2 = nn.BatchNorm2d(4*C) + self.ReLU2 = nn.ReLU(True) + + # Decoder + self.deconv1 
= nn.ConvTranspose2d(4*C, 2*C, kernel_size=3) + self.bn3 = nn.BatchNorm2d(2*C) + self.ReLU3 = nn.ReLU(True) + self.unpool1 = nn.MaxUnpool2d(kernel_size=2) + self.deconv2 = nn.ConvTranspose2d(2*C, C, kernel_size=3) + self.bn4 = nn.BatchNorm2d(C) + self.ReLU4 = nn.ReLU(True) + + + def forward(self, x): + b, c, _, _ = x.size() + #print("modules: x.shape: " + str(x.shape)) + y = self.conv_in(x) + + # Encode + y = self.conv1(y) + y = self.bn1(y) + y = self.ReLU1(y) + size1 = y.size() + y, indices1 = self.pool1(y) + y = self.conv2(y) + y = self.bn2(y) + y = self.ReLU2(y) + + # Decode + y = self.deconv1(y) + y = self.bn3(y) + y = self.ReLU3(y) + y = self.unpool1(y,indices1,size1) + y = self.deconv2(y) + y = self.bn4(y) + y = self.ReLU4(y) + + y = self.conv_out(y) + #torch.save(y,'./STN_stage1.pkl') + if self.multiply == True: + return x * y + else: + return y + + +class SESTNLayer(nn.Module): + def __init__(self, channel_in, r): + super(SESTNLayer, self).__init__() + c = channel_in + self.se = SELayer(channel=c, reduction=r, multiply=False) + self.stn = STNLayer(channel_in=c, multiply=False) + # self.activation = nn.Hardtanh(inplace=True) # dead assignment: immediately overridden by the ReLU below + self.activation = nn.ReLU(True) + + + def forward(self, x): + y = self.se(x) + z = self.stn(x) + a = self.activation(y+z) # Final joint attention map + return x + x*a \ No newline at end of file diff --git a/modeling/layer/circle_loss.py b/modeling/layer/circle_loss.py new file mode 100644 index 0000000..2674d1e --- /dev/null +++ b/modeling/layer/circle_loss.py @@ -0,0 +1,38 @@ +import torch +import torch.nn as nn + +class CircleLoss(nn.Module): + def __init__(self, m=0.25, s=96): + super(CircleLoss, self).__init__() + self.m = m + self.s = s + self.soft_plus = nn.Softplus() + + def forward(self, feat1, label1, feat2_t, label2_t): + sim_mat = torch.mm(feat1, feat2_t) + + N, M = sim_mat.size() + + is_pos = label1.view(N, 1).expand(N, M).eq(label2_t.expand(N, M)).float() + + same_indx = torch.eye(N, N, device='cuda') + remain_indx = torch.zeros(N, M - N, device='cuda') + same_indx = torch.cat((same_indx, remain_indx), dim=1) + is_pos = is_pos - same_indx + + is_neg = label1.view(N, 1).expand(N, M).ne(label2_t.expand(N, M)).float() + + s_p = sim_mat * is_pos + s_n = sim_mat * is_neg + + alpha_p = torch.clamp_min(-s_p.detach() + 1 + self.m, min=0.) + alpha_n = torch.clamp_min(s_n.detach() + self.m, min=0.)
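+ # Same formulation as losses/circle_loss.py: delta_p = 1-m and delta_n = m
+ # below set the per-pair decision margins (s_p pushed above 1-m, s_n below m).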
+ delta_p = 1 - self.m + delta_n = self.m + + logit_p = -self.s * alpha_p * (s_p - delta_p) + logit_n = self.s * alpha_n * (s_n - delta_n) + + loss = nn.functional.softplus(torch.logsumexp(logit_p, dim=1) + torch.logsumexp(logit_n, dim=1)).mean() + + return loss \ No newline at end of file diff --git a/modeling/layer/cosine_loss.py b/modeling/layer/cosine_loss.py new file mode 100644 index 0000000..6e91343 --- /dev/null +++ b/modeling/layer/cosine_loss.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import Parameter +import math + +class ArcFace(nn.Module): + def __init__(self, num_features, num_classes, s=30.0, m=0.50): + super(ArcFace, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = s + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + + def forward(self, input, label=None): + # normalized features + x = F.normalize(input) + # normalized weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + # add margin + theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7)) + target_logits = torch.cos(theta + self.m) + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + output = logits * (1 - one_hot) + target_logits * one_hot + # feature re-scale + output *= self.s + + return output + +# class ArcCos(nn.Module): +# def __init__(self, in_features, out_features, s=30.0, m=0.50, bias=False): +# super(ArcCos, self).__init__() +# self.in_features = in_features +# self.out_features = out_features +# self.s = s +# self.m = m +# self.cos_m = math.cos(m) +# self.sin_m = math.sin(m) + +# self.th = math.cos(math.pi - m) +# self.mm = math.sin(math.pi - m) * m + +# self.weight = Parameter(torch.Tensor(out_features, in_features)) +# if bias: +# self.bias = Parameter(torch.Tensor(out_features)) +# else: +# self.register_parameter('bias', None) +# self.reset_parameters() + +# def reset_parameters(self): +# nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) +# if self.bias is not None: +# fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) +# bound = 1 / math.sqrt(fan_in) +# nn.init.uniform_(self.bias, -bound, bound) + +# def forward(self, input, label): +# # fix for + +class AdaCos(nn.Module): + def __init__(self, num_features, num_classes, m=0.50): + super(AdaCos,self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = math.sqrt(2) * math.log(num_classes - 1) + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + def forward(self, input, label=None): + # normalize features + x = F.normalize(input) + # normalize weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + # adaptively update the scale s, then re-scale the logits + theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7)) + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + with torch.no_grad(): + B_avg = torch.where(one_hot < 1, torch.exp(self.s * logits), torch.zeros_like(logits)) + B_avg = torch.sum(B_avg) / input.size(0) + theta_med = torch.median(theta[one_hot == 1]) + self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med)) + output = self.s * logits + return output + +class CosFace(nn.Module): + def
__init__(self, num_features, num_classes, s=30.0, m=0.35): + super(CosFace, self).__init__() + self.num_features = num_features + self.num_classes = num_classes + self.s = s + self.m = m + self.W = Parameter(torch.FloatTensor(num_classes, num_features)) + nn.init.xavier_uniform_(self.W) + + def forward(self, input, label=None): + # normalize feature + x = F.normalize(input) + # normalize weights + W = F.normalize(self.W) + # dot product + logits = F.linear(x, W) + if label is None: + return logits + + # # * add margin version + # target_logits = logits - self.m + # one_hot = torch.zeros_like(logits) + # one_hot.scatter_(1, labels.view(-1, 1).long(), 1) + # output = logits - (1 - one_hot) + target_logits * one_hot + + one_hot = torch.zeros_like(logits) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + output = logits - one_hot * self.m + # feature re-scale + output *= self.s + + return output + + def __repr__(self): + return self.__class__.__name__ +\ + '(' + 'num_features='+'{}'.format(self.num_features) + \ + ','+'num_classes=' + '{}'.format(self.num_classes) +\ + ', ' + 's=' + str(self.s) + \ + ', ' + 'm=' + str(self.m) +\ + ')' \ No newline at end of file diff --git a/modeling/layer/functional.py b/modeling/layer/functional.py new file mode 100644 index 0000000..acd59b8 --- /dev/null +++ b/modeling/layer/functional.py @@ -0,0 +1,9 @@ +import math +import torch +import torch.nn.functional as F + +def gem(x, p=3, eps=1e-6): + return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1. / p) + +def adaptive_gem2d(x, output_size=(1, 1), p=3, eps=1e-6): + return F.adaptive_avg_pool2d(x.clamp(min=eps).pow(p), output_size).pow(1. / p) \ No newline at end of file diff --git a/modeling/layer/gem_pool.py b/modeling/layer/gem_pool.py deleted file mode 100644 index c0d0243..0000000 --- a/modeling/layer/gem_pool.py +++ /dev/null @@ -1,43 +0,0 @@ -# encoding: utf-8 - -import torch -from torch import nn - - -class GeneralizedMeanPooling(nn.Module): - r"""Applies a 2D power-average adaptive pooling over an input signal composed of several input planes. - The function computed is: :math:`f(X) = pow(sum(pow(X, p)), 1/p)` - - At p = infinity, one gets Max Pooling - - At p = 1, one gets Average Pooling - The output is of size H x W, for any input size. - The number of output features is equal to the number of input planes. - Args: - output_size: the target output size of the image of the form H x W. - Can be a tuple (H, W) or a single H for a square image H x H - H and W can be either a ``int``, or ``None`` which means the size will - be the same as that of the input. - """ - - def __init__(self, norm, output_size=1, eps=1e-6): - super(GeneralizedMeanPooling, self).__init__() - assert norm > 0 - self.p = float(norm) - self.output_size = output_size - self.eps = eps - - def forward(self, x): - x = x.clamp(min=self.eps).pow(self.p) - return torch.nn.functional.adaptive_avg_pool2d(x, self.output_size).pow(1.
/ self.p) - - def __repr__(self): - return self.__class__.__name__ + '(' \ - + str(self.p) + ', ' \ - + 'output_size=' + str(self.output_size) + ')' - - -class GeneralizedMeanPoolingP(GeneralizedMeanPooling): - """ Same, but norm is trainable - """ - def __init__(self, norm=3, output_size=1, eps=1e-6): - super(GeneralizedMeanPoolingP, self).__init__(norm, output_size, eps) - self.p = nn.Parameter(torch.ones(1) * norm) \ No newline at end of file diff --git a/modeling/layer/pooling.py b/modeling/layer/pooling.py new file mode 100644 index 0000000..e11d0ea --- /dev/null +++ b/modeling/layer/pooling.py @@ -0,0 +1,89 @@ +# encoding: utf-8 +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.parameter import Parameter + +from .functional import gem, adaptive_gem2d + +class GeM(nn.Module): + def __init__(self, p=3.0, eps=1e-6, freeze_p=True): + super(GeM, self).__init__() + self.p = p if freeze_p else Parameter(torch.ones(1) * p) + self.eps = eps + self.freeze_p = freeze_p + def forward(self, x): + # return LF.gem(x, p=self.p, eps=self.eps) + return gem(x, p=self.p, eps=self.eps) + + def __repr__(self): + if isinstance(self.p, float): + p = self.p + else: + p = self.p.data.tolist()[0] + return self.__class__.__name__ +\ + '(' + 'p=' + '{:.4f}'.format(p) +\ + ', ' + 'eps=' + str(self.eps) + \ + ', ' + 'freeze_p=' + str(self.freeze_p) +\ + ')' + +class AdaptiveGeM2d(nn.Module): + def __init__(self, output_size=(1, 1), p=3.0, eps=1e-6, freeze_p=True): + super(AdaptiveGeM2d, self).__init__() + self.output_size = output_size + self.p = p if freeze_p else Parameter(torch.ones(1) * p) + self.eps = eps + self.freeze_p = freeze_p + + def forward(self, x): + return adaptive_gem2d(x, self.output_size, p=self.p, eps=self.eps) + def __repr__(self): + if isinstance(self.p, float): + p = self.p + else: + p = self.p.data.tolist()[0] + return self.__class__.__name__ +\ + '(' + 'output_size='+'{}'.format(self.output_size) + \ + ','+'p=' + '{:.4f}'.format(p) +\ + ', ' + 'eps=' + str(self.eps) + \ + ', ' + 'freeze_p=' + str(self.freeze_p) +\ + ')' + +# ? legacy code +# class GeneralizedMeanPooling(nn.Module): +# """Applies a 2D power-average adaptive pooling over an input signal composed of several input planes. +# The function computed is: :math:`f(X) = pow(sum(pow(X, p)), 1/p)` +# - At p = infinity, one gets Max Pooling +# - At p = 1, one gets Average Pooling +# The output is of size H x W, for any input size. +# The number of output features is equal to the number of input planes. +# Args: +# output_size: the target output size of the image of the form H x W. +# Can be a tuple (H, W) or a single H for a square image H x H +# H and W can be either a ``int``, or ``None`` which means the size will +# be the same as that of the input. +# """ + +# def __init__(self, norm, output_size=1, eps=1e-6): +# super(GeneralizedMeanPooling, self).__init__() +# assert norm > 0 +# self.p = float(norm) +# self.output_size = output_size +# self.eps = eps + +# def forward(self, x): +# x = x.clamp(min=self.eps).pow(self.p) +# return torch.nn.functional.adaptive_avg_pool2d(x, self.output_size).pow(1. 
/ self.p) + +# def __repr__(self): +# return self.__class__.__name__ + '(' \ +# + str(self.p) + ', ' \ +# + 'output_size=' + str(self.output_size) + ')' + + +# class GeneralizedMeanPoolingP(GeneralizedMeanPooling): +# """ Same, but norm is trainable +# """ +# def __init__(self, norm=3, output_size=1, eps=1e-6): +# super(GeneralizedMeanPoolingP, self).__init__(norm, output_size, eps) +# self.p = nn.Parameter(torch.ones(1) * norm) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..37112ab --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# pip install torch===1.4.0 torchvision===0.5.0 -f https://download.pytorch.org/whl/torch_stable.html +torch==1.4.0 +torchvision +ignite +yacs +# conda install -c pytorch ignite +# pip install pytorch-ignite==0.1.2 \ No newline at end of file diff --git a/tools/demo.py b/tools/demo.py new file mode 100644 index 0000000..cdc732c --- /dev/null +++ b/tools/demo.py @@ -0,0 +1,111 @@ +import argparse +import scipy.io +import torch +import numpy as np +import os +from torchvision import datasets +import matplotlib +matplotlib.use('agg') +import matplotlib.pyplot as plt +####################################################################### +# Evaluate +parser = argparse.ArgumentParser(description='Demo') +parser.add_argument('--query_index', default=777, type=int, help='test_image_index') +parser.add_argument('--test_dir',default='../Market/pytorch',type=str, help='./test_data') +opts = parser.parse_args() + +data_dir = opts.test_dir +# need to create different name for different dataset +image_datasets = {x: datasets.ImageFolder( os.path.join(data_dir,x) ) for x in ['bounding_box_test','query']} + +##################################################################### +#Show result +def imshow(path, title=None): + """Imshow for Tensor.""" + im = plt.imread(path) + plt.imshow(im) + if title is not None: + plt.title(title) + plt.pause(0.001) # pause a bit so that plots are updated + +###################################################################### +result = scipy.io.loadmat('pytorch_result.mat') +query_feature = torch.FloatTensor(result['query_f']) +query_cam = result['query_cam'][0] +query_label = result['query_label'][0] +gallery_feature = torch.FloatTensor(result['gallery_f']) +gallery_cam = result['gallery_cam'][0] +gallery_label = result['gallery_label'][0] + +multi = os.path.isfile('multi_query.mat') + +if multi: + m_result = scipy.io.loadmat('multi_query.mat') + mquery_feature = torch.FloatTensor(m_result['mquery_f']) + mquery_cam = m_result['mquery_cam'][0] + mquery_label = m_result['mquery_label'][0] + mquery_feature = mquery_feature.cuda() + +query_feature = query_feature.cuda() +gallery_feature = gallery_feature.cuda() + +####################################################################### +# sort the images +def sort_img(qf, ql, qc, gf, gl, gc): + query = qf.view(-1,1) + # print(query.shape) + score = torch.mm(gf,query) + score = score.squeeze(1).cpu() + score = score.numpy() + # predict index + index = np.argsort(score) #from small to large + index = index[::-1] + # index = index[0:2000] + # good index + query_index = np.argwhere(gl==ql) + #same camera + camera_index = np.argwhere(gc==qc) + + #good_index = np.setdiff1d(query_index, camera_index, assume_unique=True) + junk_index1 = np.argwhere(gl==-1) + junk_index2 = np.intersect1d(query_index, camera_index) + junk_index = np.append(junk_index2, junk_index1) + + mask = np.in1d(index, junk_index, invert=True) + index = index[mask] 
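+ # Market-1501 junk protocol: gallery label -1 marks distractor detections,
+ # and same-id images from the query's own camera are excluded from scoring,
+ # so both were masked out of the ranking above before it is returned.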
+ return index + +i = opts.query_index +index = sort_img(query_feature[i],query_label[i],query_cam[i],gallery_feature,gallery_label,gallery_cam) + +######################################################################## +# Visualize the rank result + +query_path, _ = image_datasets['query'].imgs[i] +query_label = query_label[i] +print(query_path) +print('Top 10 images are as follows:') +try: # Visualize Ranking Result + # Graphical User Interface is needed + fig = plt.figure(figsize=(16,4)) + ax = plt.subplot(1,11,1) + ax.axis('off') + imshow(query_path,'query') + for i in range(10): + ax = plt.subplot(1,11,i+2) + ax.axis('off') + img_path, _ = image_datasets['bounding_box_test'].imgs[index[i]] # the gallery split was loaded under the 'bounding_box_test' key above + label = gallery_label[index[i]] + imshow(img_path) + if label == query_label: + ax.set_title('%d'%(i+1), color='green') + else: + ax.set_title('%d'%(i+1), color='red') + print(img_path) +except RuntimeError: + for i in range(10): + img_path = image_datasets['bounding_box_test'].imgs[index[i]] + print(img_path[0]) + print('If you want to see the visualization of the ranking result, graphical user interface is needed.') + +fig.savefig("show.png") diff --git a/tools/embedding_projector.py b/tools/embedding_projector.py new file mode 100644 index 0000000..2cdc381 --- /dev/null +++ b/tools/embedding_projector.py @@ -0,0 +1,90 @@ +# encoding: utf-8 +import logging +import torchvision +import torch +import torch.nn as nn +from ignite.engine import Engine, Events +from utils.reid_metric import r1_mAP_mINP, r1_mAP_mINP_reranking +from ignite.handlers import Timer +import matplotlib.pyplot as plt +import numpy as np +import pickle +import os + +import tensorflow as tf +import tensorboard as tb +tf.io.gfile = tb.compat.tensorflow_stub.io.gfile + +global ITER +ITER = 0 +feat_ls = [] +cam_ls = [] +label_ls = [] +from torch.utils.tensorboard import SummaryWriter +global writer +writer = SummaryWriter('./log/market1501/embedding_projector/') + +def create_supervised_evaluator(model, metrics, device=None): + def _inference(engine, batch): + global ITER + model.eval() + with torch.no_grad(): + data, pids, camids = batch + data = data.to(device) if torch.cuda.device_count() >= 1 else data + feat = model(data) + return feat, pids, camids + engine = Engine(_inference) + for name, metric in metrics.items(): + metric.attach(engine, name) + return engine + +def do_embedding_projector( + cfg, + model, + data_loader, + num_query +): + global feat_ls, cam_ls, label_ls + device = cfg.MODEL.DEVICE + + logger = logging.getLogger("embedding projector") + logger.info("Enter embedding images") + + if cfg.TEST.RE_RANKING == 'off': + print("Create evaluator") + evaluator = create_supervised_evaluator(model, metrics={'r1_mAP_mINP': r1_mAP_mINP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)}, + device=device) + elif cfg.TEST.RE_RANKING == 'on': + print("Create evaluator for reranking") + evaluator = create_supervised_evaluator(model, metrics={'r1_mAP_mINP': r1_mAP_mINP_reranking(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)}, + device=device) + else: + print("Unsupported re_ranking config. Only support for on or off, but got {}.".format(cfg.TEST.RE_RANKING)) + + @evaluator.on(Events.ITERATION_COMPLETED) + def append_embedding_query(engine): + global feat_ls, cam_ls, label_ls + global ITER + ITER += 1 + feat_ls.append(evaluator.state.output[0]) + cam_ls.extend(evaluator.state.output[2]) + label_ls.extend(evaluator.state.output[1]) + + evaluator.run(data_loader['eval']) + + features = (torch.cat(feat_ls)).to(device) + # ?
RGB + # features = features.view(-1, cfg.INPUT.IMG_SIZE[0] * cfg.INPUT.IMG_SIZE[1]) + print(label_ls) + writer.add_embedding(features, + metadata=label_ls, + global_step=1) + writer.close() + + cmc, mAP, mINP = evaluator.state.metrics['r1_mAP_mINP'] + logger.info('Validation Results') + logger.info("mINP: {:.1%}".format(mINP)) + logger.info("mAP: {:.1%}".format(mAP)) + for r in [1, 5, 10]: + logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1])) + diff --git a/tools/main.py b/tools/main.py index 31e0c60..1209f73 100644 --- a/tools/main.py +++ b/tools/main.py @@ -1,12 +1,10 @@ # encoding: utf-8 - import argparse import os import sys import torch from torch.backends import cudnn - sys.path.append('.') from config import cfg from data import make_data_loader @@ -15,20 +13,22 @@ from utils.logger import setup_logger from tools.train import do_train from tools.test import do_test - +from tools.visualize import do_visualize +from tools.embedding_projector import do_embedding_projector +from tools.visualize_no_label import do_visualize_no_label def main(): parser = argparse.ArgumentParser(description="AGW Re-ID Baseline") + # load argument from config file parser.add_argument( "--config_file", default="", help="path to config file", type=str ) + parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) - args = parser.parse_args() num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 - if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) @@ -53,25 +53,51 @@ def main(): os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID # new add by gu cudnn.benchmark = True - data_loader, num_query, num_classes = make_data_loader(cfg) - model = build_model(cfg, num_classes) + # 1. Build Model + if cfg.VISUALIZE.OPTION == "on_no_label" : + data_loader = make_data_loader(cfg) + model = build_model(cfg, 1) + else : + data_loader, num_query, num_classes = make_data_loader(cfg) + model = build_model(cfg, num_classes) if 'cpu' not in cfg.MODEL.DEVICE: if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) model.to(device=cfg.MODEL.DEVICE) + # 2. 
Select Option Mode + if cfg.VISUALIZE.OPTION == 'on': + logger.info("Visualize Only") + model.load_param(cfg.TEST.WEIGHT) + # test + do_visualize(cfg, model, data_loader, num_query) + return + if cfg.EMBEDDING_PROJECTOR.OPTION == 'on': + logger.info("Eval and Visualize embedding projector") + model.load_param(cfg.TEST.WEIGHT) + do_embedding_projector(cfg, model, data_loader, num_query) + return + if cfg.VISUALIZE.OPTION == "on_no_label" : + logger.info("Visualize no label Only") + model.load_param(cfg.TEST.WEIGHT) + do_visualize_no_label(cfg, model, data_loader) + return if cfg.TEST.EVALUATE_ONLY == 'on': logger.info("Evaluate Only") model.load_param(cfg.TEST.WEIGHT) + # test do_test(cfg, model, data_loader, num_query) return - criterion = model.get_creterion(cfg, num_classes) optimizer = model.get_optimizer(cfg, criterion) # Add for using self trained model - if cfg.MODEL.PRETRAIN_CHOICE == 'self': + if cfg.MODEL.PRETRAIN_CHOICE == 'self' and cfg.MODEL.TRANSFER_MODE == "off": + to_load = {'model': model, + 'optimizer': optimizer['model'], + 'center_param': criterion['center'], + 'optimizer_center': optimizer['center']} start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1]) print('Start epoch:', start_epoch) path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer') @@ -80,17 +106,33 @@ def main(): print('Path to the checkpoint of center_param:', path_to_center_param) path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer_center') print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center) - model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) - optimizer['model'].load_state_dict(torch.load(path_to_optimizer)) - criterion['center'].load_state_dict(torch.load(path_to_center_param)) - optimizer['center'].load_state_dict(torch.load(path_to_optimizer_center)) + # model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH)) + model.load_param(cfg.MODEL.PRETRAIN_PATH) + # optimizer['model'].load_param(path_to_optimizer) + # criterion['center'].load_param(path_to_center_param) + # optimizer['center'].load_param(path_to_optimizer_center) + # + optimizer['model'].load_state_dict(torch.load(path_to_optimizer).state_dict()) + criterion['center'].load_state_dict(torch.load(path_to_center_param).state_dict()) + optimizer['center'].load_state_dict(torch.load(path_to_optimizer_center).state_dict()) scheduler = WarmupMultiStepLR(optimizer['model'], cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD, start_epoch) + elif cfg.MODEL.PRETRAIN_CHOICE == 'self' and cfg.MODEL.TRANSFER_MODE == "on": + start_epoch = 0 + print('Start epoch:', start_epoch) + path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer') + print('Path to the checkpoint of optimizer:', path_to_optimizer) + path_to_center_param = cfg.MODEL.PRETRAIN_PATH.replace('model', 'center_param') + print('Path to the checkpoint of center_param:', path_to_center_param) + path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer_center') + print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center) + model.load_param(cfg.MODEL.PRETRAIN_PATH) + scheduler = WarmupMultiStepLR(optimizer['model'], cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, + cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet': start_epoch = 0 scheduler = WarmupMultiStepLR(optimizer['model'], cfg.SOLVER.STEPS, 
cfg.SOLVER.GAMMA, cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS, cfg.SOLVER.WARMUP_METHOD) - else: print('Only support pretrain_choice for imagenet and self, but got {}'.format(cfg.MODEL.PRETRAIN_CHOICE)) diff --git a/tools/oxygen-data-preparation/generate_mock.py b/tools/oxygen-data-preparation/generate_mock.py new file mode 100644 index 0000000..7ca241d --- /dev/null +++ b/tools/oxygen-data-preparation/generate_mock.py @@ -0,0 +1,32 @@ +import os +import random +import string + +from utils import create_dir + +def get_random_string(length): + letters = string.ascii_lowercase + result_str = ''.join(random.choice(letters) for i in range(length)) + return result_str + + +src_dir_path = './mock_Dataset_without_eth' + +gender_names = ['male', 'female'] +age_names =['children', 'teenagers', 'young-adults', 'adults', 'seniors'] + +num_id = 1000 + +for i in range(1, num_id + 1): + dir_path = src_dir_path + '/' + str(i) + create_dir(dir_path) + + for j in range(random.randint(5, 50)): + file_name = gender_names[random.randint(0, len(gender_names)-1)] + '_' + \ + age_names[random.randint(0, len(age_names)-1)] + '_' + \ + 'cam-' + str(random.randint(1, 100)) + '_' + \ + '[date]_' + \ + get_random_string(8) + '.jpg' + with open(dir_path + '/' + file_name, 'w') as f: + pass + diff --git a/tools/oxygen-data-preparation/labelled2dataset.py b/tools/oxygen-data-preparation/labelled2dataset.py new file mode 100644 index 0000000..c347db8 --- /dev/null +++ b/tools/oxygen-data-preparation/labelled2dataset.py @@ -0,0 +1,101 @@ +import os +import shutil +import numpy as np +from sklearn.model_selection import train_test_split +from tqdm import tqdm +import random + +from utils import create_dir + +seed = 1024 +np.random.seed(seed) +random.seed(seed) + +# Dataset_without_eth +# - date +# [Y] [id]/[gender]_[age]_[anonymous_cam]_[date_timestamp]_[anonymous].jpg + +# labelled dataset to labelled-pool +src_dir_path = './mock_Dataset_without_eth' +dst_dir_path = './Oxygen1' +train_dir_path = dst_dir_path + '/bounding_box_train' +create_dir(train_dir_path) +query_dir_path = dst_dir_path + '/query' +create_dir(query_dir_path) +gallery_dir_path = dst_dir_path + '/bounding_box_test' +create_dir(gallery_dir_path) +hasDistractor = True +if hasDistractor: + distractor_dir_path = dst_dir_path + '/distractor' + create_dir(distractor_dir_path) +num_distractor = 10 + +org_img_paths = list() +processed_file_names = list() +path2newname_mapper = dict() + +num_people = len(os.listdir(src_dir_path)) +distractor_pids = list(range(1, num_people+1)) +random.shuffle(distractor_pids) +distractor_pids = distractor_pids[:num_distractor] +distractor_uniques = [e-1 for e in sorted(distractor_pids)] +distractor_pairs = list() + +for root, subdirs, file_names in os.walk(src_dir_path): + if len(file_names) != 0: + pid = root[root.rfind('/')+1:] + for file_name in file_names: + if file_name[0] == '.': continue + gender, age, camid, timestamp, uniqueid = file_name.split('_') + # ! 
uniqueid (unique_id + '.jpg') + if hasDistractor: + if int(pid) in distractor_pids: + distractor_pairs.append([root + '/' + file_name, pid + '_' + camid + '_' + uniqueid]) + else: + path2newname_mapper[root + '/' + file_name] = pid + '_' + camid + '_' + uniqueid + else: + path2newname_mapper[root + '/' + file_name] = pid + '_' + camid + '_' + uniqueid + +# split train, test, distractor +# stratify between train and test +labels = [int(file_name.split('_')[0]) - 1 for file_name in path2newname_mapper.values()] + +print('The number of ids : ', num_people) +print(len(list(path2newname_mapper.keys()))) +print('-' * 30) +print(len(labels)) + +pairs = np.array([[img_path, file_name] for img_path, file_name in path2newname_mapper.items()]) +train_pairs, test_pairs, train_labels, test_labels = train_test_split(pairs, labels, stratify=labels, test_size=0.64) +gallery_pairs, query_pairs, gallery_labels, query_labels = train_test_split(test_pairs, test_labels, stratify=test_labels, test_size=0.15 ) + +train_uniques = np.unique(np.array(train_labels)) +gallery_uniques = np.unique(np.array(gallery_labels)) +query_uniques = np.unique(np.array(query_labels)) +print('train unique id :', len(train_uniques)) +print('gallery unique id :', len(gallery_uniques)) +print('query unique id : ', len(query_uniques)) + +with open(dst_dir_path + '/' + 'train_uniques.txt', 'w') as f: + for i in list(train_uniques): + f.write(str(i) + '\n') +with open(dst_dir_path + '/' + 'gallery_uniques.txt', 'w') as f: + for i in list(gallery_uniques): + f.write(str(i) + '\n') +with open(dst_dir_path + '/' + 'query_uniques.txt', 'w') as f: + for i in list(query_uniques): + f.write(str(i) + '\n') +if hasDistractor: + with open(dst_dir_path + '/' + 'distractor_uniques.txt', 'w') as f: + for i in distractor_uniques: + f.write(str(i) + '\n') + +for img_path, file_name in tqdm(train_pairs): + shutil.copyfile(img_path, train_dir_path + '/' + file_name) +for img_path, file_name in tqdm(gallery_pairs): + shutil.copyfile(img_path, gallery_dir_path + '/' + file_name) +for img_path, file_name in tqdm(query_pairs): + shutil.copyfile(img_path, query_dir_path + '/' + file_name) +if hasDistractor: + for img_path, file_name in tqdm(distractor_pairs): + shutil.copyfile(img_path, distractor_dir_path + '/' + file_name) diff --git a/tools/oxygen-data-preparation/utils.py b/tools/oxygen-data-preparation/utils.py new file mode 100644 index 0000000..e630736 --- /dev/null +++ b/tools/oxygen-data-preparation/utils.py @@ -0,0 +1,6 @@ +import os + + +def create_dir(path): + if not os.path.exists(path): + os.makedirs(path) \ No newline at end of file diff --git a/tools/test.py b/tools/test.py index 7780ef7..c6bd0d7 100644 --- a/tools/test.py +++ b/tools/test.py @@ -20,15 +20,17 @@ def create_supervised_evaluator(model, metrics, device=None): Returns: Engine: an evaluator engine with supervised inference function """ - + def _inference(engine, batch): model.eval() + # torch.no_grad() disables gradient tracking during inference with torch.no_grad(): data, pids, camids = batch + # data = data.to(device) if torch.cuda.device_count() >= 1 else data feat = model(data) return feat, pids, camids - + # Engine abstracts the loop that feeds batches to the inference function and collects the results engine = Engine(_inference) for name, metric in metrics.items(): @@ -47,6 +49,7 @@ def do_test( logger = logging.getLogger("reid_baseline") logger.info("Enter inferencing") + # re-ranking is off by default if cfg.TEST.RE_RANKING == 'off': print("Create evaluator") evaluator =
create_supervised_evaluator(model, metrics={'r1_mAP_mINP': r1_mAP_mINP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)}, diff --git a/tools/train.py b/tools/train.py index 6e281a9..8fc84cf 100644 --- a/tools/train.py +++ b/tools/train.py @@ -4,6 +4,7 @@ import torch import torch.nn as nn +from torch.utils.tensorboard import SummaryWriter from ignite.engine import Engine, Events from ignite.handlers import ModelCheckpoint, Timer from ignite.metrics import RunningAverage @@ -78,14 +79,24 @@ def do_train( logger = logging.getLogger("reid_baseline") logger.info("Start training") - trainer = create_supervised_trainer(model, optimizer, criterion, cfg.SOLVER.CENTER_LOSS_WEIGHT, device=device) + writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR + '/writer') + + if cfg.SOLVER.CENTER_LOSS.USE: + trainer = create_supervised_trainer(model, optimizer, criterion, cfg.SOLVER.CENTER_LOSS.WEIGHT, device=device) + else: + trainer = create_supervised_trainer(model, optimizer, criterion, device=device) evaluator = create_supervised_evaluator(model, metrics={'r1_mAP_mINP': r1_mAP_mINP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)}, device=device) - checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.NAME, checkpoint_period, n_saved=10, require_empty=False) - trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model, - 'optimizer': optimizer['model'], - 'center_param': criterion['center'], - 'optimizer_center': optimizer['center']}) + checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE, checkpoint_period, n_saved=10, require_empty=False) + if cfg.SOLVER.CENTER_LOSS.USE: + trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model, + 'optimizer': optimizer['model'], + 'center_param': criterion['center'], + 'optimizer_center': optimizer['center']}) + else: + trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model, + 'optimizer': optimizer['model'] + }) timer = Timer(average=True) timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED, pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED) @@ -112,6 +123,8 @@ def log_training_loss(engine): .format(engine.state.epoch, ITER, len(data_loader['train']), engine.state.metrics['avg_loss'], engine.state.metrics['avg_acc'], scheduler.get_lr()[0])) + writer.add_scalar('loss/train_loss', engine.state.metrics['avg_loss'], engine.state.epoch * len(data_loader['train']) + ITER) + writer.add_scalar('acc/train_acc', engine.state.metrics['avg_acc'], engine.state.epoch * len(data_loader['train']) + ITER) if len(data_loader['train']) == ITER: ITER = 0 @@ -132,7 +145,12 @@ def log_validation_results(engine): logger.info("Validation Results - Epoch: {}".format(engine.state.epoch)) logger.info("mINP: {:.1%}".format(mINP)) logger.info("mAP: {:.1%}".format(mAP)) + + writer.add_scalar('validation_metrics/mINP', mINP, engine.state.epoch * len(data_loader['train'])) + writer.add_scalar('validation_metrics/mAP', mAP, engine.state.epoch * len(data_loader['train'])) + for r in [1, 5, 10]: logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1])) + writer.add_scalar('validation_metrics/Rank-{}'.format(r), cmc[r - 1], engine.state.epoch * len(data_loader['train'])) trainer.run(data_loader['train'], max_epochs=epochs) diff --git a/tools/visualize.py b/tools/visualize.py new file mode 100644 index 0000000..965e471 --- /dev/null +++ b/tools/visualize.py @@ -0,0 +1,215 @@ +# encoding: utf-8 +import logging +import torchvision +import torch +import torch.nn as nn 
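+# visualize.py: extracts query and gallery features with two ignite engines, ranks the gallery per query by inner-product similarity, and saves a top-10 grid per query (green title = same identity, red = different)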
+from ignite.engine import Engine, Events +from utils.reid_metric import r1_mAP_mINP, r1_mAP_mINP_reranking +from ignite.handlers import Timer +import matplotlib.pyplot as plt +import numpy as np +import pickle +import os +import tqdm + +global ITER +ITER = 0 +query_feat = [] +query_cam = [] +query_label = [] +gallery_feat = [] +gallery_cam = [] +gallery_label = [] +from torch.utils.tensorboard import SummaryWriter + +def create_feature_extractor(model, device=None): + """ + Factory function for creating a bare feature-extraction engine (no metrics are attached) + + Args: + model (`torch.nn.Module`): the model to evaluate + device (str, optional): device type specification (default: None). + Applies to both model and batches. + Returns: + Engine: an engine whose output is (features, pids, camids) for each batch + """ + + def _inference(engine, batch): + global ITER + model.eval() + with torch.no_grad(): + data, pids, camids = batch + data = data.to(device) if torch.cuda.device_count() >= 1 else data + + feat = model(data) + # print('shape {}'.format(feat.shape)) + return feat, pids, camids + + engine = Engine(_inference) + + # visualize does not have to calculate metrics + # for name, metric in metrics.items(): + # metric.attach(engine, name) + + return engine + + +def do_visualize( + cfg, + model, + data_loader, + num_query +): + if ( not os.path.exists('./log/{}/feature-pickle.pkl'.format(cfg.DATASETS.NAMES)) or cfg.VISUALIZE.NEED_NEW_FEAT_EMBED == "on" ) : + print("compute new feature embedding") + global query_feat, query_cam, query_label + global gallery_feat, gallery_cam, gallery_label + # writer = SummaryWriter('./log/{}/Experiment-AGW-baseline/test_image'.format(cfg.DATASETS.NAMES)) + device = cfg.MODEL.DEVICE + logger = logging.getLogger("reid_baseline") + logger.info("Enter inferencing to visualize") + + print("Create query engine and gallery engine to make feature extractor") + query_engine = create_feature_extractor(model, + device=device) + gallery_engine = create_feature_extractor(model, + device=device) + + # timer = Timer(average=True) + # timer.attach(query_engine, pause=Events.ITERATION_COMPLETED) + + @gallery_engine.on(Events.ITERATION_COMPLETED) + def append_result_gal(engine) : + global gallery_feat, gallery_cam, gallery_label + global ITER + ITER += 1 + gallery_feat.append(gallery_engine.state.output[0]) + gallery_cam.extend(gallery_engine.state.output[2]) + gallery_label.extend(gallery_engine.state.output[1]) + # logger.info("Epoch[{}] Iteration[{}/{}] output shape : {}" + # .format(engine.state.epoch, ITER, len(data_loader['query']), query_engine.state.output[0].shape)) + + @query_engine.on(Events.ITERATION_COMPLETED) + def append_result_query(engine) : + global query_feat, query_cam, query_label + global ITER + ITER += 1 + query_feat.append(query_engine.state.output[0]) + query_cam.extend(query_engine.state.output[2]) + query_label.extend(query_engine.state.output[1]) + #Show result + + query_engine.run(data_loader['query']) + # print(torch.cat(query_feat).shape) + gallery_engine.run(data_loader['gallery']) + # print(torch.cat(gallery_feat).shape) + + query_feature = torch.cat(query_feat) + gallery_feature = torch.cat(gallery_feat) + + # -------------------- visualize step ---------------------------------- + print(query_feature.shape) + print(gallery_feature.shape)
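+        # cache the extracted embeddings so later visualization runs can skip feature extraction (see VISUALIZE.NEED_NEW_FEAT_EMBED)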
"id" : query_label, + "cam" : query_cam + }, + "gallery" : { + "feat" : gallery_feature, + "id" : gallery_label, + "cam" : gallery_cam + } + } + pickle.dump(feat_dump_obj, fout, protocol=pickle.HIGHEST_PROTOCOL) + else : + with open("./log/{}/feature-pickle.pkl".format(cfg.DATASETS.NAMES), "rb") as fout: + feat_dump_obj = pickle.load(fout) + query_feature = feat_dump_obj["query"]["feat"] + query_label = feat_dump_obj["query"]["id"] + query_cam = feat_dump_obj["query"]["cam"] + gallery_feature = feat_dump_obj["gallery"]["feat"] + gallery_label = feat_dump_obj["gallery"]["id"] + gallery_cam = feat_dump_obj["gallery"]["cam"] + query_feature = query_feature.cuda() + gallery_feature = gallery_feature.cuda() + ####################################################################### + # sort the images + def sort_img(qf, ql, qc, gf, gl, gc): + query = qf.view(-1,1) + # print(query.shape) + score = torch.mm(gf,query) + # tensor.cuda() is used to move a tensor to GPU memory. + # tensor.cpu() moves it back to memory accessible to the CPU. + score = score.squeeze(1).cpu() + score = score.numpy() + # predict index + index = np.argsort(score) #from small to large + index = index[::-1] + # index = index[0:2000] + # not counting image from the same iden in the same cam + # good index : query label equal to gallery label + query_index = np.argwhere(gl==ql) + #same camera + camera_index = np.argwhere(gc==qc) + # here + #good_index = np.setdiff1d(query_index, camera_index, assume_unique=True) + junk_index1 = np.argwhere(gl==-1) + junk_index2 = np.intersect1d(query_index, camera_index) + junk_index = np.append(junk_index2, junk_index1) + # stuck + mask = np.in1d(index, junk_index, invert=True) + index = index[mask] + return index + def imshow(path, ax,title=None): + """Imshow for Tensor.""" + im = plt.imread(path) + ax.imshow(im) + if title is not None: + ax.set_title(title) + def make_query(i) : + query_ind = i + index = sort_img(query_feature[i],query_label[i],query_cam[i],gallery_feature,gallery_label,gallery_cam) + ######################################################################## + # Visualize the rank result + _, _, _, query_path = data_loader['query'].dataset[i] + query_lb = query_label[i] + # print('Top 10 images are as follow:') + try: # Visualize Ranking Result + # Graphical User Interface is needed + fig = plt.figure(figsize=(16,4)) + ax = fig.add_subplot(1,11,1) + ax.axis('off') + imshow(query_path,ax,'query') + # show top 10 + for i in range(10): + ax = fig.add_subplot(1,11,i+2) + ax.axis('off') + gallery_img, _, _, img_path = data_loader['gallery'].dataset[index[i]] + label = gallery_label[index[i]] + imshow(img_path, ax) + if label == query_lb: + ax.set_title('%d'%(i+1), color='green') + else: + ax.set_title('%d'%(i+1), color='red') + except RuntimeError: + for i in range(10): + img_path = data_loader['gallery'].dataset[index[i]][-1] + print(img_path[0]) + print('If you want to see the visualization of the ranking result, graphical user interface is needed.') + # plt.show() + fig.savefig("./log/{}/query_image/show_{}.png".format(cfg.DATASETS.NAMES,query_ind)) + return fig + i = cfg.VISUALIZE.INDEX + if i<0 : + # print("kaboom") + query_size = len(data_loader["query"].dataset) + for i in tqdm.tqdm(range(query_size)) : + # print(i) + make_query(i) + else : + fig = make_query(i) + fig.savefig("./log/{}/show.png".format(cfg.DATASETS.NAMES)) diff --git a/tools/visualize_no_label.py b/tools/visualize_no_label.py new file mode 100644 index 0000000..9a84c00 --- /dev/null +++ 
b/tools/visualize_no_label.py @@ -0,0 +1,236 @@ +# encoding: utf-8 +import logging +import torchvision +import torch +import torch.nn as nn +from ignite.engine import Engine, Events +from utils.reid_metric import r1_mAP_mINP, r1_mAP_mINP_reranking +from ignite.handlers import Timer +import matplotlib.pyplot as plt +import numpy as np +import pickle +import os +from utils.re_ranking import re_ranking_no_label +from utils.re_ranking_batch import re_ranking_batch_gpu + +global ITER +ITER = 0 +gallery_feat = [] +gallery_cam = [] +gallery_date = [] +from torch.utils.tensorboard import SummaryWriter + +def create_feature_extractor(model, device=None): + """ + Factory function for creating a bare feature-extraction engine (no metrics are attached) + + Args: + model (`torch.nn.Module`): the model to evaluate + device (str, optional): device type specification (default: None). + Applies to both model and batches. + Returns: + Engine: an engine whose output is (features, camid, date) for each batch + """ + + def _inference(engine, batch): + global ITER + model.eval() + with torch.no_grad(): + data, camid, date = batch + data = data.to(device) if torch.cuda.device_count() >= 1 else data + + feat = model(data) + # print('shape {}'.format(feat.shape)) + return feat, camid, date + + engine = Engine(_inference) + + return engine + + +def do_visualize_no_label( + cfg, + model, + data_loader +): + if ( not os.path.exists('./log/{}/feature-pickle.pkl'.format(cfg.DATASETS.NAMES)) or cfg.VISUALIZE.NEED_NEW_FEAT_EMBED == "on" ) : + print("compute new feature embedding") + global gallery_feat, gallery_cam, gallery_date + device = cfg.MODEL.DEVICE + logger = logging.getLogger("reid_baseline") + logger.info("Enter inferencing to visualize unlabeled data") + + print("Create gallery engine to make feature extractor") + gallery_engine = create_feature_extractor(model, + device=device) + + + @gallery_engine.on(Events.ITERATION_COMPLETED) + def append_result_gal(engine) : + global gallery_feat, gallery_cam, gallery_date + global ITER + ITER += 1 + gallery_feat.append(gallery_engine.state.output[0]) + gallery_cam.extend(gallery_engine.state.output[1]) + gallery_date.extend(gallery_engine.state.output[2]) + logger.info("Epoch[{}] Iteration[{}/{}] output shape : {}" + .format(engine.state.epoch, ITER, len(data_loader['gallery']), gallery_engine.state.output[0].shape)) + + #Show result + gallery_engine.run(data_loader['gallery']) + # print(type(gallery_feat)) + gallery_feature = torch.cat(gallery_feat) + + # -------------------- visualize step ---------------------------------- + # print(gallery_feature.shape) + if(not os.path.isdir("./log/{}".format(cfg.DATASETS.NAMES))) : + creating_directory = "./log/{}".format(cfg.DATASETS.NAMES) + os.mkdir(creating_directory) + with open("./log/{}/feature-pickle.pkl".format(cfg.DATASETS.NAMES), "wb") as fout: + feat_dump_obj = { + "gallery" : { + "feat" : gallery_feature, + "cam" : gallery_cam, + "date" : gallery_date + } + } + pickle.dump(feat_dump_obj, fout, protocol=pickle.HIGHEST_PROTOCOL) + else : + with open("./log/{}/feature-pickle.pkl".format(cfg.DATASETS.NAMES), "rb") as fin: + feat_dump_obj = pickle.load(fin) + gallery_feature = feat_dump_obj["gallery"]["feat"] + gallery_cam = feat_dump_obj["gallery"]["cam"] + gallery_date = feat_dump_obj["gallery"]["date"] + gallery_feature = gallery_feature.cuda() + ####################################################################### + # sort the images by cosine similarity matrix score + # parameters mirror sort_img in visualize.py with identity labels dropped: qc/qd are the query's camera and date, gc/gd the gallery's
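+    # sort_img ranks the whole gallery against one gallery image used as the query; with no identity labels, the junk set is only images sharing both the query's camera and date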
+    def sort_img(qf, qc, qd, gf, gc, gd, ignore_index=None): + query = qf.view(-1,1) + # score against the full gallery so indices stay aligned; the query's own row is dropped after masking + score = torch.mm(gf,query) + # tensor.cuda() is used to move a tensor to GPU memory. + # tensor.cpu() moves it back to memory accessible to the CPU. + score = score.squeeze(1).cpu() + score = score.numpy() + # predict index + index = np.argsort(score) #from small to large + index = index[::-1] + gc = np.asarray(gc) + gd = np.asarray(gd) + # same date + date_index = np.argwhere(gd==qd) + # same camera + camera_index = np.argwhere(gc==qc) + junk_index = np.intersect1d(date_index, camera_index) + mask = np.in1d(index, junk_index, invert=True) + index = index[mask] + if ignore_index is not None: + index = index[index != ignore_index] + return index + def imshow(path, ax,title=None): + """Imshow for Tensor.""" + im = plt.imread(path) + ax.imshow(im) + if title is not None: + ax.set_title(title) + def extract_iden(path) : + return path.split("\\")[-1].split("_")[-1].split("-")[0] + def make_query(i,re_rank=False,reranking_list=None,cam_option=None) : + query_ind = i + re_rank_str = "" + if not re_rank : + index = sort_img(gallery_feature[i],gallery_cam[i],gallery_date[i],gallery_feature,gallery_cam,gallery_date,ignore_index=i) + else : + re_rank_str = "_re_rank_" + index = list(reranking_list[query_ind]) + index = list(index) + # print(index[:20]) + print('query index : ', query_ind) + # drop the query image itself from its own ranking (the no-rerank path has already removed it) + if query_ind in index : + index.remove(query_ind) + ######################################################################## + # Visualize the rank result + iden_set = set() + _, _, _, query_path = data_loader['gallery'].dataset[query_ind] + query_iden = extract_iden(query_path) + iden_set.add(query_iden) + try: # Visualize Ranking Result + # Graphical User Interface is needed + fig = plt.figure(figsize=(16,4)) + ax = fig.add_subplot(1,11,1) + ax.axis('off') + imshow(query_path,ax,'query '+query_iden) + # show the top 10 distinct identities + ind = 0 + index_ind = 0 + while ind < 10: + _, _, _, img_path = data_loader['gallery'].dataset[index[index_ind]] + index_ind += 1 + img_iden = extract_iden(img_path) + if img_iden not in iden_set : + iden_set.add(img_iden) + else : + continue + ax = fig.add_subplot(1,11,ind+2) + ax.axis('off') + imshow(img_path, ax,str(ind+1)+"_"+img_iden) + ind += 1 + except RuntimeError: + for i in range(10): + img_path = data_loader['gallery'].dataset[index[i]][-1] + print('If you want to see the visualization of the ranking result, graphical user interface is needed.') + # plt.show() + if(not os.path.isdir("./log/{}/query_image".format(cfg.DATASETS.NAMES))) : + creating_directory = "./log/{}/query_image".format(cfg.DATASETS.NAMES) + os.mkdir(creating_directory) + if cam_option == "none" or cam_option is None : + cam_option="" + fig.savefig("./log/{}/query_image/show_{}{}{}.png".format(cfg.DATASETS.NAMES,query_ind,re_rank_str,cam_option)) + return fig,re_rank_str + i = cfg.VISUALIZE.INDEX + # query all image in gallery + is_re_rank = False + reranking_list = None
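+    # k-reciprocal re-ranking (Zhong et al., CVPR 2017): refines the ranking by mixing a Jaccard distance over k-reciprocal neighbour sets with the original distance; k1=20, k2=6, lambda=0.3 are the defaults quoted in utils/re_ranking.py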
+    if cfg.VISUALIZE.RE_RANK == "on" : + # print(gallery_feature) + # print(gallery_feature.shape) + + # whole-matrix version: + # reranking_list = re_ranking_no_label(gallery_feature,k1=20,k2=6,lambda_value=0.3) + # batched GPU version: + reranking_list = re_ranking_batch_gpu(torch.cat([gallery_feature]), + len(gallery_feature), + k1=20, k2=6, lambda_value=0.3, + len_slice=1000) + + is_re_rank = True + cam_option = cfg.VISUALIZE.CAM_OPTION + print("start querying") + if i<0 : + query_size = len(data_loader["gallery"].dataset) + for i in range(query_size) : + make_query(i, is_re_rank,reranking_list,cam_option=cam_option) + else : + print('gallery_feature shape : ', gallery_feature.shape) + print('i : ', i) + print('is_re_rank : ', is_re_rank) + fig, re_rank_str = make_query(i, is_re_rank, reranking_list, cam_option=cam_option) + fig.savefig("./log/{}/show{}.png".format(cfg.DATASETS.NAMES,re_rank_str)) \ No newline at end of file diff --git a/utils/re_ranking.py b/utils/re_ranking.py index 9bb15a6..0206139 100644 --- a/utils/re_ranking.py +++ b/utils/re_ranking.py @@ -10,7 +10,7 @@ API probFea: all feature vectors of the query set (torch tensor) -probFea: all feature vectors of the gallery set (torch tensor) +galFea: all feature vectors of the gallery set (torch tensor) k1,k2,lambda: parameters, the original paper is (k1=20,k2=6,lambda=0.3) MemorySave: set to 'True' when using MemorySave mode Minibatch: available when 'MemorySave' is 'True' @@ -19,9 +19,9 @@ import numpy as np import torch - def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_local=False): # if feature vector is numpy, you should use 'torch.tensor' to transform it to a tensor + query_num = probFea.size(0) all_num = query_num + galFea.size(0) if only_local: @@ -32,6 +32,7 @@ def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_l distmat = torch.pow(feat,2).sum(dim=1, keepdim=True).expand(all_num,all_num) + \ torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num).t() distmat.addmm_(1,-2,feat,feat.t()) + # distmat := 1*distmat - 2*feat@feat.t(), i.e. squared Euclidean distances original_dist = distmat.cpu().numpy() del feat if not local_distmat is None: @@ -59,7 +60,7 @@ def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_l if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2 / 3 * len( candidate_k_reciprocal_index): k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index) - + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) weight = np.exp(-original_dist[i, k_reciprocal_expansion_index]) V[i, k_reciprocal_expansion_index] = weight / np.sum(weight) @@ -93,3 +94,84 @@ def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_l final_dist = final_dist[:query_num, query_num:] return final_dist +def re_ranking_no_label(probFea, k1, k2, lambda_value, local_distmat=None, only_local=False): + # if feature vector is numpy, you should use 'torch.tensor' to transform it to a tensor + + query_num = probFea.size(0) + all_num = query_num + if only_local: + original_dist = local_distmat + else: + feat = torch.cat([probFea]) + print('using GPU to compute original distance') + distmat = torch.pow(feat,2).sum(dim=1, keepdim=True).expand(all_num,all_num) + \ + torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num).t() + distmat.addmm_(1,-2,feat,feat.t()) + original_dist = distmat.cpu().numpy() + del feat + if not
local_distmat is None: + original_dist = original_dist + local_distmat + gallery_num = original_dist.shape[0] + original_dist = np.transpose(original_dist / np.max(original_dist, axis=0)) + V = np.zeros_like(original_dist).astype(np.float16) + initial_rank = np.argsort(original_dist).astype(np.int32) + # print(initial_rank) + print('starting re_ranking') + for i in range(all_num): + # k-reciprocal neighbors + forward_k_neigh_index = initial_rank[i, :k1 + 1] + backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] + fi = np.where(backward_k_neigh_index == i)[0] + k_reciprocal_index = forward_k_neigh_index[fi] + k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1] + candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, + :int(np.around(k1 / 2)) + 1] + fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] + candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] + if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2 / 3 * len( + candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index) + + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + weight = np.exp(-original_dist[i, k_reciprocal_expansion_index]) + V[i, k_reciprocal_expansion_index] = weight / np.sum(weight) + original_dist = original_dist[:query_num, ] + if k2 != 1: + V_qe = np.zeros_like(V, dtype=np.float16) + for i in range(all_num): + V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0) + V = V_qe + del V_qe + del initial_rank + invIndex = [] + for i in range(gallery_num): + invIndex.append(np.where(V[:, i] != 0)[0]) + jaccard_dist = np.zeros_like(original_dist, dtype=np.float16) + for i in range(query_num): + temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16) + indNonZero = np.where(V[i, :] != 0)[0] + indImages = [invIndex[ind] for ind in indNonZero] + for j in range(len(indNonZero)): + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], + V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2 - temp_min) + # print(jaccard_dist) + final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value + del original_dist + del V + del jaccard_dist + # print(final_dist) + # final_dist = final_dist[:query_num, query_num:] + + # num_q, num_g = final_dist.shape + # # if num_g < max_rank: + # # max_rank = num_g + # # print("Note: number of gallery samples is quite small, got {}".format(num_g)) + indices = np.argsort(final_dist, axis=1) + return indices + + + diff --git a/utils/re_ranking_batch.py b/utils/re_ranking_batch.py new file mode 100644 index 0000000..0c7df2b --- /dev/null +++ b/utils/re_ranking_batch.py @@ -0,0 +1,253 @@ +import numpy as np +from scipy import sparse +import torch +import time +from tqdm import tqdm + +from evaluate import eval_func, euclidean_dist + +def calculate_V(initial_rank, all_feature_len, dis_i_qg, i, k1): + # dis_i_qg = euclidean_dist(torch.tensor([all_feature[i].numpy()]), all_feature).numpy() + + forward_k_neigh_index = initial_rank[i, :k1 + 1] + # print(forward_k_neigh_index) + backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] + + fi = np.where(backward_k_neigh_index == i)[0] + k_reciprocal_index = forward_k_neigh_index[fi] + 
k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2.)) + 1] + candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, + :int(np.around(k1 / 2.)) + 1] + fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] + candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] + if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2. / 3 * len( + candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index) + + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + # print(k_reciprocal_expansion_index) + weight = np.exp(-dis_i_qg[k_reciprocal_expansion_index]) + # print(weight) + V = np.zeros(( all_feature_len)).astype(np.float32) + V[k_reciprocal_expansion_index] = 1. * weight / np.sum(weight) + return V, k_reciprocal_expansion_index, weight + + +def re_ranking_batch(all_feature, q_num, k1, k2, lambda_value, len_slice=1000): + + # calculate (q+g)*(q+g) + initial_rank = np.zeros((len(all_feature), k1+1)).astype(np.int32) + + original_dist = np.zeros((q_num, len(all_feature))) + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(all_feature[i*len_slice:(i+1)*len_slice], all_feature).data.cpu().numpy() + initial_i_rank = np.argpartition(dis_i_qg, range(1, k1 + 1), ).astype(np.int32)[:, :k1 + 1] + initial_rank[i*len_slice:(i+1)*len_slice] = initial_i_rank + pbar.update(1) + # print(initial_rank[0]) + + end_time = time.time() + print("rank time : %s" % (end_time-s_time)) + + all_V = [] + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(all_feature[i * len_slice:(i + 1) * len_slice], all_feature).data.cpu().numpy() + for ks in range(dis_i_qg.shape[0]): + r_k = i*len_slice+ks + dis_i_qg[ks] = np.power(dis_i_qg[ks], 2).astype(np.float32) + dis_i_qg[ks] = 1. 
* dis_i_qg[ks] / np.max(dis_i_qg[ks]) + if r_k < q_num: + original_dist[r_k] = dis_i_qg[ks] + V ,k_reciprocal_expansion_index, weight = calculate_V(initial_rank, len(all_feature), dis_i_qg[ks], r_k, k1) + # if r_k == 0: + # print(k_reciprocal_expansion_index) + # print(weight) + # print(dis_i_qg[ks]) + all_V.append(sparse.csr_matrix(V)) + + pbar.update(1) + + all_V = sparse.vstack(all_V) + # print(all_V.getrow(0).toarray()) + end_time = time.time() + print("calculate V time : %s" % (end_time - s_time)) + # print(all_V.todense()[0]) + + all_V_qe = [] + s_time = time.time() + for i in range(len(all_feature)): + temp_V = np.zeros((k2, len(all_feature))) + for l, row_index in enumerate(initial_rank[i, :k2]): + temp_V[l, :] = all_V.getrow(row_index).toarray()[0] + + + V_qe = np.mean(temp_V, axis=0) + all_V_qe.append(sparse.csr_matrix(V_qe)) + all_V_qe = sparse.vstack(all_V_qe) + # print(all_V_qe.todense()[0]) + del all_V + end_time = time.time() + print("calculate V_qe time : %s" % (end_time - s_time)) + + invIndex = [] + for i in range(len(all_feature)): + invIndex.append(np.where(all_V_qe.getcol(i).toarray().transpose()[0] != 0)[0]) + jaccard_dist = np.zeros_like(original_dist, dtype=np.float32) + + for i in range(q_num): + temp_min = np.zeros(shape=[1, len(all_feature)], dtype=np.float32) + + indNonZero = np.where(all_V_qe.getrow(i).toarray()[0] != 0)[0] + + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + # print(indImages) + for j in range(len(indNonZero)): + # print(indNonZero[j]) + c = all_V_qe.getrow(i).getcol(indNonZero[j]).toarray()[0, 0] + # print(c) + # print(indImages[j]) + + t_min = np.zeros((indImages[j].shape[0])) + for kk in range(indImages[j].shape[0]): + temp_d = all_V_qe.getrow(indImages[j][kk]).getcol(indNonZero[j]).toarray()[0, 0] + t_min[kk] = np.minimum(c, temp_d) + # print(t_min) + + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + t_min + # temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], + # V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2. 
- temp_min) + # print(jaccard_dist[0]) + # print(original_dist[0]) + final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value + del original_dist + del all_V_qe + del jaccard_dist + final_dist = final_dist[:q_num, q_num:] + return final_dist + +def re_ranking_batch_gpu(all_feature, q_num, k1, k2, lambda_value, len_slice=1000): + + # calculate (q+g)*(q+g) + initial_rank = np.zeros((len(all_feature), k1+1)).astype(np.int32) + + original_dist = np.zeros((q_num, len(all_feature))) + gpu_features = all_feature.cuda() + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(gpu_features[i*len_slice:(i+1)*len_slice], gpu_features).data.cpu().numpy() + initial_i_rank = np.argpartition(dis_i_qg, range(1, k1 + 1), ).astype(np.int32)[:, :k1 + 1] + initial_rank[i*len_slice:(i+1)*len_slice] = initial_i_rank + pbar.update(1) + # print(initial_rank[0]) + + end_time = time.time() + print("rank time : %s" % (end_time-s_time)) + + all_V = [] + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(gpu_features[i * len_slice:(i + 1) * len_slice], gpu_features).data.cpu().numpy() + for ks in range(dis_i_qg.shape[0]): + r_k = i*len_slice+ks + dis_i_qg[ks] = np.power(dis_i_qg[ks], 2).astype(np.float32) + dis_i_qg[ks] = 1. * dis_i_qg[ks] / np.max(dis_i_qg[ks]) + if r_k < q_num: + original_dist[r_k] = dis_i_qg[ks] + V ,k_reciprocal_expansion_index, weight = calculate_V(initial_rank, len(all_feature), dis_i_qg[ks], r_k, k1) + # if r_k == 0: + # print(k_reciprocal_expansion_index) + # print(weight) + # print(dis_i_qg[ks]) + all_V.append(sparse.csr_matrix(V)) + + pbar.update(1) + + all_V = sparse.vstack(all_V) + # print(all_V.getrow(0).toarray()) + end_time = time.time() + print("calculate V time : %s" % (end_time - s_time)) + # print(all_V.todense()[0]) + + all_V_qe = [] + s_time = time.time() + for i in range(len(all_feature)): + temp_V = np.zeros((k2, len(all_feature))) + for l, row_index in enumerate(initial_rank[i, :k2]): + temp_V[l, :] = all_V.getrow(row_index).toarray()[0] + + + V_qe = np.mean(temp_V, axis=0) + all_V_qe.append(sparse.csr_matrix(V_qe)) + all_V_qe = sparse.vstack(all_V_qe) + # print(all_V_qe.todense()[0]) + del all_V + end_time = time.time() + print("calculate V_qe time : %s" % (end_time - s_time)) + + invIndex = [] + for i in range(len(all_feature)): + invIndex.append(np.where(all_V_qe.getcol(i).toarray().transpose()[0] != 0)[0]) + jaccard_dist = np.zeros_like(original_dist, dtype=np.float32) + + with tqdm(total=q_num) as pbar: + for i in range(q_num): + temp_min = np.zeros(shape=[1, len(all_feature)], dtype=np.float32) + + indNonZero = np.where(all_V_qe.getrow(i).toarray()[0] != 0)[0] + + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + # print(indImages) + for j in range(len(indNonZero)): + # print(indNonZero[j]) + c = all_V_qe.getrow(i).getcol(indNonZero[j]).toarray()[0, 0] + # print(c) + # print(indImages[j]) + + t_min = np.zeros((indImages[j].shape[0])) + for kk in range(indImages[j].shape[0]): + temp_d = all_V_qe.getrow(indImages[j][kk]).getcol(indNonZero[j]).toarray()[0, 0] + t_min[kk] = np.minimum(c, temp_d) + # print(t_min) + + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + t_min + # temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, 
indNonZero[j]], + # V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2. - temp_min) + pbar.update(1) + # print(jaccard_dist[0]) + # print(original_dist[0]) + final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value + del original_dist + del all_V_qe + del jaccard_dist + final_dist = final_dist[:q_num, q_num:] + return final_dist diff --git a/utils/reid_metric.py b/utils/reid_metric.py index 505fac7..27efa53 100644 --- a/utils/reid_metric.py +++ b/utils/reid_metric.py @@ -45,10 +45,8 @@ def compute(self): distmat.addmm_(1, -2, qf, gf.t()) distmat = distmat.cpu().numpy() cmc, mAP, mINP = eval_func(distmat, q_pids, g_pids, q_camids, g_camids) - return cmc, mAP, mINP - class r1_mAP_mINP_reranking(Metric): def __init__(self, num_query, max_rank=50, feat_norm='on'): super(r1_mAP_mINP_reranking, self).__init__() diff --git a/visualize.sh b/visualize.sh new file mode 100644 index 0000000..88c8410 --- /dev/null +++ b/visualize.sh @@ -0,0 +1,6 @@ +python ./tools/main.py --config_file='configs/AGW_baseline.yml' \ + VISUALIZE.INDEX "(10)" MODEL.DEVICE_ID "('0')" DATASETS.NAMES "('market1501')" \ + MODEL.PRETRAIN_CHOICE "('self')" \ + TEST.WEIGHT "('./log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth')" \ + VISUALIZE.OPTION "('on')" \ + OUTPUT_DIR "('./log/Test')" \ No newline at end of file