#===============================================================================
# Default Configuration for LSQ-Net
#===============================================================================
# Please do NOT modify this file directly. If you want to modify configurations,
# please:
# 1. Create a new YAML file and copy the options you want to modify into it.
# 2. Modify these options in your YAML file.
# 3. Run main.py with your configuration file on the command line, like this:
# $ python main.py path/to/your/config/file
# The options modified in your configuration file will overwrite those in this
# file.
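# For illustration, a minimal override file could look like the sketch below.
# This is a hypothetical example, not a recommendation: the dataset path is a
# placeholder, and the 4-bit setting is just one of the possible bit widths.
#   dataloader:
#     dataset: cifar10
#     num_classes: 10
#     path: /path/to/cifar10
#   arch: resnet20
#   quan:
#     act:
#       bit: 4
#     weight:
#       bit: 4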
#============================ Environment ======================================
# Experiment name
name: MyProject
# Name of output directory. Checkpoints and logs will be saved at `pwd`/output_dir
output_dir: out
# Device to be used
device:
  # Use CPU or GPU (choices: cpu, cuda)
  type: cuda
  # GPU device IDs to be used. Only valid when device.type is 'cuda'
  gpu: [0, 1]
# Dataset loader
dataloader:
  # Dataset to train/validate (choices: imagenet, cifar10)
  dataset: imagenet
  # Number of categories in the specified dataset (choices: 1000, 10)
  num_classes: 1000
  # Path to dataset directory
  path: /localhome/fair/Dataset/imagenet
  # Size of mini-batch
  batch_size: 64
  # Number of data loading workers
  workers: 32
  # Seed random generators in a deterministic way (i.e., set all the seeds to 0).
  # Please keep it true when resuming the experiment from a checkpoint
  deterministic: true
  # Load the model without DataParallel wrapping it
  serialized: false
  # Portion of training dataset to set aside for validation (range: [0, 1))
  val_split: 0.05
resume:
  # Path to a checkpoint to be loaded. Leave blank to skip
  path:
  # Resume model parameters only
  lean: false
log:
  # Number of best scores to track and report
  num_best_scores: 3
  # Print frequency
  print_freq: 20
#============================ Model ============================================
# Supported model architecture
# choices:
# ImageNet:
# resnet18, resnet34, resnet50, resnet101, resnet152
# CIFAR10:
# resnet20, resnet32, resnet44, resnet56, resnet110, resnet1202
arch: resnet18
# Use pre-trained model
pre_trained: true
#============================ Quantization =====================================
quan:
  act: # (default for all layers)
    # Quantizer type (choices: lsq)
    mode: lsq
    # Bit width of quantized activation
    bit: 3
    # Each output channel uses its own scaling factor
    per_channel: false
    # Whether to use symmetric quantization
    symmetric: false
    # Whether to quantize all the numbers to non-negative
    all_positive: true
  weight: # (default for all layers)
    # Quantizer type (choices: lsq)
    mode: lsq
    # Bit width of quantized weight
    bit: 3
    # Each output channel uses its own scaling factor
    per_channel: true
    # Whether to use symmetric quantization
    symmetric: false
    # Whether to quantize all the numbers to non-negative
    all_positive: false
  excepts:
    # Specify quantized bit width for some layers, like this:
    conv1:
      act:
        all_positive: false
      weight:
        bit:
    fc:
      act:
        bit:
      weight:
        bit:
#============================ Training / Evaluation ============================
# Evaluate the model without training
# If this field is true, all the following options will be ignored
eval: false
epochs: 90
optimizer:
  learning_rate: 0.01
  momentum: 0.9
  weight_decay: 0.0001
# Learning rate scheduler
lr_scheduler:
  # Update learning rate per batch or epoch
  update_per_batch: true

  # Uncomment one of the following options to activate a learning rate schedule

  # Fixed learning rate
  mode: fixed

  # Step decay
  # mode: step
  # step_size: 30
  # gamma: 0.1

  # Multi-step decay
  # mode: multi_step
  # milestones: [30, ]
  # gamma: 0.1

  # Exponential decay
  # mode: exp
  # gamma: 0.95

  # Cosine annealing
  # mode: cos
  # lr_min: 0
  # cycle: 0.95

  # Cosine annealing with warm restarts
  # mode: cos_warm_restarts
  # lr_min: 0
  # cycle: 5
  # cycle_scale: 2
  # amp_scale: 0.5
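
# In your own configuration file, you can activate a schedule by setting these
# options directly instead of editing this default file. A hypothetical sketch,
# with placeholder milestone values:
#   lr_scheduler:
#     mode: multi_step
#     milestones: [30, 60]
#     gamma: 0.1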