---
# DeiT_base_patch16_224_in1k_2n16c_dp_fp16o2.yaml
# global configs
# Every key below belongs to the `Global` mapping; they must be indented
# under it, otherwise `Global` itself loads as null.
Global:
  checkpoint: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  max_num_latest_checkpoint: 0
  eval_during_train: true
  # NOTE(review): eval_interval is presumably counted in `eval_unit`
  # ("epoch") -- confirm against the trainer.
  eval_interval: 1
  eval_unit: "epoch"
  accum_steps: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: false
  seed: 2021
# FP16 setting
FP16:
  # Plain string: capital letter "O" followed by the digit 2.
  level: O2
  # NOTE(review): GradScaler is nested under FP16 here; the nesting was
  # reconstructed -- confirm against the config loader.
  GradScaler:
    init_loss_scaling: 65536.0  # 2**16
# Distributed strategy flags.
DistributedStrategy:
  data_parallel: true
# model architecture
Model:
  name: DeiT_base_patch16_224
  drop_path_rate: 0.1
  drop_rate: 0.0
  # Same value as the Mixup/Cutmix `class_num` under DataLoader.Train.
  class_num: 1000
# loss function config for training/eval process
# Train and Eval each hold a list of single-key mappings
# (loss name -> its parameters).
Loss:
  Train:
    - CELoss:
        weight: 1.0
  Eval:
    - CELoss:
        weight: 1.0
# Learning-rate scheduler settings.
LRScheduler:
  name: TimmCosine
  # Exponent-form floats carry an explicit ".0": YAML 1.1 loaders such as
  # PyYAML resolve a mantissa without a dot (e.g. 1e-3) as a string, not a
  # float. The numeric values are unchanged.
  learning_rate: 1.0e-3
  eta_min: 1.0e-5
  warmup_epoch: 5
  warmup_start_lr: 1.0e-6
  decay_unit: epoch
# Optimizer settings.
Optimizer:
  name: AdamW
  # YAML has no tuple syntax, so this loads as the string "(0.9, 0.999)".
  # NOTE(review): presumably the config loader evaluates it into a pair of
  # floats -- confirm before changing its form.
  betas: (0.9, 0.999)
  # Written with an explicit ".0" so YAML 1.1 loaders (PyYAML) resolve it as
  # a float rather than the string "1e-8".
  eps: 1.0e-8
  weight_decay: 0.05
  no_weight_decay_name: ["cls_token", "pos_embed", "norm"]
  use_master_param: true
  exp_avg_force_fp32: true
# data loader for train and eval
# Train and Eval each define a `dataset` (with its list of transform ops),
# a `sampler` and a `loader`. Each transform op is a single-key mapping:
# op name -> its parameters.
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: true
            channel_first: false
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
            mean: [0.485, 0.456, 0.406]
        - NormalizeImage:
            # Not arithmetic in YAML: loads as the string "1.0/255.0".
            # NOTE(review): presumably the op evaluates it -- confirm.
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            # Loads as the string "1.0/3.0", same caveat as `scale` above.
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: true
            mode: pixel
        # Op with no parameters; the value is an explicit null.
        - ToCHWImage: null
      # NOTE(review): batch_transform_ops is placed under `dataset`, as a
      # sibling of transform_ops; nesting was reconstructed -- confirm.
      batch_transform_ops:
        - TransformOpSampler:
            Mixup:
              alpha: 0.8
              prob: 0.5
              epsilon: 0.1
              class_num: 1000
            Cutmix:
              alpha: 1.0
              prob: 0.5
              epsilon: 0.1
              class_num: 1000
    sampler:
      name: RepeatedAugSampler
      batch_size: 64
      drop_last: false
      shuffle: true
    loader:
      num_workers: 8
      use_shared_memory: true
  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: true
            channel_first: false
        - ResizeImage:
            resize_short: 256
            interpolation: bicubic
            backend: pil
        - CenterCropImage:
            size: 224
        - NormalizeImage:
            # Loads as the string "1.0/255.0" (see the Train pipeline).
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        # Op with no parameters; the value is an explicit null.
        - ToCHWImage: null
    sampler:
      name: DistributedBatchSampler
      batch_size: 256
      drop_last: false
      shuffle: false
    loader:
      num_workers: 8
      use_shared_memory: true
# Evaluation metrics: a list of single-key mappings (metric name -> params).
Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
# Model export settings.
Export:
  export_type: paddle
  # `None` is not YAML null; it loads as the string "None".
  # NOTE(review): presumably the exporter treats it as a dynamic batch
  # dimension -- confirm before replacing it with `null`.
  input_shape: [None, 3, 224, 224]