PaddlePaddle · TingquanGao · Jun 27, 2023 · Jun 28, 2023 · Jun 29, 2023 · Jun 29, 2023
diff --git a/configs/deit/deit-base-p16-pt_in1k-224_2n16c_fp16_o1_dp.yaml b/configs/deit/deit-base-p16-pt_in1k-224_2n16c_fp16_o1_dp.yaml
@@ -27,7 +27,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.0
       drop_path_rate : 0.1
 

diff --git a/configs/lvvit/lvvit_tiny.yaml b/configs/lvvit/lvvit_tiny.yaml
@@ -30,7 +30,7 @@ model:
       skip_lam: 1
       return_dense: True
       mix_token: True
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.0
       drop_path_rate : 0.1
 

diff --git a/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o1_dp.yaml b/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o1_dp.yaml
@@ -27,7 +27,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o1_sharding.yaml b/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o1_sharding.yaml
@@ -33,7 +33,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o2_dp.yaml b/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o2_dp.yaml
@@ -27,7 +27,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o2_sharding.yaml b/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp16_o2_sharding.yaml
@@ -33,7 +33,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp32_dp.yaml b/configs/vision_transformer/vit-base-p16-pt_in1k-224_4n32c_fp32_dp.yaml
@@ -15,7 +15,7 @@ model:
       mlp_ratio: 4
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/configs/vision_transformer/vit-g-p14-pt_in1k-224_1n8c.yaml b/configs/vision_transformer/vit-g-p14-pt_in1k-224_1n8c.yaml
@@ -15,7 +15,7 @@ model:
       mlp_ratio: 4.9231
       qkv_bias: True
       epsilon: 1e-6
-      class_num: 1000
+      num_classes: 1000
       drop_rate: 0.1
       representation_size: 768
   label_smoothing: 0.0001

diff --git a/passl/models/convnext.py b/passl/models/convnext.py
@@ -119,7 +119,7 @@ class ConvNeXt(Model):
     def __init__(
             self,
             in_chans=3,
-            class_num=1000,
+            num_classes=1000,
             depths=[3, 3, 9, 3],
             dims=[96, 192, 384, 768],
             drop_path_rate=0.,
@@ -158,7 +158,7 @@ def __init__(
             cur += depths[i]
 
         self.norm = nn.LayerNorm(dims[-1], epsilon=1e-6)
-        self.head = nn.Linear(dims[-1], class_num)
+        self.head = nn.Linear(dims[-1], num_classes)
 
         self.apply(self._init_weights)
         self.head.weight.set_value(self.head.weight * head_init_scale)

diff --git a/passl/models/deit.py b/passl/models/deit.py
@@ -49,7 +49,7 @@ def __init__(self,
                  img_size=224,
                  patch_size=16,
                  in_chans=3,
-                 class_num=1000,
+                 num_classes=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -63,7 +63,7 @@ def __init__(self,
                  epsilon=1e-5,
                  **kwargs):
         super().__init__()
-        self.class_num = class_num
+        self.num_classes = num_classes
 
         self.num_features = self.embed_dim = embed_dim
 
@@ -101,7 +101,7 @@ def __init__(self,
 
         # Classifier head
         self.head = nn.Linear(embed_dim,
-                              class_num) if class_num > 0 else nn.Identity()
+                              num_classes) if num_classes > 0 else nn.Identity()
 
         init.trunc_normal_(self.pos_embed, std=.002)
         init.trunc_normal_(self.cls_token, std=.002)
@@ -197,7 +197,7 @@ class DistilledVisionTransformer(DeitVisionTransformer):
     def __init__(self,
                  img_size=224,
                  patch_size=16,
-                 class_num=1000,
+                 num_classes=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -209,7 +209,7 @@ def __init__(self,
         super().__init__(
             img_size=img_size,
             patch_size=patch_size,
-            class_num=class_num,
+            num_classes=num_classes,
             embed_dim=embed_dim,
             depth=depth,
             num_heads=num_heads,
@@ -229,7 +229,7 @@ def __init__(self,
 
         self.head_dist = nn.Linear(
             self.embed_dim,
-            self.class_num) if self.class_num > 0 else nn.Identity()
+            self.num_classes) if self.num_classes > 0 else nn.Identity()
 
         init.trunc_normal_(self.dist_token, std=.02)
         init.trunc_normal_(self.pos_embed, std=.02)

diff --git a/passl/models/dino/dino_vit.py b/passl/models/dino/dino_vit.py
@@ -280,9 +280,9 @@ def _freeze_norm(self, layer):
 
 class LinearClassifier(nn.Layer):
     """Linear layer to train on top of frozen features"""
-    def __init__(self, dim, class_num=1000):
+    def __init__(self, dim, num_classes=1000):
         super(LinearClassifier, self).__init__()
-        self.linear = nn.Linear(dim, class_num)
+        self.linear = nn.Linear(dim, num_classes)
         normal_(self.linear.weight)
         zeros_(self.linear.bias)
 
@@ -293,14 +293,14 @@ def forward(self, x):
 
 class DINOLinearProbe(DINO):
 
-    def __init__(self, class_num=1000, **kwargs):
+    def __init__(self, num_classes=1000, **kwargs):
         super().__init__(**kwargs)
         self.backbone.eval()
 
         self.n_last_blocks = self.backbone.n_last_blocks
         self.avgpool_patchtokens = self.backbone.avgpool_patchtokens
         embed_dim = self.backbone.embed_dim * (self.n_last_blocks + int(self.avgpool_patchtokens))
-        self.linear = LinearClassifier(embed_dim, class_num)
+        self.linear = LinearClassifier(embed_dim, num_classes)
 
         # freeze all layers but the last fc
         for name, param in self.named_parameters():

diff --git a/passl/models/dinov2/dinov2_vit.py b/passl/models/dinov2/dinov2_vit.py
@@ -544,9 +544,9 @@ def _freeze_norm(self, layer):
 
 class LinearClassifier(nn.Layer):
     """Linear layer to train on top of frozen features"""
-    def __init__(self, dim, class_num=1000):
+    def __init__(self, dim, num_classes=1000):
         super(LinearClassifier, self).__init__()
-        self.linear = nn.Linear(dim, class_num)
+        self.linear = nn.Linear(dim, num_classes)
         normal_(self.linear.weight)
         zeros_(self.linear.bias)
 
@@ -557,14 +557,14 @@ def forward(self, x):
 
 class DINOv2LinearProbe(DINOv2):
 
-    def __init__(self, class_num=1000, **kwargs):
+    def __init__(self, num_classes=1000, **kwargs):
         super().__init__(**kwargs)
         self.backbone.eval()
         self.n_last_blocks = self.backbone.n_last_blocks
         self.avgpool_patchtokens = self.backbone.avgpool_patchtokens
         embed_dim = self.backbone.embed_dim * (self.n_last_blocks + int(self.avgpool_patchtokens))
 
-        self.linear = LinearClassifier(embed_dim, class_num)
+        self.linear = LinearClassifier(embed_dim, num_classes)
 
         # freeze all layers but the last fc
         for name, param in self.named_parameters():

diff --git a/passl/models/resnet.py b/passl/models/resnet.py
@@ -55,12 +55,12 @@ def __init__(
         block,
         depth=50,
         width=64,
-        class_num=1000,
+        num_classes=1000,
         with_pool=True,
         groups=1,
         zero_init_residual=True,
     ):
-        super().__init__(block, depth=depth, width=width, num_classes=class_num, with_pool=with_pool, groups=groups)
+        super().__init__(block, depth=depth, width=width, num_classes=num_classes, with_pool=with_pool, groups=groups)
 
         # Zero-initialize the last BN in each residual branch,
         # so that the residual branch starts with zeros, and each residual block behaves like an identity.

diff --git a/passl/models/swav.py b/passl/models/swav.py
@@ -87,9 +87,9 @@ def _freeze_norm(self, layer):
             layer._use_global_stats = True
 
 class SwAVLinearProbe(SwAV):
-    def __init__(self, class_num=1000, **kwargs):
+    def __init__(self, num_classes=1000, **kwargs):
         super().__init__(**kwargs)
-        self.linear = RegLogit(class_num)
+        self.linear = RegLogit(num_classes)
         self.res_model.eval()
 
         # freeze all layers but the last fc
@@ -266,7 +266,7 @@ def __init__(self, block, depth,
                     constant_init(sublayer.weight, value=1.0)
                     constant_init(sublayer.bias, value=0.0)
 
-        self.encoder = functools.partial(ResNet, block=block, depth=depth)(with_pool=False, class_num=0)
+        self.encoder = functools.partial(ResNet, block=block, depth=depth)(with_pool=False, num_classes=0)
 
     def forward_backbone(self, x):
         x = self.encoder(x)

diff --git a/passl/models/vision_transformer.py b/passl/models/vision_transformer.py
@@ -257,7 +257,7 @@ def __init__(self,
                  img_size=224,
                  patch_size=16,
                  in_chans=3,
-                 class_num=1000,
+                 num_classes=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -272,7 +272,7 @@ def __init__(self,
                  representation_size=None,
                  **kwargs):
         super().__init__()
-        self.class_num = class_num
+        self.num_classes = num_classes
         self.representation_size = representation_size
 
         self.num_features = self.embed_dim = embed_dim
@@ -322,14 +322,14 @@ def __init__(self,
             self.tanh = nn.Tanh()
             self.head = nn.Linear(
                 representation_size,
-                class_num) if class_num > 0 else nn.Identity()
+                num_classes) if num_classes > 0 else nn.Identity()
             init.xavier_uniform_(self.head0.weight)
             init.zeros_(self.head0.bias)
             init.xavier_uniform_(self.head.weight)
             init.constant_(self.head.bias, -10.0)
         else:
             self.head = nn.Linear(
-                embed_dim, class_num) if class_num > 0 else nn.Identity()
+                embed_dim, num_classes) if num_classes > 0 else nn.Identity()
             init.zeros_(self.head.weight)
             init.zeros_(self.head.bias)
 

diff --git a/passl/models/vision_transformer_hybrid.py b/passl/models/vision_transformer_hybrid.py
@@ -178,7 +178,7 @@ def __init__(self,
                  img_size=224,
                  patch_size=16,
                  in_chans=3,
-                 class_num=1000,
+                 num_classes=1000,
                  embed_dim=768,
                  depth=12,
                  num_heads=12,
@@ -193,7 +193,7 @@ def __init__(self,
                  representation_size=None,
                  **kwargs):
         super().__init__()
-        self.class_num = class_num
+        self.num_classes = num_classes
         self.representation_size = representation_size
 
         self.num_features = self.embed_dim = embed_dim
@@ -243,14 +243,14 @@ def __init__(self,
             self.tanh = nn.Tanh()
             self.head = nn.Linear(
                 representation_size,
-                class_num) if class_num > 0 else nn.Identity()
+                num_classes) if num_classes > 0 else nn.Identity()
             init.xavier_uniform_(self.head0.weight)
             init.zeros_(self.head0.bias)
             init.xavier_uniform_(self.head.weight)
             init.constant_(self.head.bias, -10.0)
         else:
             self.head = nn.Linear(
-                embed_dim, class_num) if class_num > 0 else nn.Identity()
+                embed_dim, num_classes) if num_classes > 0 else nn.Identity()
             init.zeros_(self.head.weight)
             init.zeros_(self.head.bias)
 

diff --git a/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_1n8c_dp_fp32.yaml b/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_1n8c_dp_fp32.yaml
@@ -37,7 +37,7 @@ EMA:
 Model:
   name: convnext_base
   drop_path_rate : 0.5
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_4n32c_dp_fp16o2.yaml b/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_4n32c_dp_fp16o2.yaml
@@ -38,7 +38,7 @@ EMA:
 Model:
   name: convnext_base
   drop_path_rate : 0.5
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_4n32c_dp_fp32.yaml b/tasks/classification/convnext/configs/ConvNeXt_base_224_in1k_4n32c_dp_fp32.yaml
@@ -37,7 +37,7 @@ EMA:
 Model:
   name: convnext_base
   drop_path_rate : 0.5
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_1n8c_dp_fp16o2.yaml b/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_1n8c_dp_fp16o2.yaml
@@ -32,7 +32,7 @@ Model:
   name: DeiT_base_patch16_224
   drop_path_rate : 0.1
   drop_rate : 0.0
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_1n8c_dp_fp32.yaml b/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_1n8c_dp_fp32.yaml
@@ -32,7 +32,7 @@ Model:
   name: DeiT_base_patch16_224
   drop_path_rate : 0.1
   drop_rate : 0.0
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_2n16c_dp_fp16o2.yaml b/tasks/classification/deit/configs/DeiT_base_patch16_224_in1k_2n16c_dp_fp16o2.yaml
@@ -32,7 +32,7 @@ Model:
   name: DeiT_base_patch16_224
   drop_path_rate : 0.1
   drop_rate : 0.0
-  class_num: 1000
+  num_classes: 1000
 
 # loss function config for traing/eval process
 Loss:

diff --git a/tasks/classification/vit/configs/ViT_base_patch16_224_in1k_1n8c_dp_fp16o2.yaml b/tasks/classification/vit/configs/ViT_base_patch16_224_in1k_1n8c_dp_fp16o2.yaml
@@ -30,7 +30,7 @@ DistributedStrategy:
 # model architecture
 Model:
   name: ViT_base_patch16_224
-  class_num: 1000
+  num_classes: 1000
   drop_rate: 0.1
 
 # loss function config for traing/eval process

diff --git a/tasks/classification/vit/configs/ViT_base_patch16_384_ft_in1k_1n8c_dp_fp16o2.yaml b/tasks/classification/vit/configs/ViT_base_patch16_384_ft_in1k_1n8c_dp_fp16o2.yaml
@@ -31,7 +31,7 @@ DistributedStrategy:
 # model architecture
 Model:
   name: ViT_base_patch16_384
-  class_num: 1000
+  num_classes: 1000
   drop_rate: 0.1
 
 # loss function config for traing/eval process

diff --git a/tasks/classification/vit/configs/ViT_large_patch16_224_in21k_4n32c_dp_fp16o2.yaml b/tasks/classification/vit/configs/ViT_large_patch16_224_in21k_4n32c_dp_fp16o2.yaml
@@ -30,7 +30,7 @@ DistributedStrategy:
 # model architecture
 Model:
   name: ViT_large_patch16_224
-  class_num: 21841
+  num_classes: 21841
   drop_rate: 0.1
 
 # loss function config for traing/eval process

diff --git a/tasks/classification/vit/configs/ViT_large_patch16_384_in1k_ft_4n32c_dp_fp16o2.yaml b/tasks/classification/vit/configs/ViT_large_patch16_384_in1k_ft_4n32c_dp_fp16o2.yaml
@@ -31,7 +31,7 @@ DistributedStrategy:
 # model architecture
 Model:
   name: ViT_large_patch16_384
-  class_num: 1000
+  num_classes: 1000
   drop_rate: 0.1
 
 # loss function config for traing/eval process