From 9f60a1b255927ab879029f5e522bbc1c88bcbac8 Mon Sep 17 00:00:00 2001
From: Victor Turrisi
Date: Mon, 4 Dec 2023 11:48:24 -0300
Subject: [PATCH] Moved to lightning (#370)

---
 docs/source/tutorials/offline_linear_eval.rst | 6 ++---
 docs/source/tutorials/overview.rst | 8 +++---
 main_linear.py | 26 +++---------------
 main_pretrain.py | 27 +++----------------
 requirements.txt | 2 +-
 scripts/pretrain/cifar-multicrop/swav.yaml | 2 +-
 scripts/pretrain/cifar/barlow.yaml | 2 +-
 scripts/pretrain/cifar/byol.yaml | 2 +-
 scripts/pretrain/cifar/deepclusterv2.yaml | 2 +-
 scripts/pretrain/cifar/dino.yaml | 2 +-
 scripts/pretrain/cifar/mae.yaml | 2 +-
 scripts/pretrain/cifar/mocov2plus.yaml | 2 +-
 scripts/pretrain/cifar/mocov3.yaml | 2 +-
 scripts/pretrain/cifar/nnbyol.yaml | 2 +-
 scripts/pretrain/cifar/nnclr.yaml | 2 +-
 scripts/pretrain/cifar/nnsiam.yaml | 2 +-
 scripts/pretrain/cifar/ressl.yaml | 2 +-
 scripts/pretrain/cifar/simclr.yaml | 2 +-
 scripts/pretrain/cifar/simsiam.yaml | 2 +-
 scripts/pretrain/cifar/supcon.yaml | 2 +-
 scripts/pretrain/cifar/swav.yaml | 2 +-
 scripts/pretrain/cifar/vibcreg.yaml | 2 +-
 scripts/pretrain/cifar/vicreg.yaml | 2 +-
 scripts/pretrain/cifar/wmse.yaml | 2 +-
 scripts/pretrain/custom/byol.yaml | 2 +-
 .../pretrain/imagenet-100-multicrop/byol.yaml | 2 +-
 .../imagenet-100-multicrop/simclr.yaml | 2 +-
 .../imagenet-100-multicrop/supcon.yaml | 2 +-
 scripts/pretrain/imagenet-100/barlow.yaml | 2 +-
 scripts/pretrain/imagenet-100/byol.yaml | 2 +-
 .../pretrain/imagenet-100/deepclusterv2.yaml | 2 +-
 scripts/pretrain/imagenet-100/dino.yaml | 2 +-
 scripts/pretrain/imagenet-100/dino_vit.yaml | 2 +-
 scripts/pretrain/imagenet-100/mae.yaml | 2 +-
 scripts/pretrain/imagenet-100/mocov2plus.yaml | 2 +-
 scripts/pretrain/imagenet-100/mocov3.yaml | 2 +-
 scripts/pretrain/imagenet-100/mocov3_vit.yaml | 2 +-
 scripts/pretrain/imagenet-100/nnclr.yaml | 2 +-
 scripts/pretrain/imagenet-100/ressl.yaml | 2 +-
 scripts/pretrain/imagenet-100/simclr.yaml | 2 +-
 scripts/pretrain/imagenet-100/simsiam.yaml | 2 +-
 scripts/pretrain/imagenet-100/supcon.yaml | 2 +-
 scripts/pretrain/imagenet-100/swav.yaml | 2 +-
 scripts/pretrain/imagenet-100/vibcreg.yaml | 2 +-
 scripts/pretrain/imagenet-100/vicreg.yaml | 2 +-
 scripts/pretrain/imagenet-100/wmse.yaml | 2 +-
 scripts/pretrain/imagenet/barlow.yaml | 2 +-
 scripts/pretrain/imagenet/byol.yaml | 2 +-
 scripts/pretrain/imagenet/mae.yaml | 2 +-
 scripts/pretrain/imagenet/mocov2plus.yaml | 2 +-
 solo/data/dali_dataloader.py | 5 ++--
 solo/methods/base.py | 5 ++--
 solo/methods/linear.py | 5 ++--
 solo/utils/auto_umap.py | 9 ++++---
 solo/utils/checkpointer.py | 18 +++++++++----
 tests/methods/utils.py | 9 ++++---
 56 files changed, 93 insertions(+), 117 deletions(-)

diff --git a/docs/source/tutorials/offline_linear_eval.rst b/docs/source/tutorials/offline_linear_eval.rst
index de5f6b1a..c4c6eae5 100644
--- a/docs/source/tutorials/offline_linear_eval.rst
+++ b/docs/source/tutorials/offline_linear_eval.rst
@@ -8,9 +8,9 @@ As for pretraining, we start by importing the required packages:

 ..
code-block:: python import torch - from pytorch_lightning import Trainer - from pytorch_lightning.loggers import WandbLogger - from pytorch_lightning.callbacks import LearningRateMonitor + from lightning.pytorch import Trainer + from lightning.pytorch.loggers import WandbLogger + from lightning.pytorch.callbacks import LearningRateMonitor from torchvision.models import resnet18 from solo.methods.linear import LinearModel # imports the linear eval class diff --git a/docs/source/tutorials/overview.rst b/docs/source/tutorials/overview.rst index f53d78eb..0cd38198 100644 --- a/docs/source/tutorials/overview.rst +++ b/docs/source/tutorials/overview.rst @@ -25,10 +25,10 @@ We start by importing everything that we will need (we will be relying on Pytorc .. code-block:: python import torch - from pytorch_lightning import Trainer - from pytorch_lightning.callbacks import LearningRateMonitor - from pytorch_lightning.loggers import WandbLogger - from pytorch_lightning.plugins import DDPPlugin + from lightning.pytorch import Trainer + from lightning.pytorch.callbacks import LearningRateMonitor + from lightning.pytorch.loggers import WandbLogger + from lightning.pytorch.plugins import DDPPlugin from solo.methods import BarlowTwins # imports the method class from solo.utils.checkpointer import Checkpointer diff --git a/main_linear.py b/main_linear.py index 50305fac..bb0b1454 100644 --- a/main_linear.py +++ b/main_linear.py @@ -24,11 +24,11 @@ import hydra import torch import torch.nn as nn +from lightning.pytorch import Trainer +from lightning.pytorch.callbacks import LearningRateMonitor +from lightning.pytorch.loggers import WandbLogger +from lightning.pytorch.strategies.ddp import DDPStrategy from omegaconf import DictConfig, OmegaConf -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import LearningRateMonitor -from pytorch_lightning.loggers import WandbLogger -from pytorch_lightning.strategies.ddp import DDPStrategy from timm.data.mixup import Mixup from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy @@ -169,7 +169,6 @@ def main(cfg: DictConfig): callbacks = [] if cfg.checkpoint.enabled: - # save checkpoint on last epoch only ckpt = Checkpointer( cfg, logdir=os.path.join(cfg.checkpoint.dir, "linear"), @@ -211,23 +210,6 @@ def main(cfg: DictConfig): ) trainer = Trainer(**trainer_kwargs) - # fix for incompatibility with nvidia-dali and pytorch lightning - # with dali 1.15 (this will be fixed on 1.16) - # https://github.com/Lightning-AI/lightning/issues/12956 - try: - from pytorch_lightning.loops import FitLoop - - class WorkaroundFitLoop(FitLoop): - @property - def prefetch_batches(self) -> int: - return 1 - - trainer.fit_loop = WorkaroundFitLoop( - trainer.fit_loop.min_epochs, trainer.fit_loop.max_epochs - ) - except: - pass - if cfg.data.format == "dali": trainer.fit(model, ckpt_path=ckpt_path, datamodule=dali_datamodule) else: diff --git a/main_pretrain.py b/main_pretrain.py index f873cc2d..e83d5591 100644 --- a/main_pretrain.py +++ b/main_pretrain.py @@ -22,12 +22,11 @@ import hydra import torch +from lightning.pytorch import Trainer, seed_everything +from lightning.pytorch.callbacks import LearningRateMonitor +from lightning.pytorch.loggers.wandb import WandbLogger +from lightning.pytorch.strategies.ddp import DDPStrategy from omegaconf import DictConfig, OmegaConf -from pytorch_lightning import Trainer, seed_everything -from pytorch_lightning.callbacks import LearningRateMonitor -from pytorch_lightning.loggers import WandbLogger -from 
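
The import changes above, and those continuing into main_pretrain.py just below, all follow the same pattern: the pytorch_lightning namespace becomes lightning.pytorch while the call sites stay untouched. As a rough sketch of the offline linear-eval setup under the new namespace (only the import lines mirror the updated tutorial; the backbone surgery, trainer arguments, and the commented-out fit call are illustrative placeholders):

# Sketch of the post-migration import style; everything past the imports is a placeholder.
import torch
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import LearningRateMonitor
from lightning.pytorch.loggers import WandbLogger
from torchvision.models import resnet18

from solo.methods.linear import LinearModel  # the linear-eval LightningModule

backbone = resnet18()
backbone.fc = torch.nn.Identity()  # strip the classification head

trainer = Trainer(
    max_epochs=100,
    accelerator="gpu",
    devices=1,
    logger=WandbLogger(project="solo-learn", offline=True),
    callbacks=[LearningRateMonitor(logging_interval="step")],
)
# linear_model = LinearModel(backbone, cfg)  # cfg comes from solo's config system
# trainer.fit(linear_model, train_loader, val_loader)
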
pytorch_lightning.strategies.ddp import DDPStrategy - from solo.args.pretrain import parse_cfg from solo.data.classification_dataloader import prepare_data as prepare_data_classification from solo.data.pretrain_dataloader import ( @@ -177,7 +176,6 @@ def main(cfg: DictConfig): callbacks = [] if cfg.checkpoint.enabled: - # save checkpoint on last epoch only ckpt = Checkpointer( cfg, logdir=os.path.join(cfg.checkpoint.dir, cfg.method), @@ -230,23 +228,6 @@ def main(cfg: DictConfig): ) trainer = Trainer(**trainer_kwargs) - # fix for incompatibility with nvidia-dali and pytorch lightning - # with dali 1.15 (this will be fixed on 1.16) - # https://github.com/Lightning-AI/lightning/issues/12956 - try: - from pytorch_lightning.loops import FitLoop - - class WorkaroundFitLoop(FitLoop): - @property - def prefetch_batches(self) -> int: - return 1 - - trainer.fit_loop = WorkaroundFitLoop( - trainer.fit_loop.min_epochs, trainer.fit_loop.max_epochs - ) - except: - pass - if cfg.data.format == "dali": trainer.fit(model, ckpt_path=ckpt_path, datamodule=dali_datamodule) else: diff --git a/requirements.txt b/requirements.txt index bafca826..5e1d63e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ torch>=1.10.0 torchvision>=0.11.1 einops -pytorch-lightning==2.0.2 +lightning==2.1.2 torchmetrics>=0.6.0, <0.12.0 tqdm wandb diff --git a/scripts/pretrain/cifar-multicrop/swav.yaml b/scripts/pretrain/cifar-multicrop/swav.yaml index 5a9c436c..c36b7669 100644 --- a/scripts/pretrain/cifar-multicrop/swav.yaml +++ b/scripts/pretrain/cifar-multicrop/swav.yaml @@ -55,4 +55,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/barlow.yaml b/scripts/pretrain/cifar/barlow.yaml index 86c1aa68..728f14ba 100644 --- a/scripts/pretrain/cifar/barlow.yaml +++ b/scripts/pretrain/cifar/barlow.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/byol.yaml b/scripts/pretrain/cifar/byol.yaml index d3c163be..eec69496 100644 --- a/scripts/pretrain/cifar/byol.yaml +++ b/scripts/pretrain/cifar/byol.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/deepclusterv2.yaml b/scripts/pretrain/cifar/deepclusterv2.yaml index c3159f23..f8847859 100644 --- a/scripts/pretrain/cifar/deepclusterv2.yaml +++ b/scripts/pretrain/cifar/deepclusterv2.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/dino.yaml b/scripts/pretrain/cifar/dino.yaml index 843cbb6c..008e3abf 100644 --- a/scripts/pretrain/cifar/dino.yaml +++ b/scripts/pretrain/cifar/dino.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mae.yaml b/scripts/pretrain/cifar/mae.yaml index 939c40fc..0d8f8bad 100644 --- a/scripts/pretrain/cifar/mae.yaml +++ b/scripts/pretrain/cifar/mae.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mocov2plus.yaml b/scripts/pretrain/cifar/mocov2plus.yaml index 6e3b137b..8c990b19 100644 --- a/scripts/pretrain/cifar/mocov2plus.yaml +++ b/scripts/pretrain/cifar/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0] 
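
The requirements.txt switch from pytorch-lightning==2.0.2 to lightning==2.1.2 and the repeated precision: 16 to precision: 16-mixed edits (which continue through the remaining pretrain configs below) belong together: Lightning 2.x uses the string 16-mixed as the canonical way to request mixed precision. A rough sketch of how these trainer-level YAML keys map onto a lightning.pytorch Trainer call; the epoch count, model, and datamodule are placeholders rather than values from any particular config:

# Sketch only: the trainer-level keys shared by these configs, expressed as
# Lightning 2.x Trainer arguments. model and datamodule are placeholders.
from lightning.pytorch import Trainer

trainer = Trainer(
    max_epochs=1000,       # illustrative; each config defines its own schedule
    devices=[0],
    accelerator="gpu",
    strategy="ddp",
    sync_batchnorm=True,
    precision="16-mixed",  # Lightning 2.x spelling of the old precision: 16
)
# trainer.fit(model, datamodule=datamodule)
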
sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mocov3.yaml b/scripts/pretrain/cifar/mocov3.yaml index a23b8c1e..9eccbd2d 100644 --- a/scripts/pretrain/cifar/mocov3.yaml +++ b/scripts/pretrain/cifar/mocov3.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnbyol.yaml b/scripts/pretrain/cifar/nnbyol.yaml index 33110109..5cec47ba 100644 --- a/scripts/pretrain/cifar/nnbyol.yaml +++ b/scripts/pretrain/cifar/nnbyol.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnclr.yaml b/scripts/pretrain/cifar/nnclr.yaml index d10f3527..2786f365 100644 --- a/scripts/pretrain/cifar/nnclr.yaml +++ b/scripts/pretrain/cifar/nnclr.yaml @@ -52,4 +52,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnsiam.yaml b/scripts/pretrain/cifar/nnsiam.yaml index 9d1102c6..3d611e7a 100644 --- a/scripts/pretrain/cifar/nnsiam.yaml +++ b/scripts/pretrain/cifar/nnsiam.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/ressl.yaml b/scripts/pretrain/cifar/ressl.yaml index 1bc44a70..7272f622 100644 --- a/scripts/pretrain/cifar/ressl.yaml +++ b/scripts/pretrain/cifar/ressl.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/simclr.yaml b/scripts/pretrain/cifar/simclr.yaml index 6902362d..0531365a 100644 --- a/scripts/pretrain/cifar/simclr.yaml +++ b/scripts/pretrain/cifar/simclr.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/simsiam.yaml b/scripts/pretrain/cifar/simsiam.yaml index bc9874a4..dec94d43 100644 --- a/scripts/pretrain/cifar/simsiam.yaml +++ b/scripts/pretrain/cifar/simsiam.yaml @@ -47,4 +47,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/supcon.yaml b/scripts/pretrain/cifar/supcon.yaml index 392069de..365317b8 100644 --- a/scripts/pretrain/cifar/supcon.yaml +++ b/scripts/pretrain/cifar/supcon.yaml @@ -46,4 +46,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/swav.yaml b/scripts/pretrain/cifar/swav.yaml index 14f71dd1..01d6c431 100644 --- a/scripts/pretrain/cifar/swav.yaml +++ b/scripts/pretrain/cifar/swav.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/vibcreg.yaml b/scripts/pretrain/cifar/vibcreg.yaml index a8deb2cc..ebc2404f 100644 --- a/scripts/pretrain/cifar/vibcreg.yaml +++ b/scripts/pretrain/cifar/vibcreg.yaml @@ -74,4 +74,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/vicreg.yaml b/scripts/pretrain/cifar/vicreg.yaml index 4f04c709..0a8db311 100644 --- a/scripts/pretrain/cifar/vicreg.yaml +++ b/scripts/pretrain/cifar/vicreg.yaml @@ -80,4 +80,4 @@ devices: [0] sync_batchnorm: True 
accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/wmse.yaml b/scripts/pretrain/cifar/wmse.yaml index 76d6f652..7b77e45e 100644 --- a/scripts/pretrain/cifar/wmse.yaml +++ b/scripts/pretrain/cifar/wmse.yaml @@ -70,4 +70,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/custom/byol.yaml b/scripts/pretrain/custom/byol.yaml index 943ee52b..517dcb4a 100644 --- a/scripts/pretrain/custom/byol.yaml +++ b/scripts/pretrain/custom/byol.yaml @@ -60,4 +60,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/byol.yaml b/scripts/pretrain/imagenet-100-multicrop/byol.yaml index 345647d3..f17ede00 100644 --- a/scripts/pretrain/imagenet-100-multicrop/byol.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/byol.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/simclr.yaml b/scripts/pretrain/imagenet-100-multicrop/simclr.yaml index 082bb666..cbd804bd 100644 --- a/scripts/pretrain/imagenet-100-multicrop/simclr.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/simclr.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/supcon.yaml b/scripts/pretrain/imagenet-100-multicrop/supcon.yaml index e6b44e8c..5de0a77c 100644 --- a/scripts/pretrain/imagenet-100-multicrop/supcon.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/supcon.yaml @@ -46,4 +46,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/barlow.yaml b/scripts/pretrain/imagenet-100/barlow.yaml index 6b56eb81..ddd2da67 100644 --- a/scripts/pretrain/imagenet-100/barlow.yaml +++ b/scripts/pretrain/imagenet-100/barlow.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/byol.yaml b/scripts/pretrain/imagenet-100/byol.yaml index c4a0170b..35cd7d56 100644 --- a/scripts/pretrain/imagenet-100/byol.yaml +++ b/scripts/pretrain/imagenet-100/byol.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/deepclusterv2.yaml b/scripts/pretrain/imagenet-100/deepclusterv2.yaml index 673bd8a9..f6c023f0 100644 --- a/scripts/pretrain/imagenet-100/deepclusterv2.yaml +++ b/scripts/pretrain/imagenet-100/deepclusterv2.yaml @@ -55,4 +55,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/dino.yaml b/scripts/pretrain/imagenet-100/dino.yaml index b38fbd75..1129e125 100644 --- a/scripts/pretrain/imagenet-100/dino.yaml +++ b/scripts/pretrain/imagenet-100/dino.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/dino_vit.yaml b/scripts/pretrain/imagenet-100/dino_vit.yaml index 2d70d023..89ff43b6 100644 --- a/scripts/pretrain/imagenet-100/dino_vit.yaml +++ b/scripts/pretrain/imagenet-100/dino_vit.yaml @@ -51,4 +51,4 @@ 
devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mae.yaml b/scripts/pretrain/imagenet-100/mae.yaml index bab22bcd..7366cd64 100644 --- a/scripts/pretrain/imagenet-100/mae.yaml +++ b/scripts/pretrain/imagenet-100/mae.yaml @@ -51,4 +51,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mocov2plus.yaml b/scripts/pretrain/imagenet-100/mocov2plus.yaml index a097a9dd..afbe0b4c 100644 --- a/scripts/pretrain/imagenet-100/mocov2plus.yaml +++ b/scripts/pretrain/imagenet-100/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mocov3.yaml b/scripts/pretrain/imagenet-100/mocov3.yaml index fc44e880..df5d4def 100644 --- a/scripts/pretrain/imagenet-100/mocov3.yaml +++ b/scripts/pretrain/imagenet-100/mocov3.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mocov3_vit.yaml b/scripts/pretrain/imagenet-100/mocov3_vit.yaml index 1eda5376..af942c54 100644 --- a/scripts/pretrain/imagenet-100/mocov3_vit.yaml +++ b/scripts/pretrain/imagenet-100/mocov3_vit.yaml @@ -50,4 +50,4 @@ devices: [0, 1, 2, 3, 4, 5, 6, 7] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/nnclr.yaml b/scripts/pretrain/imagenet-100/nnclr.yaml index 08ec6806..422b7bec 100644 --- a/scripts/pretrain/imagenet-100/nnclr.yaml +++ b/scripts/pretrain/imagenet-100/nnclr.yaml @@ -52,4 +52,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/ressl.yaml b/scripts/pretrain/imagenet-100/ressl.yaml index 1e7402df..70416d60 100644 --- a/scripts/pretrain/imagenet-100/ressl.yaml +++ b/scripts/pretrain/imagenet-100/ressl.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/simclr.yaml b/scripts/pretrain/imagenet-100/simclr.yaml index 478851c1..8a07198f 100644 --- a/scripts/pretrain/imagenet-100/simclr.yaml +++ b/scripts/pretrain/imagenet-100/simclr.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/simsiam.yaml b/scripts/pretrain/imagenet-100/simsiam.yaml index 06c27a74..dab8055b 100644 --- a/scripts/pretrain/imagenet-100/simsiam.yaml +++ b/scripts/pretrain/imagenet-100/simsiam.yaml @@ -48,4 +48,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/supcon.yaml b/scripts/pretrain/imagenet-100/supcon.yaml index a101c56d..0b91b881 100644 --- a/scripts/pretrain/imagenet-100/supcon.yaml +++ b/scripts/pretrain/imagenet-100/supcon.yaml @@ -46,4 +46,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/swav.yaml b/scripts/pretrain/imagenet-100/swav.yaml index 17aae787..1833f54a 100644 --- a/scripts/pretrain/imagenet-100/swav.yaml +++ b/scripts/pretrain/imagenet-100/swav.yaml @@ -54,4 +54,4 
@@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/vibcreg.yaml b/scripts/pretrain/imagenet-100/vibcreg.yaml index 7f343983..ba9c8910 100644 --- a/scripts/pretrain/imagenet-100/vibcreg.yaml +++ b/scripts/pretrain/imagenet-100/vibcreg.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/vicreg.yaml b/scripts/pretrain/imagenet-100/vicreg.yaml index 7263b5b4..68e817fb 100644 --- a/scripts/pretrain/imagenet-100/vicreg.yaml +++ b/scripts/pretrain/imagenet-100/vicreg.yaml @@ -81,4 +81,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/wmse.yaml b/scripts/pretrain/imagenet-100/wmse.yaml index 80713b5e..3e174784 100644 --- a/scripts/pretrain/imagenet-100/wmse.yaml +++ b/scripts/pretrain/imagenet-100/wmse.yaml @@ -47,4 +47,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/barlow.yaml b/scripts/pretrain/imagenet/barlow.yaml index 5ef3d958..e799282b 100644 --- a/scripts/pretrain/imagenet/barlow.yaml +++ b/scripts/pretrain/imagenet/barlow.yaml @@ -51,4 +51,4 @@ devices: [0, 1, 2, 3] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/byol.yaml b/scripts/pretrain/imagenet/byol.yaml index 2636108a..23a1069c 100644 --- a/scripts/pretrain/imagenet/byol.yaml +++ b/scripts/pretrain/imagenet/byol.yaml @@ -53,5 +53,5 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed accumulate_grad_batches: 16 diff --git a/scripts/pretrain/imagenet/mae.yaml b/scripts/pretrain/imagenet/mae.yaml index b9e327c8..96886368 100644 --- a/scripts/pretrain/imagenet/mae.yaml +++ b/scripts/pretrain/imagenet/mae.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/mocov2plus.yaml b/scripts/pretrain/imagenet/mocov2plus.yaml index d86043de..0fabde50 100644 --- a/scripts/pretrain/imagenet/mocov2plus.yaml +++ b/scripts/pretrain/imagenet/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/solo/data/dali_dataloader.py b/solo/data/dali_dataloader.py index aad21a5a..b10a399a 100644 --- a/solo/data/dali_dataloader.py +++ b/solo/data/dali_dataloader.py @@ -21,18 +21,19 @@ from pathlib import Path from typing import Callable, List, Optional, Union +import lightning.pytorch as pl import nvidia.dali.fn as fn import nvidia.dali.ops as ops import nvidia.dali.types as types import omegaconf -import pytorch_lightning as pl import torch import torch.nn as nn from nvidia.dali import pipeline_def from nvidia.dali.plugin.pytorch import DALIGenericIterator, LastBatchPolicy +from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD + from solo.data.temp_dali_fix import TempDALIGenericIterator from solo.utils.misc import omegaconf_select -from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD class RandomGrayScaleConversion: diff --git a/solo/methods/base.py b/solo/methods/base.py index af8bc352..d771b4f3 100644 --- a/solo/methods/base.py +++ 
b/solo/methods/base.py @@ -21,11 +21,13 @@ from functools import partial from typing import Any, Callable, Dict, List, Sequence, Tuple, Union +import lightning.pytorch as pl import omegaconf -import pytorch_lightning as pl import torch import torch.nn as nn import torch.nn.functional as F +from torch.optim.lr_scheduler import MultiStepLR + from solo.backbones import ( convnext_base, convnext_large, @@ -55,7 +57,6 @@ from solo.utils.metrics import accuracy_at_k, weighted_mean from solo.utils.misc import omegaconf_select, remove_bias_and_norm_from_weight_decay from solo.utils.momentum import MomentumUpdater, initialize_momentum_params -from torch.optim.lr_scheduler import MultiStepLR def static_lr( diff --git a/solo/methods/linear.py b/solo/methods/linear.py index d84cc6e4..066ffd63 100644 --- a/solo/methods/linear.py +++ b/solo/methods/linear.py @@ -20,11 +20,13 @@ import logging from typing import Any, Callable, Dict, List, Tuple, Union +import lightning.pytorch as pl import omegaconf -import pytorch_lightning as pl import torch import torch.nn as nn import torch.nn.functional as F +from torch.optim.lr_scheduler import ExponentialLR, MultiStepLR, ReduceLROnPlateau + from solo.utils.lars import LARS from solo.utils.lr_scheduler import LinearWarmupCosineAnnealingLR from solo.utils.metrics import accuracy_at_k, weighted_mean @@ -33,7 +35,6 @@ param_groups_layer_decay, remove_bias_and_norm_from_weight_decay, ) -from torch.optim.lr_scheduler import ExponentialLR, MultiStepLR, ReduceLROnPlateau class LinearModel(pl.LightningModule): diff --git a/solo/utils/auto_umap.py b/solo/utils/auto_umap.py index 7c397187..62e3d9e1 100644 --- a/solo/utils/auto_umap.py +++ b/solo/utils/auto_umap.py @@ -25,19 +25,20 @@ from pathlib import Path from typing import Optional, Union +import lightning.pytorch as pl import pandas as pd -import pytorch_lightning as pl import seaborn as sns import torch import torch.nn as nn import umap -import wandb +from lightning.pytorch.callbacks import Callback from matplotlib import pyplot as plt from omegaconf import DictConfig -from pytorch_lightning.callbacks import Callback -from solo.utils.misc import gather, omegaconf_select from tqdm import tqdm +import wandb +from solo.utils.misc import gather, omegaconf_select + class AutoUMAP(Callback): def __init__( diff --git a/solo/utils/checkpointer.py b/solo/utils/checkpointer.py index 14ded7bd..b376e7bb 100644 --- a/solo/utils/checkpointer.py +++ b/solo/utils/checkpointer.py @@ -25,9 +25,10 @@ from pathlib import Path from typing import Optional, Union -import pytorch_lightning as pl +import lightning.pytorch as pl +from lightning.pytorch.callbacks import Callback from omegaconf import DictConfig, OmegaConf -from pytorch_lightning.callbacks import Callback + from solo.utils.misc import omegaconf_select @@ -136,13 +137,20 @@ def save(self, trainer: pl.Trainer): trainer (pl.Trainer): pytorch lightning trainer object. 
""" - if trainer.is_global_zero and not trainer.sanity_checking: + if not trainer.sanity_checking: epoch = trainer.current_epoch # type: ignore ckpt = self.path / self.ckpt_placeholder.format(epoch) trainer.save_checkpoint(ckpt) - if self.last_ckpt and self.last_ckpt != ckpt and not self.keep_prev: - os.remove(self.last_ckpt) + if ( + trainer.is_global_zero + and self.last_ckpt + and self.last_ckpt != ckpt + and not self.keep_prev + ): + os.remove( + self.last_ckpt, + ) self.last_ckpt = ckpt def on_train_start(self, trainer: pl.Trainer, _): diff --git a/tests/methods/utils.py b/tests/methods/utils.py index 59c5e490..ced425fb 100644 --- a/tests/methods/utils.py +++ b/tests/methods/utils.py @@ -21,9 +21,13 @@ import numpy as np import torch +from lightning.pytorch import Trainer from omegaconf import OmegaConf from PIL import Image -from pytorch_lightning import Trainer +from torch.utils.data import DataLoader +from torchvision import transforms +from torchvision.datasets import FakeData + from solo.data.pretrain_dataloader import ( FullTransformPipeline, NCropAugmentation, @@ -31,9 +35,6 @@ dataset_with_index, prepare_dataloader, ) -from torch.utils.data import DataLoader -from torchvision import transforms -from torchvision.datasets import FakeData def gen_base_cfg(
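
The Checkpointer.save change above removes the top-level rank guard so that every rank calls trainer.save_checkpoint (whose write is coordinated by the Lightning strategy), while deleting the previous checkpoint file remains restricted to the global-zero rank. A condensed sketch of that pattern as a standalone callback; the class name, constructor, and the choice of on_train_epoch_end as the hook are illustrative and not the actual solo implementation:

# Sketch of the save/cleanup pattern introduced above. RotatingCheckpointer,
# its constructor, and the hook choice are illustrative, not solo-learn code.
import os

import lightning.pytorch as pl
from lightning.pytorch.callbacks import Callback


class RotatingCheckpointer(Callback):
    """Saves a checkpoint every epoch and keeps only the newest one."""

    def __init__(self, dirpath: str, keep_prev: bool = False):
        self.dirpath = dirpath
        self.keep_prev = keep_prev
        self.last_ckpt = None

    def on_train_epoch_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule):
        if trainer.sanity_checking:
            return
        ckpt = os.path.join(self.dirpath, f"ep={trainer.current_epoch}.ckpt")
        # Called on every rank; the strategy decides which rank(s) actually write.
        trainer.save_checkpoint(ckpt)
        # Only global rank zero touches the filesystem for cleanup.
        if (
            trainer.is_global_zero
            and self.last_ckpt
            and self.last_ckpt != ckpt
            and not self.keep_prev
        ):
            os.remove(self.last_ckpt)
        self.last_ckpt = ckpt
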