From 7a071fa658033600b8517c15d076e5597be80b55 Mon Sep 17 00:00:00 2001
From: Mhrooz
Date: Tue, 9 Jul 2024 14:20:03 +0200
Subject: [PATCH] for notes

---
 .gitignore      |  8 +++---
 UNet/train.py   |  6 ++---
 diffusion.ipynb | 65 ++++---------------------------------------------
 3 files changed, 13 insertions(+), 66 deletions(-)

diff --git a/.gitignore b/.gitignore
index c21194d..6c73cd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,10 @@
 ./flowers/*
 .DS_Store
-./UNet/train_image/*
-./UNet/params/*
-./UNet/__pycache__/*
+UNet/train_image/*
+UNet/params/*
+UNet/__pycache__/*
+UNet/test_image
 data/
 archive.zip
 flowers/*
+UNet/result/result.jpg
diff --git a/UNet/train.py b/UNet/train.py
index dd2f408..576983f 100644
--- a/UNet/train.py
+++ b/UNet/train.py
@@ -7,9 +7,9 @@ from net import *
 from torchvision.utils import save_image
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-weight_path = r'/Users/hanzhangma/Nextcloud/mhz/Study/SS24/MasterThesis/UNet/params/unet.pth'
-data_path = r'/Users/hanzhangma/Document/DataSet/VOC2007'
-save_path = r'/Users/hanzhangma/Nextcloud/mhz/Study/SS24/MasterThesis/Unet/train_image'
+weight_path = r'D:\\MasterThesis\\UNet\\params\\unet.pth'
+data_path = r'D:\\MasterThesis\\data\\VOCdevkit\\VOC2007'
+save_path = r'D:\\MasterThesis\\UNet\\train_image'
 
 if __name__ == '__main__':
     data_loader = DataLoader(MyDataset(data_path), batch_size= 4, shuffle=True)
diff --git a/diffusion.ipynb b/diffusion.ipynb
index 344484a..0f699c5 100644
--- a/diffusion.ipynb
+++ b/diffusion.ipynb
@@ -665,6 +665,7 @@
     "\n",
     "        num_resolutions = len(base_channels_multiples)\n",
     "\n",
+    "        # encoder blocks = resnetblock * 3 + \n",
     "        self.encoder_blocks = nn.ModuleList()\n",
     "        curr_channels = [base_channels]\n",
     "        in_channels = base_channels\n",
@@ -799,6 +800,7 @@
     "        self.sqrt_one_minus_alpha_cumulative = torch.sqrt(1-self.alpha_cumulative)\n",
     "\n",
     "    def get_betas(self):\n",
+    "        \"\"\"Linear schedule, as proposed in the original DDPM paper\"\"\"\n",
     "        scale = 1000 / self.num_diffusion_timesteps\n",
     "        beta_start = scale * 1e-4\n",
     "        beta_end = scale * 0.02\n",
@@ -896,66 +898,6 @@
     "## Training"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 99,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "@dataclass\n",
-    "class ModelConfig:\n",
-    "    BASE_CH = 64 # 64, 128, 256, 256\n",
-    "    BASE_CH_MULT = (1, 2, 4, 4) # 32, 16, 8, 8\n",
-    "    APPLY_ATTENTION = (False, True, True, False)\n",
-    "    DROPOUT_RATE = 0.1\n",
-    "    TIME_EMB_MULT = 4 # 128"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 100,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = UNet(\n",
-    "    input_channels = TrainingConfig.IMG_SHAPE[0],\n",
-    "    output_channels = TrainingConfig.IMG_SHAPE[0],\n",
-    "    base_channels = ModelConfig.BASE_CH,\n",
-    "    base_channels_multiples = ModelConfig.BASE_CH_MULT,\n",
-    "    apply_attention = ModelConfig.APPLY_ATTENTION,\n",
-    "    dropout_rate = ModelConfig.DROPOUT_RATE,\n",
-    "    time_multiple = ModelConfig.TIME_EMB_MULT,\n",
-    ")\n",
-    "model.to(BaseConfig.DEVICE)\n",
-    "\n",
-    "optimizer = torch.optim.AdamW(model.parameters(), lr=TrainingConfig.LR)\n",
-    "\n",
-    "dataloader = get_dataloader(\n",
-    "    dataset_name = BaseConfig.DATASET,\n",
-    "    batch_size = TrainingConfig.BATCH_SIZE,\n",
-    "    device = BaseConfig.DEVICE,\n",
-    "    pin_memory = True,\n",
-    "    num_workers = TrainingConfig.NUM_WORKERS,\n",
-    ")\n",
-    "\n",
-    "loss_fn = nn.MSELoss()\n",
-    "\n",
-    "sd = SimpleDiffusion(\n",
-    "    num_diffusion_timesteps = TrainingConfig.TIMESTEPS,\n",
-    "    img_shape = TrainingConfig.IMG_SHAPE,\n",
-    "    device = BaseConfig.DEVICE,\n",
-    ")\n",
-    "\n",
-    "scaler = amp.GradScaler()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Training"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 101,
@@ -1051,13 +993,16 @@
     "        for x0s, _ in loader:\n",
     "            tq.update(1)\n",
     "            \n",
+    "            # generate noise\n",
     "            ts = torch.randint(low=1, high=training_config.TIMESTEPS, size=(x0s.shape[0],), device=base_config.DEVICE)\n",
     "            xts, gt_noise = forward_diffusion(sd, x0s, ts)\n",
     "\n",
+    "            # forward pass & compute loss\n",
     "            with amp.autocast():\n",
     "                pred_noise = model(xts, ts)\n",
     "                loss = loss_fn(gt_noise, pred_noise)\n",
     "\n",
+    "            # gradient scaling and backpropagation\n",
     "            optimizer.zero_grad(set_to_none=True)\n",
     "            scaler.scale(loss).backward()\n",
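
Note on the diffusion.ipynb hunks above: the added comments refer to the linear beta schedule in get_betas() and to the one-shot forward-diffusion step that produces the noised images and the ground-truth noise used by the training loop. The standalone Python sketch below illustrates both. It is not the notebook's code: the helper names (linear_beta_schedule, forward_diffusion_sample), the stand-in batch, and the default T=1000 are assumptions made for illustration; only the schedule constants (scale * 1e-4, scale * 0.02) and the sqrt(alpha_cumulative) / sqrt(1 - alpha_cumulative) terms come directly from the patch.

# Hedged sketch only -- helper names and defaults are assumptions, not the notebook's API.
import torch

def linear_beta_schedule(num_timesteps: int = 1000) -> torch.Tensor:
    """Linear schedule from the original DDPM paper, mirroring get_betas() in the diff."""
    scale = 1000 / num_timesteps
    beta_start = scale * 1e-4
    beta_end = scale * 0.02
    return torch.linspace(beta_start, beta_end, num_timesteps, dtype=torch.float32)

def forward_diffusion_sample(x0, ts, sqrt_alpha_cumulative, sqrt_one_minus_alpha_cumulative):
    """q(x_t | x_0): noise x0 directly to timestep t; returns the noised image and the target noise."""
    eps = torch.randn_like(x0)                                    # ground-truth noise the UNet must predict
    mean = sqrt_alpha_cumulative[ts].view(-1, 1, 1, 1) * x0       # sqrt(alpha_bar_t) * x0
    std = sqrt_one_minus_alpha_cumulative[ts].view(-1, 1, 1, 1)   # sqrt(1 - alpha_bar_t)
    return mean + std * eps, eps

# Usage: precompute the schedule once, then noise a batch at random timesteps,
# as the training loop in the patch does via forward_diffusion(sd, x0s, ts).
T = 1000
betas = linear_beta_schedule(T)
alphas = 1.0 - betas
alpha_cumulative = torch.cumprod(alphas, dim=0)
x0s = torch.randn(4, 3, 32, 32)                                   # stand-in batch of images
ts = torch.randint(low=1, high=T, size=(x0s.shape[0],))
xts, gt_noise = forward_diffusion_sample(
    x0s, ts, torch.sqrt(alpha_cumulative), torch.sqrt(1 - alpha_cumulative)
)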