Reformat

2023-11-16 22:36:46 +00:00
parent 378dd18298
commit 1958463f02
5 changed files with 227 additions and 68 deletions
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -48,7 +48,9 @@ def main(_):
        config.resume_from = os.path.normpath(os.path.expanduser(config.resume_from))
        if "checkpoint_" not in os.path.basename(config.resume_from):
            # get the most recent checkpoint in this directory
-            checkpoints = list(filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from)))
+            checkpoints = list(
+                filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from))
+            )
            if len(checkpoints) == 0:
                raise ValueError(f"No checkpoints found in {config.resume_from}")
            config.resume_from = os.path.join(
@@ -72,11 +74,14 @@ def main(_):
        # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the
        # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get
        # the total number of optimizer steps to accumulate across.
-        gradient_accumulation_steps=config.train.gradient_accumulation_steps * num_train_timesteps,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps
+        * num_train_timesteps,
    )
    if accelerator.is_main_process:
        accelerator.init_trackers(
-            project_name="ddpo-pytorch", config=config.to_dict(), init_kwargs={"wandb": {"name": config.run_name}}
+            project_name="ddpo-pytorch",
+            config=config.to_dict(),
+            init_kwargs={"wandb": {"name": config.run_name}},
        )
    logger.info(f"\n{config}")

@@ -84,7 +89,9 @@ def main(_):
    set_seed(config.seed, device_specific=True)

    # load scheduler, tokenizer and models.
-    pipeline = StableDiffusionPipeline.from_pretrained(config.pretrained.model, revision=config.pretrained.revision)
+    pipeline = StableDiffusionPipeline.from_pretrained(
+        config.pretrained.model, revision=config.pretrained.revision
+    )
    # freeze parameters of models to save more memory
    pipeline.vae.requires_grad_(False)
    pipeline.text_encoder.requires_grad_(False)
@@ -121,18 +128,24 @@ def main(_):
        lora_attn_procs = {}
        for name in pipeline.unet.attn_processors.keys():
            cross_attention_dim = (
-                None if name.endswith("attn1.processor") else pipeline.unet.config.cross_attention_dim
+                None
+                if name.endswith("attn1.processor")
+                else pipeline.unet.config.cross_attention_dim
            )
            if name.startswith("mid_block"):
                hidden_size = pipeline.unet.config.block_out_channels[-1]
            elif name.startswith("up_blocks"):
                block_id = int(name[len("up_blocks.")])
-                hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[block_id]
+                hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[
+                    block_id
+                ]
            elif name.startswith("down_blocks"):
                block_id = int(name[len("down_blocks.")])
                hidden_size = pipeline.unet.config.block_out_channels[block_id]

-            lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim)
+            lora_attn_procs[name] = LoRAAttnProcessor(
+                hidden_size=hidden_size, cross_attention_dim=cross_attention_dim
+            )
        pipeline.unet.set_attn_processor(lora_attn_procs)

        # this is a hack to synchronize gradients properly. the module that registers the parameters we care about (in
@@ -163,13 +176,19 @@ def main(_):
        if config.use_lora and isinstance(models[0], AttnProcsLayers):
            # pipeline.unet.load_attn_procs(input_dir)
            tmp_unet = UNet2DConditionModel.from_pretrained(
-                config.pretrained.model, revision=config.pretrained.revision, subfolder="unet"
+                config.pretrained.model,
+                revision=config.pretrained.revision,
+                subfolder="unet",
            )
            tmp_unet.load_attn_procs(input_dir)
-            models[0].load_state_dict(AttnProcsLayers(tmp_unet.attn_processors).state_dict())
+            models[0].load_state_dict(
+                AttnProcsLayers(tmp_unet.attn_processors).state_dict()
+            )
            del tmp_unet
        elif not config.use_lora and isinstance(models[0], UNet2DConditionModel):
-            load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
+            load_model = UNet2DConditionModel.from_pretrained(
+                input_dir, subfolder="unet"
+            )
            models[0].register_to_config(**load_model.config)
            models[0].load_state_dict(load_model.state_dict())
            del load_model
@@ -243,20 +262,32 @@ def main(_):
    executor = futures.ThreadPoolExecutor(max_workers=2)

    # Train!
-    samples_per_epoch = config.sample.batch_size * accelerator.num_processes * config.sample.num_batches_per_epoch
+    samples_per_epoch = (
+        config.sample.batch_size
+        * accelerator.num_processes
+        * config.sample.num_batches_per_epoch
+    )
    total_train_batch_size = (
-        config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
+        config.train.batch_size
+        * accelerator.num_processes
+        * config.train.gradient_accumulation_steps
    )

    logger.info("***** Running training *****")
    logger.info(f"  Num Epochs = {config.num_epochs}")
    logger.info(f"  Sample batch size per device = {config.sample.batch_size}")
    logger.info(f"  Train batch size per device = {config.train.batch_size}")
-    logger.info(f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}")
+    logger.info(
+        f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}"
+    )
    logger.info("")
    logger.info(f"  Total number of samples per epoch = {samples_per_epoch}")
-    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}")
-    logger.info(f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
+    logger.info(
+        f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}"
+    )
+    logger.info(
+        f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}"
+    )
    logger.info(f"  Number of inner epochs = {config.train.num_inner_epochs}")

    assert config.sample.batch_size >= config.train.batch_size
@@ -284,7 +315,10 @@ def main(_):
        ):
            # generate prompts
            prompts, prompt_metadata = zip(
-                *[prompt_fn(**config.prompt_fn_kwargs) for _ in range(config.sample.batch_size)]
+                *[
+                    prompt_fn(**config.prompt_fn_kwargs)
+                    for _ in range(config.sample.batch_size)
+                ]
            )

            # encode prompts
@@ -309,9 +343,13 @@ def main(_):
                    output_type="pt",
                )

-            latents = torch.stack(latents, dim=1)  # (batch_size, num_steps + 1, 4, 64, 64)
+            latents = torch.stack(
+                latents, dim=1
+            )  # (batch_size, num_steps + 1, 4, 64, 64)
            log_probs = torch.stack(log_probs, dim=1)  # (batch_size, num_steps, 1)
-            timesteps = pipeline.scheduler.timesteps.repeat(config.sample.batch_size, 1)  # (batch_size, num_steps)
+            timesteps = pipeline.scheduler.timesteps.repeat(
+                config.sample.batch_size, 1
+            )  # (batch_size, num_steps)

            # compute rewards asynchronously
            rewards = executor.submit(reward_fn, images, prompts, prompt_metadata)
@@ -323,8 +361,12 @@ def main(_):
                    "prompt_ids": prompt_ids,
                    "prompt_embeds": prompt_embeds,
                    "timesteps": timesteps,
-                    "latents": latents[:, :-1],  # each entry is the latent before timestep t
-                    "next_latents": latents[:, 1:],  # each entry is the latent after timestep t
+                    "latents": latents[
+                        :, :-1
+                    ],  # each entry is the latent before timestep t
+                    "next_latents": latents[
+                        :, 1:
+                    ],  # each entry is the latent after timestep t
                    "log_probs": log_probs,
                    "rewards": rewards,
                }
@@ -347,14 +389,21 @@ def main(_):
        # this is a hack to force wandb to log the images as JPEGs instead of PNGs
        with tempfile.TemporaryDirectory() as tmpdir:
            for i, image in enumerate(images):
-                pil = Image.fromarray((image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8))
+                pil = Image.fromarray(
+                    (image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)
+                )
                pil = pil.resize((256, 256))
                pil.save(os.path.join(tmpdir, f"{i}.jpg"))
            accelerator.log(
                {
                    "images": [
-                        wandb.Image(os.path.join(tmpdir, f"{i}.jpg"), caption=f"{prompt:.25} | {reward:.2f}")
-                        for i, (prompt, reward) in enumerate(zip(prompts, rewards))  # only log rewards from process 0
+                        wandb.Image(
+                            os.path.join(tmpdir, f"{i}.jpg"),
+                            caption=f"{prompt:.25} | {reward:.2f}",
+                        )
+                        for i, (prompt, reward) in enumerate(
+                            zip(prompts, rewards)
+                        )  # only log rewards from process 0
                    ],
                },
                step=global_step,
@@ -365,7 +414,12 @@ def main(_):

        # log rewards and images
        accelerator.log(
-            {"reward": rewards, "epoch": epoch, "reward_mean": rewards.mean(), "reward_std": rewards.std()},
+            {
+                "reward": rewards,
+                "epoch": epoch,
+                "reward_mean": rewards.mean(),
+                "reward_std": rewards.std(),
+            },
            step=global_step,
        )

@@ -373,7 +427,9 @@ def main(_):
        if config.per_prompt_stat_tracking:
            # gather the prompts across processes
            prompt_ids = accelerator.gather(samples["prompt_ids"]).cpu().numpy()
-            prompts = pipeline.tokenizer.batch_decode(prompt_ids, skip_special_tokens=True)
+            prompts = pipeline.tokenizer.batch_decode(
+                prompt_ids, skip_special_tokens=True
+            )
            advantages = stat_tracker.update(prompts, rewards)
        else:
            advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
@@ -389,7 +445,10 @@ def main(_):
        del samples["prompt_ids"]

        total_batch_size, num_timesteps = samples["timesteps"].shape
-        assert total_batch_size == config.sample.batch_size * config.sample.num_batches_per_epoch
+        assert (
+            total_batch_size
+            == config.sample.batch_size * config.sample.num_batches_per_epoch
+        )
        assert num_timesteps == config.sample.num_steps

        #################### TRAINING ####################
@@ -400,16 +459,27 @@ def main(_):

            # shuffle along time dimension independently for each sample
            perms = torch.stack(
-                [torch.randperm(num_timesteps, device=accelerator.device) for _ in range(total_batch_size)]
+                [
+                    torch.randperm(num_timesteps, device=accelerator.device)
+                    for _ in range(total_batch_size)
+                ]
            )
            for key in ["timesteps", "latents", "next_latents", "log_probs"]:
-                samples[key] = samples[key][torch.arange(total_batch_size, device=accelerator.device)[:, None], perms]
+                samples[key] = samples[key][
+                    torch.arange(total_batch_size, device=accelerator.device)[:, None],
+                    perms,
+                ]

            # rebatch for training
-            samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
+            samples_batched = {
+                k: v.reshape(-1, config.train.batch_size, *v.shape[1:])
+                for k, v in samples.items()
+            }

            # dict of lists -> list of dicts for easier iteration
-            samples_batched = [dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]
+            samples_batched = [
+                dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())
+            ]

            # train
            pipeline.unet.train()
@@ -422,7 +492,9 @@ def main(_):
            ):
                if config.train.cfg:
                    # concat negative prompts to sample prompts to avoid two forward passes
-                    embeds = torch.cat([train_neg_prompt_embeds, sample["prompt_embeds"]])
+                    embeds = torch.cat(
+                        [train_neg_prompt_embeds, sample["prompt_embeds"]]
+                    )
                else:
                    embeds = sample["prompt_embeds"]

@@ -442,8 +514,10 @@ def main(_):
                                    embeds,
                                ).sample
                                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                                noise_pred = noise_pred_uncond + config.sample.guidance_scale * (
-                                    noise_pred_text - noise_pred_uncond
+                                noise_pred = (
+                                    noise_pred_uncond
+                                    + config.sample.guidance_scale
+                                    * (noise_pred_text - noise_pred_uncond)
                                )
                            else:
                                noise_pred = unet(
@@ -463,12 +537,16 @@ def main(_):

                        # ppo logic
                        advantages = torch.clamp(
-                            sample["advantages"], -config.train.adv_clip_max, config.train.adv_clip_max
+                            sample["advantages"],
+                            -config.train.adv_clip_max,
+                            config.train.adv_clip_max,
                        )
                        ratio = torch.exp(log_prob - sample["log_probs"][:, j])
                        unclipped_loss = -advantages * ratio
                        clipped_loss = -advantages * torch.clamp(
-                            ratio, 1.0 - config.train.clip_range, 1.0 + config.train.clip_range
+                            ratio,
+                            1.0 - config.train.clip_range,
+                            1.0 + config.train.clip_range,
                        )
                        loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))

@@ -476,14 +554,25 @@ def main(_):
                        # John Schulman says that (ratio - 1) - log(ratio) is a better
                        # estimator, but most existing code uses this so...
                        # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
-                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["approx_kl"].append(
+                            0.5
+                            * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
+                        )
+                        info["clipfrac"].append(
+                            torch.mean(
+                                (
+                                    torch.abs(ratio - 1.0) > config.train.clip_range
+                                ).float()
+                            )
+                        )
                        info["loss"].append(loss)

                        # backward pass
                        accelerator.backward(loss)
                        if accelerator.sync_gradients:
-                            accelerator.clip_grad_norm_(unet.parameters(), config.train.max_grad_norm)
+                            accelerator.clip_grad_norm_(
+                                unet.parameters(), config.train.max_grad_norm
+                            )
                        optimizer.step()
                        optimizer.zero_grad()