Continue implementation
@@ -12,8 +12,12 @@ from ddpo_pytorch.stat_tracking import PerPromptStatTracker
 from ddpo_pytorch.diffusers_patch.pipeline_with_logprob import pipeline_with_logprob
 from ddpo_pytorch.diffusers_patch.ddim_with_logprob import ddim_step_with_logprob
 import torch
+import wandb
+from functools import partial
 import tqdm
+
+tqdm = partial(tqdm.tqdm, dynamic_ncols=True)
 
 
 FLAGS = flags.FLAGS
 config_flags.DEFINE_config_file("config", "config/base.py", "Training configuration.")
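Note: rebinding the module name `tqdm` to a `functools.partial` bakes `dynamic_ncols=True` into every progress bar, so the option doesn't have to be repeated at each call site. A minimal standalone sketch of the idiom:

```python
from functools import partial
import tqdm

# Shadow the module name with a pre-configured constructor; every later
# call to tqdm(...) now gets dynamic_ncols=True (bar resizes with terminal).
tqdm = partial(tqdm.tqdm, dynamic_ncols=True)

for _ in tqdm(range(3), desc="demo"):
    pass
```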
@@ -25,7 +29,7 @@ def main(_):
     # basic Accelerate and logging setup
     config = FLAGS.config
     accelerator = Accelerator(
-        log_with="all",
+        log_with="wandb",
         mixed_precision=config.mixed_precision,
         project_dir=config.logdir,
     )
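Note: switching `log_with` from `"all"` to `"wandb"` restricts Accelerate to the Weights & Biases tracker instead of every installed integration. The tracker still has to be initialized once before any `accelerator.log` call; a minimal sketch (the project name here is a placeholder, not from this repo):

```python
from accelerate import Accelerator

accelerator = Accelerator(log_with="wandb")
# init_trackers must run once before accelerator.log is usable;
# "ddpo-demo" is an illustrative project name.
accelerator.init_trackers(project_name="ddpo-demo")
accelerator.log({"reward_mean": 0.0}, step=0)
accelerator.end_training()  # flushes and closes the tracker
```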
@@ -163,11 +167,12 @@ def main(_):
             config.per_prompt_stat_tracking.min_count,
         )
 
+    global_step = 0
     for epoch in range(config.num_epochs):
         #################### SAMPLING ####################
         samples = []
         prompts = []
-        for i in tqdm.tqdm(
+        for i in tqdm(
             range(config.sample.num_batches_per_epoch),
             desc=f"Epoch {epoch}: sampling",
             disable=not accelerator.is_local_main_process,
@@ -216,7 +221,7 @@ def main(_):
                     "latents": latents[:, :-1],  # each entry is the latent before timestep t
                     "next_latents": latents[:, 1:],  # each entry is the latent after timestep t
                     "log_probs": log_probs,
-                    "rewards": torch.as_tensor(rewards),
+                    "rewards": torch.as_tensor(rewards, device=accelerator.device),
                 }
             )
 
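Note: creating the reward tensor directly on `accelerator.device` matters because the later `accelerator.gather` runs a distributed collective that, under the NCCL backend, only accepts CUDA tensors; a CPU tensor would either fail or force an extra transfer. A standalone sketch of the pattern (stand-in values, single process):

```python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rewards_list = [0.2, 0.7, 1.1]  # stand-in rewards from a reward function
# Build the tensor on the compute device up front so the cross-process
# gather doesn't have to handle a CPU tensor.
rewards = torch.as_tensor(rewards_list, device=device)
```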
@@ -226,6 +231,13 @@ def main(_):
         # gather rewards across processes
         rewards = accelerator.gather(samples["rewards"]).cpu().numpy()
 
+        # log sample-related stuff
+        accelerator.log({"reward": rewards, "epoch": epoch}, step=global_step)
+        accelerator.log(
+            {"images": [wandb.Image(image, caption=prompt) for image, prompt in zip(images, prompts)]},
+            step=global_step,
+        )
+
         # per-prompt mean/std tracking
         if config.per_prompt_stat_tracking:
             # gather the prompts across processes
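Note: rewards are gathered across all processes and moved to CPU as a NumPy array before logging; wandb records a NumPy array as a histogram, and `wandb.Image` wraps each sampled image with its prompt as the caption. Roughly what Accelerate's tracker forwards, in bare wandb (stand-in data, placeholder project name):

```python
import numpy as np
import wandb

run = wandb.init(project="ddpo-demo")               # placeholder project
rewards = np.array([0.2, 0.7, 1.1])                 # stand-in gathered rewards
images = [np.zeros((64, 64, 3), dtype=np.uint8)]    # stand-in sample
prompts = ["a photo of a cat"]                      # stand-in prompt
run.log({"reward": rewards, "epoch": 0}, step=0)    # array -> histogram
run.log({"images": [wandb.Image(im, caption=p)
                    for im, p in zip(images, prompts)]}, step=0)
```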
@@ -268,10 +280,11 @@ def main(_):
             samples_batched = [dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]
 
             # train
-            for i, sample in tqdm.tqdm(
+            for i, sample in tqdm(
                 list(enumerate(samples_batched)),
-                desc=f"Outer epoch {epoch}, inner epoch {inner_epoch}: training",
+                desc=f"Epoch {epoch}.{inner_epoch}: training",
+                position=0,
                 disable=not accelerator.is_local_main_process,
             ):
                 if config.train.cfg:
                     # concat negative prompts to sample prompts to avoid two forward passes
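Note on the `samples_batched` context line in the hunk above: `[dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]` is the standard dict-of-batches to list-of-dicts transpose. A tiny self-contained demonstration:

```python
# {"a": [a0, a1], "b": [b0, b1]}  ->  [{"a": a0, "b": b0}, {"a": a1, "b": b1}]
samples_batched = {"a": [1, 2], "b": [3, 4]}
rebatched = [dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]
assert rebatched == [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
```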
@@ -279,11 +292,12 @@ def main(_):
                 else:
                     embeds = sample["prompt_embeds"]
 
-                for j in tqdm.trange(
-                    num_timesteps,
+                for j in tqdm(
+                    range(num_timesteps),
                     desc=f"Timestep",
+                    position=1,
                     leave=False,
                     disable=not accelerator.is_local_main_process,
                 ):
                     with accelerator.accumulate(pipeline.unet):
                         if config.train.cfg:
@@ -311,7 +325,7 @@ def main(_):
 
                         # ppo logic
                         advantages = torch.clamp(
-                            sample["advantages"][:, j], -config.train.adv_clip_max, config.train.adv_clip_max
+                            sample["advantages"], -config.train.adv_clip_max, config.train.adv_clip_max
                         )
                         ratio = torch.exp(log_prob - sample["log_probs"][:, j])
                         unclipped_loss = -advantages * ratio
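Note: dropping the `[:, j]` index means `sample["advantages"]` is now a single per-sample advantage reused at every timestep, clamped to `±adv_clip_max`, and `ratio` is the importance weight between the current policy and the one that generated the samples. The clipped surrogate that pairs with the visible `unclipped_loss` line is the textbook PPO objective; a hedged sketch of that step:

```python
import torch

def ppo_clipped_loss(log_prob, old_log_prob, advantages, clip_range):
    # Importance ratio between the current policy and the sampling policy.
    ratio = torch.exp(log_prob - old_log_prob)
    unclipped = -advantages * ratio
    clipped = -advantages * torch.clamp(ratio, 1.0 - clip_range, 1.0 + clip_range)
    # Take the pessimistic (larger) loss of the two, then average.
    return torch.mean(torch.maximum(unclipped, clipped))
```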
@@ -326,9 +340,14 @@ def main(_):
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
                         info["approx_kl"] = 0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
-                        info["clipfrac"] = torch.mean(torch.abs(ratio - 1.0) > config.train.clip_range)
+                        info["clipfrac"] = torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float())
                         info["loss"] = loss
 
+                        # log training-related stuff
+                        info.update({"epoch": epoch, "inner_epoch": inner_epoch, "timestep": j})
+                        accelerator.log(info, step=global_step)
+                        global_step += 1
+
                         # backward pass
                         accelerator.backward(loss)
                         if accelerator.sync_gradients:
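Note: the `clipfrac` change is a bug fix. `torch.mean` rejects Bool tensors (it only supports floating-point dtypes), so the comparison result must be cast with `.float()` before averaging. The added block also advances `global_step` once per optimization step, keeping the `accelerator.log` step counter monotonic for wandb. A quick demonstration of the fix:

```python
import torch

ratio = torch.tensor([0.8, 1.0, 1.3])
clip_range = 0.2
mask = torch.abs(ratio - 1.0) > clip_range  # Bool tensor
# torch.mean(mask) raises a dtype error; cast first:
clipfrac = torch.mean(mask.float())         # -> 0.3333 (1 of 3 ratios clipped)
```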