From 1958463f020112c9a7bc85768d296daacc2e1b4b Mon Sep 17 00:00:00 2001
From: Kevin Black
Date: Thu, 16 Nov 2023 22:36:46 +0000
Subject: [PATCH] Reformat

---
 ddpo_pytorch/aesthetic_scorer.py              |   4 +-
 .../diffusers_patch/ddim_with_logprob.py      |  53 ++++--
 .../diffusers_patch/pipeline_with_logprob.py  |  48 +++--
 ddpo_pytorch/rewards.py                       |  25 ++-
 scripts/train.py                              | 165 ++++++++++++++----
 5 files changed, 227 insertions(+), 68 deletions(-)

diff --git a/ddpo_pytorch/aesthetic_scorer.py b/ddpo_pytorch/aesthetic_scorer.py
index 46e3202..cb64b61 100644
--- a/ddpo_pytorch/aesthetic_scorer.py
+++ b/ddpo_pytorch/aesthetic_scorer.py
@@ -35,7 +35,9 @@ class AestheticScorer(torch.nn.Module):
         self.clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
         self.mlp = MLP()
-        state_dict = torch.load(ASSETS_PATH.joinpath("sac+logos+ava1-l14-linearMSE.pth"))
+        state_dict = torch.load(
+            ASSETS_PATH.joinpath("sac+logos+ava1-l14-linearMSE.pth")
+        )
         self.mlp.load_state_dict(state_dict)
         self.dtype = dtype
         self.eval()
diff --git a/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py b/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
index 0581a8a..7c1e824 100644
--- a/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
+++ b/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
@@ -20,9 +20,13 @@ def _left_broadcast(t, shape):
 
 
 def _get_variance(self, timestep, prev_timestep):
-    alpha_prod_t = torch.gather(self.alphas_cumprod, 0, timestep.cpu()).to(timestep.device)
+    alpha_prod_t = torch.gather(self.alphas_cumprod, 0, timestep.cpu()).to(
+        timestep.device
+    )
     alpha_prod_t_prev = torch.where(
-        prev_timestep.cpu() >= 0, self.alphas_cumprod.gather(0, prev_timestep.cpu()), self.final_alpha_cumprod
+        prev_timestep.cpu() >= 0,
+        self.alphas_cumprod.gather(0, prev_timestep.cpu()),
+        self.final_alpha_cumprod,
     ).to(timestep.device)
     beta_prod_t = 1 - alpha_prod_t
     beta_prod_t_prev = 1 - alpha_prod_t_prev
@@ -86,31 +90,45 @@ def ddim_step_with_logprob(
     # - pred_prev_sample -> "x_t-1"
 
     # 1. get previous step value (=t-1)
-    prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps
+    prev_timestep = (
+        timestep - self.config.num_train_timesteps // self.num_inference_steps
+    )
     # to prevent OOB on gather
     prev_timestep = torch.clamp(prev_timestep, 0, self.config.num_train_timesteps - 1)
 
     # 2. compute alphas, betas
     alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu())
     alpha_prod_t_prev = torch.where(
-        prev_timestep.cpu() >= 0, self.alphas_cumprod.gather(0, prev_timestep.cpu()), self.final_alpha_cumprod
+        prev_timestep.cpu() >= 0,
+        self.alphas_cumprod.gather(0, prev_timestep.cpu()),
+        self.final_alpha_cumprod,
     )
     alpha_prod_t = _left_broadcast(alpha_prod_t, sample.shape).to(sample.device)
-    alpha_prod_t_prev = _left_broadcast(alpha_prod_t_prev, sample.shape).to(sample.device)
+    alpha_prod_t_prev = _left_broadcast(alpha_prod_t_prev, sample.shape).to(
+        sample.device
+    )
     beta_prod_t = 1 - alpha_prod_t
 
     # 3. compute predicted original sample from predicted noise also called
     # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
     if self.config.prediction_type == "epsilon":
-        pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+        pred_original_sample = (
+            sample - beta_prod_t ** (0.5) * model_output
+        ) / alpha_prod_t ** (0.5)
         pred_epsilon = model_output
     elif self.config.prediction_type == "sample":
         pred_original_sample = model_output
-        pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
+        pred_epsilon = (
+            sample - alpha_prod_t ** (0.5) * pred_original_sample
+        ) / beta_prod_t ** (0.5)
     elif self.config.prediction_type == "v_prediction":
-        pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
-        pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
+        pred_original_sample = (alpha_prod_t**0.5) * sample - (
+            beta_prod_t**0.5
+        ) * model_output
+        pred_epsilon = (alpha_prod_t**0.5) * model_output + (
+            beta_prod_t**0.5
+        ) * sample
     else:
         raise ValueError(
             f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or"
@@ -133,13 +151,19 @@ def ddim_step_with_logprob(
 
     if use_clipped_model_output:
         # the pred_epsilon is always re-derived from the clipped x_0 in Glide
-        pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
+        pred_epsilon = (
+            sample - alpha_prod_t ** (0.5) * pred_original_sample
+        ) / beta_prod_t ** (0.5)
 
     # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
-    pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
+    pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (
+        0.5
+    ) * pred_epsilon
 
     # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
-    prev_sample_mean = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+    prev_sample_mean = (
+        alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
+    )
 
     if prev_sample is not None and generator is not None:
         raise ValueError(
@@ -149,7 +173,10 @@ def ddim_step_with_logprob(
         )
 
     if prev_sample is None:
         variance_noise = randn_tensor(
-            model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype
+            model_output.shape,
+            generator=generator,
+            device=model_output.device,
+            dtype=model_output.dtype,
         )
         prev_sample = prev_sample_mean + std_dev_t * variance_noise
diff --git a/ddpo_pytorch/diffusers_patch/pipeline_with_logprob.py b/ddpo_pytorch/diffusers_patch/pipeline_with_logprob.py
index 67726bf..b4320c9 100644
--- a/ddpo_pytorch/diffusers_patch/pipeline_with_logprob.py
+++ b/ddpo_pytorch/diffusers_patch/pipeline_with_logprob.py
@@ -116,7 +116,15 @@ def pipeline_with_logprob(
     width = width or self.unet.config.sample_size * self.vae_scale_factor
 
     # 1. Check inputs. Raise error if not correct
-    self.check_inputs(prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds)
+    self.check_inputs(
+        prompt,
+        height,
+        width,
+        callback_steps,
+        negative_prompt,
+        prompt_embeds,
+        negative_prompt_embeds,
+    )
 
     # 2. Define call parameters
     if prompt is not None and isinstance(prompt, str):
@@ -133,7 +141,11 @@ def pipeline_with_logprob(
     do_classifier_free_guidance = guidance_scale > 1.0
 
     # 3. Encode input prompt
-    text_encoder_lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+    text_encoder_lora_scale = (
+        cross_attention_kwargs.get("scale", None)
+        if cross_attention_kwargs is not None
+        else None
+    )
     prompt_embeds = self._encode_prompt(
         prompt,
         device,
@@ -172,7 +184,9 @@ def pipeline_with_logprob(
     with self.progress_bar(total=num_inference_steps) as progress_bar:
         for i, t in enumerate(timesteps):
             # expand the latents if we are doing classifier free guidance
-            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+            latent_model_input = (
+                torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+            )
             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
             # predict the noise residual
@@ -187,27 +201,39 @@ def pipeline_with_logprob(
             # perform guidance
             if do_classifier_free_guidance:
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+                noise_pred = noise_pred_uncond + guidance_scale * (
+                    noise_pred_text - noise_pred_uncond
+                )
 
             if do_classifier_free_guidance and guidance_rescale > 0.0:
                 # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
-                noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+                noise_pred = rescale_noise_cfg(
+                    noise_pred, noise_pred_text, guidance_rescale=guidance_rescale
+                )
 
             # compute the previous noisy sample x_t -> x_t-1
-            latents, log_prob = ddim_step_with_logprob(self.scheduler, noise_pred, t, latents, **extra_step_kwargs)
+            latents, log_prob = ddim_step_with_logprob(
+                self.scheduler, noise_pred, t, latents, **extra_step_kwargs
+            )
 
             all_latents.append(latents)
             all_log_probs.append(log_prob)
 
             # call the callback, if provided
-            if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+            if i == len(timesteps) - 1 or (
+                (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
+            ):
                 progress_bar.update()
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
 
     if not output_type == "latent":
-        image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-        image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+        image = self.vae.decode(
+            latents / self.vae.config.scaling_factor, return_dict=False
+        )[0]
+        image, has_nsfw_concept = self.run_safety_checker(
+            image, device, prompt_embeds.dtype
+        )
     else:
         image = latents
         has_nsfw_concept = None
@@ -217,7 +243,9 @@ def pipeline_with_logprob(
     else:
         do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
 
-    image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+    image = self.image_processor.postprocess(
+        image, output_type=output_type, do_denormalize=do_denormalize
+    )
 
     # Offload last model to CPU
     if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
diff --git a/ddpo_pytorch/rewards.py b/ddpo_pytorch/rewards.py
index 669d8e0..9409a2f 100644
--- a/ddpo_pytorch/rewards.py
+++ b/ddpo_pytorch/rewards.py
@@ -35,7 +35,11 @@ def aesthetic_score():
     scorer = AestheticScorer(dtype=torch.float32).cuda()
 
     def _fn(images, prompts, metadata):
-        images = (images * 255).round().clamp(0, 255).to(torch.uint8)
+        if isinstance(images, torch.Tensor):
+            images = (images * 255).round().clamp(0, 255).to(torch.uint8)
+        else:
+            images = images.transpose(0, 3, 1, 2)  # NHWC -> NCHW
+            images = torch.tensor(images, dtype=torch.uint8)
         scores = scorer(images)
         return scores, {}
 
@@ -55,7 +59,9 @@ def llava_strict_satisfaction():
     batch_size = 4
     url = "http://127.0.0.1:8085"
     sess = requests.Session()
-    retries = Retry(total=1000, backoff_factor=1, status_forcelist=[500], allowed_methods=False)
+    retries = Retry(
+        total=1000, backoff_factor=1, status_forcelist=[500], allowed_methods=False
+    )
     sess.mount("http://", HTTPAdapter(max_retries=retries))
 
     def _fn(images, prompts, metadata):
@@ -121,7 +127,9 @@ def llava_bertscore():
     batch_size = 16
     url = "http://127.0.0.1:8085"
     sess = requests.Session()
-    retries = Retry(total=1000, backoff_factor=1, status_forcelist=[500], allowed_methods=False)
+    retries = Retry(
+        total=1000, backoff_factor=1, status_forcelist=[500], allowed_methods=False
+    )
     sess.mount("http://", HTTPAdapter(max_retries=retries))
 
     def _fn(images, prompts, metadata):
@@ -152,8 +160,11 @@ def llava_bertscore():
             # format for LLaVA server
             data = {
                 "images": jpeg_images,
-                "queries": [["Answer concisely: what is going on in this image?"]] * len(image_batch),
-                "answers": [[f"The image contains {prompt}"] for prompt in prompt_batch],
+                "queries": [["Answer concisely: what is going on in this image?"]]
+                * len(image_batch),
+                "answers": [
+                    [f"The image contains {prompt}"] for prompt in prompt_batch
+                ],
             }
             data_bytes = pickle.dumps(data)
 
@@ -167,7 +178,9 @@ def llava_bertscore():
             all_scores += scores.tolist()
 
             # save the precision and f1 scores for analysis
-            all_info["precision"] += np.array(response_data["precision"]).squeeze().tolist()
+            all_info["precision"] += (
+                np.array(response_data["precision"]).squeeze().tolist()
+            )
             all_info["f1"] += np.array(response_data["f1"]).squeeze().tolist()
             all_info["outputs"] += np.array(response_data["outputs"]).squeeze().tolist()
 
diff --git a/scripts/train.py b/scripts/train.py
index e2bc11f..2c876b1 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -48,7 +48,9 @@ def main(_):
         config.resume_from = os.path.normpath(os.path.expanduser(config.resume_from))
         if "checkpoint_" not in os.path.basename(config.resume_from):
             # get the most recent checkpoint in this directory
-            checkpoints = list(filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from)))
+            checkpoints = list(
+                filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from))
+            )
             if len(checkpoints) == 0:
                 raise ValueError(f"No checkpoints found in {config.resume_from}")
             config.resume_from = os.path.join(
@@ -72,11 +74,14 @@ def main(_):
         # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the
         # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get
         # the total number of optimizer steps to accumulate across.
-        gradient_accumulation_steps=config.train.gradient_accumulation_steps * num_train_timesteps,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps
+        * num_train_timesteps,
     )
     if accelerator.is_main_process:
         accelerator.init_trackers(
-            project_name="ddpo-pytorch", config=config.to_dict(), init_kwargs={"wandb": {"name": config.run_name}}
+            project_name="ddpo-pytorch",
+            config=config.to_dict(),
+            init_kwargs={"wandb": {"name": config.run_name}},
         )
     logger.info(f"\n{config}")
 
@@ -84,7 +89,9 @@ def main(_):
     set_seed(config.seed, device_specific=True)
 
     # load scheduler, tokenizer and models.
-    pipeline = StableDiffusionPipeline.from_pretrained(config.pretrained.model, revision=config.pretrained.revision)
+    pipeline = StableDiffusionPipeline.from_pretrained(
+        config.pretrained.model, revision=config.pretrained.revision
+    )
     # freeze parameters of models to save more memory
     pipeline.vae.requires_grad_(False)
     pipeline.text_encoder.requires_grad_(False)
@@ -121,18 +128,24 @@ def main(_):
     lora_attn_procs = {}
     for name in pipeline.unet.attn_processors.keys():
         cross_attention_dim = (
-            None if name.endswith("attn1.processor") else pipeline.unet.config.cross_attention_dim
+            None
+            if name.endswith("attn1.processor")
+            else pipeline.unet.config.cross_attention_dim
         )
         if name.startswith("mid_block"):
             hidden_size = pipeline.unet.config.block_out_channels[-1]
         elif name.startswith("up_blocks"):
             block_id = int(name[len("up_blocks.")])
-            hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[block_id]
+            hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[
+                block_id
+            ]
         elif name.startswith("down_blocks"):
             block_id = int(name[len("down_blocks.")])
             hidden_size = pipeline.unet.config.block_out_channels[block_id]
 
-        lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim)
+        lora_attn_procs[name] = LoRAAttnProcessor(
+            hidden_size=hidden_size, cross_attention_dim=cross_attention_dim
+        )
     pipeline.unet.set_attn_processor(lora_attn_procs)
 
     # this is a hack to synchronize gradients properly. the module that registers the parameters we care about (in
@@ -163,13 +176,19 @@ def main(_):
         if config.use_lora and isinstance(models[0], AttnProcsLayers):
             # pipeline.unet.load_attn_procs(input_dir)
             tmp_unet = UNet2DConditionModel.from_pretrained(
-                config.pretrained.model, revision=config.pretrained.revision, subfolder="unet"
+                config.pretrained.model,
+                revision=config.pretrained.revision,
+                subfolder="unet",
             )
             tmp_unet.load_attn_procs(input_dir)
-            models[0].load_state_dict(AttnProcsLayers(tmp_unet.attn_processors).state_dict())
+            models[0].load_state_dict(
+                AttnProcsLayers(tmp_unet.attn_processors).state_dict()
+            )
             del tmp_unet
         elif not config.use_lora and isinstance(models[0], UNet2DConditionModel):
-            load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
+            load_model = UNet2DConditionModel.from_pretrained(
+                input_dir, subfolder="unet"
+            )
             models[0].register_to_config(**load_model.config)
             models[0].load_state_dict(load_model.state_dict())
             del load_model
@@ -243,20 +262,32 @@ def main(_):
     executor = futures.ThreadPoolExecutor(max_workers=2)
 
     # Train!
-    samples_per_epoch = config.sample.batch_size * accelerator.num_processes * config.sample.num_batches_per_epoch
+    samples_per_epoch = (
+        config.sample.batch_size
+        * accelerator.num_processes
+        * config.sample.num_batches_per_epoch
+    )
     total_train_batch_size = (
-        config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
+        config.train.batch_size
+        * accelerator.num_processes
+        * config.train.gradient_accumulation_steps
     )
 
     logger.info("***** Running training *****")
     logger.info(f"  Num Epochs = {config.num_epochs}")
     logger.info(f"  Sample batch size per device = {config.sample.batch_size}")
     logger.info(f"  Train batch size per device = {config.train.batch_size}")
-    logger.info(f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}")
+    logger.info(
+        f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}"
+    )
     logger.info("")
     logger.info(f"  Total number of samples per epoch = {samples_per_epoch}")
-    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}")
-    logger.info(f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
+    logger.info(
+        f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}"
+    )
+    logger.info(
+        f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}"
+    )
     logger.info(f"  Number of inner epochs = {config.train.num_inner_epochs}")
 
     assert config.sample.batch_size >= config.train.batch_size
@@ -284,7 +315,10 @@ def main(_):
         ):
             # generate prompts
             prompts, prompt_metadata = zip(
-                *[prompt_fn(**config.prompt_fn_kwargs) for _ in range(config.sample.batch_size)]
+                *[
+                    prompt_fn(**config.prompt_fn_kwargs)
+                    for _ in range(config.sample.batch_size)
+                ]
             )
 
             # encode prompts
@@ -309,9 +343,13 @@ def main(_):
                     output_type="pt",
                 )
 
-            latents = torch.stack(latents, dim=1)  # (batch_size, num_steps + 1, 4, 64, 64)
+            latents = torch.stack(
+                latents, dim=1
+            )  # (batch_size, num_steps + 1, 4, 64, 64)
             log_probs = torch.stack(log_probs, dim=1)  # (batch_size, num_steps, 1)
-            timesteps = pipeline.scheduler.timesteps.repeat(config.sample.batch_size, 1)  # (batch_size, num_steps)
+            timesteps = pipeline.scheduler.timesteps.repeat(
+                config.sample.batch_size, 1
+            )  # (batch_size, num_steps)
 
             # compute rewards asynchronously
             rewards = executor.submit(reward_fn, images, prompts, prompt_metadata)
@@ -323,8 +361,12 @@ def main(_):
                     "prompt_ids": prompt_ids,
                     "prompt_embeds": prompt_embeds,
                     "timesteps": timesteps,
-                    "latents": latents[:, :-1],  # each entry is the latent before timestep t
-                    "next_latents": latents[:, 1:],  # each entry is the latent after timestep t
+                    "latents": latents[
+                        :, :-1
+                    ],  # each entry is the latent before timestep t
+                    "next_latents": latents[
+                        :, 1:
+                    ],  # each entry is the latent after timestep t
                     "log_probs": log_probs,
                     "rewards": rewards,
                 }
@@ -347,14 +389,21 @@ def main(_):
            # this is a hack to force wandb to log the images as JPEGs instead of PNGs
            with tempfile.TemporaryDirectory() as tmpdir:
                for i, image in enumerate(images):
-                    pil = Image.fromarray((image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8))
+                    pil = Image.fromarray(
+                        (image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)
+                    )
                    pil = pil.resize((256, 256))
                    pil.save(os.path.join(tmpdir, f"{i}.jpg"))
                accelerator.log(
                    {
                        "images": [
-                            wandb.Image(os.path.join(tmpdir, f"{i}.jpg"), caption=f"{prompt:.25} | {reward:.2f}")
-                            for i, (prompt, reward) in enumerate(zip(prompts, rewards))  # only log rewards from process 0
+                            wandb.Image(
+                                os.path.join(tmpdir, f"{i}.jpg"),
+                                caption=f"{prompt:.25} | {reward:.2f}",
+                            )
+                            for i, (prompt, reward) in enumerate(
+                                zip(prompts, rewards)
+                            )  # only log rewards from process 0
                        ],
                    },
                    step=global_step,
@@ -365,7 +414,12 @@ def main(_):
 
         # log rewards and images
         accelerator.log(
-            {"reward": rewards, "epoch": epoch, "reward_mean": rewards.mean(), "reward_std": rewards.std()},
+            {
+                "reward": rewards,
+                "epoch": epoch,
+                "reward_mean": rewards.mean(),
+                "reward_std": rewards.std(),
+            },
             step=global_step,
         )
 
@@ -373,7 +427,9 @@ def main(_):
         if config.per_prompt_stat_tracking:
             # gather the prompts across processes
             prompt_ids = accelerator.gather(samples["prompt_ids"]).cpu().numpy()
-            prompts = pipeline.tokenizer.batch_decode(prompt_ids, skip_special_tokens=True)
+            prompts = pipeline.tokenizer.batch_decode(
+                prompt_ids, skip_special_tokens=True
+            )
             advantages = stat_tracker.update(prompts, rewards)
         else:
             advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
@@ -389,7 +445,10 @@ def main(_):
         del samples["prompt_ids"]
 
         total_batch_size, num_timesteps = samples["timesteps"].shape
-        assert total_batch_size == config.sample.batch_size * config.sample.num_batches_per_epoch
+        assert (
+            total_batch_size
+            == config.sample.batch_size * config.sample.num_batches_per_epoch
+        )
         assert num_timesteps == config.sample.num_steps
 
         #################### TRAINING ####################
@@ -400,16 +459,27 @@ def main(_):
 
            # shuffle along time dimension independently for each sample
            perms = torch.stack(
-                [torch.randperm(num_timesteps, device=accelerator.device) for _ in range(total_batch_size)]
+                [
+                    torch.randperm(num_timesteps, device=accelerator.device)
+                    for _ in range(total_batch_size)
+                ]
            )
            for key in ["timesteps", "latents", "next_latents", "log_probs"]:
-                samples[key] = samples[key][torch.arange(total_batch_size, device=accelerator.device)[:, None], perms]
+                samples[key] = samples[key][
+                    torch.arange(total_batch_size, device=accelerator.device)[:, None],
+                    perms,
+                ]
 
            # rebatch for training
-            samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
+            samples_batched = {
+                k: v.reshape(-1, config.train.batch_size, *v.shape[1:])
+                for k, v in samples.items()
+            }
 
            # dict of lists -> list of dicts for easier iteration
-            samples_batched = [dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]
+            samples_batched = [
+                dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())
+            ]
 
            # train
            pipeline.unet.train()
@@ -422,7 +492,9 @@ def main(_):
            ):
                if config.train.cfg:
                    # concat negative prompts to sample prompts to avoid two forward passes
-                    embeds = torch.cat([train_neg_prompt_embeds, sample["prompt_embeds"]])
+                    embeds = torch.cat(
+                        [train_neg_prompt_embeds, sample["prompt_embeds"]]
+                    )
                else:
                    embeds = sample["prompt_embeds"]
@@ -442,8 +514,10 @@ def main(_):
                                embeds,
                            ).sample
                            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                            noise_pred = noise_pred_uncond + config.sample.guidance_scale * (
-                                noise_pred_text - noise_pred_uncond
+                            noise_pred = (
+                                noise_pred_uncond
+                                + config.sample.guidance_scale
+                                * (noise_pred_text - noise_pred_uncond)
                            )
                        else:
                            noise_pred = unet(
@@ -463,12 +537,16 @@ def main(_):
 
                        # ppo logic
                        advantages = torch.clamp(
-                            sample["advantages"], -config.train.adv_clip_max, config.train.adv_clip_max
+                            sample["advantages"],
+                            -config.train.adv_clip_max,
+                            config.train.adv_clip_max,
                        )
                        ratio = torch.exp(log_prob - sample["log_probs"][:, j])
                        unclipped_loss = -advantages * ratio
                        clipped_loss = -advantages * torch.clamp(
-                            ratio, 1.0 - config.train.clip_range, 1.0 + config.train.clip_range
+                            ratio,
+                            1.0 - config.train.clip_range,
+                            1.0 + config.train.clip_range,
                        )
                        loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))
 
@@ -476,14 +554,25 @@ def main(_):
                        # John Schulman says that (ratio - 1) - log(ratio) is a better
                        # estimator, but most existing code uses this so...
                        # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
-                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["approx_kl"].append(
+                            0.5
+                            * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
+                        )
+                        info["clipfrac"].append(
+                            torch.mean(
+                                (
+                                    torch.abs(ratio - 1.0) > config.train.clip_range
+                                ).float()
+                            )
+                        )
                        info["loss"].append(loss)
 
                        # backward pass
                        accelerator.backward(loss)
                        if accelerator.sync_gradients:
-                            accelerator.clip_grad_norm_(unet.parameters(), config.train.max_grad_norm)
+                            accelerator.clip_grad_norm_(
+                                unet.parameters(), config.train.max_grad_norm
+                            )
                        optimizer.step()
                        optimizer.zero_grad()