Reformat
scripts/train.py
@@ -48,7 +48,9 @@ def main(_):
         config.resume_from = os.path.normpath(os.path.expanduser(config.resume_from))
         if "checkpoint_" not in os.path.basename(config.resume_from):
             # get the most recent checkpoint in this directory
-            checkpoints = list(filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from)))
+            checkpoints = list(
+                filter(lambda x: "checkpoint_" in x, os.listdir(config.resume_from))
+            )
             if len(checkpoints) == 0:
                 raise ValueError(f"No checkpoints found in {config.resume_from}")
             config.resume_from = os.path.join(
@@ -72,11 +74,14 @@ def main(_):
         # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the
         # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get
         # the total number of optimizer steps to accumulate across.
-        gradient_accumulation_steps=config.train.gradient_accumulation_steps * num_train_timesteps,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps
+        * num_train_timesteps,
     )
     if accelerator.is_main_process:
         accelerator.init_trackers(
-            project_name="ddpo-pytorch", config=config.to_dict(), init_kwargs={"wandb": {"name": config.run_name}}
+            project_name="ddpo-pytorch",
+            config=config.to_dict(),
+            init_kwargs={"wandb": {"name": config.run_name}},
         )
     logger.info(f"\n{config}")

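Note: the comment in this hunk is the key to the accumulation setup: Accelerate counts one accumulation step per `backward()` call, and the training loop calls `backward()` once per (sample, timestep) pair. A minimal sketch of the arithmetic, with hypothetical config values:

```python
# Minimal sketch of the accumulation arithmetic described in the comment
# above; both values are hypothetical, not read from the repository config.
samples_to_accumulate = 4   # config.train.gradient_accumulation_steps
num_train_timesteps = 50    # training denoising steps per sample

# Accelerate counts one accumulation step per backward() call, and the
# training loop calls backward() once per (sample, timestep) pair:
accelerate_steps = samples_to_accumulate * num_train_timesteps
print(accelerate_steps)  # 200 backward() calls per optimizer step
```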
@@ -84,7 +89,9 @@ def main(_):
     set_seed(config.seed, device_specific=True)

     # load scheduler, tokenizer and models.
-    pipeline = StableDiffusionPipeline.from_pretrained(config.pretrained.model, revision=config.pretrained.revision)
+    pipeline = StableDiffusionPipeline.from_pretrained(
+        config.pretrained.model, revision=config.pretrained.revision
+    )
     # freeze parameters of models to save more memory
     pipeline.vae.requires_grad_(False)
     pipeline.text_encoder.requires_grad_(False)
@@ -121,18 +128,24 @@ def main(_):
         lora_attn_procs = {}
         for name in pipeline.unet.attn_processors.keys():
             cross_attention_dim = (
-                None if name.endswith("attn1.processor") else pipeline.unet.config.cross_attention_dim
+                None
+                if name.endswith("attn1.processor")
+                else pipeline.unet.config.cross_attention_dim
             )
             if name.startswith("mid_block"):
                 hidden_size = pipeline.unet.config.block_out_channels[-1]
             elif name.startswith("up_blocks"):
                 block_id = int(name[len("up_blocks.")])
-                hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[block_id]
+                hidden_size = list(reversed(pipeline.unet.config.block_out_channels))[
+                    block_id
+                ]
             elif name.startswith("down_blocks"):
                 block_id = int(name[len("down_blocks.")])
                 hidden_size = pipeline.unet.config.block_out_channels[block_id]

-            lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim)
+            lora_attn_procs[name] = LoRAAttnProcessor(
+                hidden_size=hidden_size, cross_attention_dim=cross_attention_dim
+            )
         pipeline.unet.set_attn_processor(lora_attn_procs)

         # this is a hack to synchronize gradients properly. the module that registers the parameters we care about (in
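Note: the hunk above only re-wraps the name-to-hidden-size mapping; the logic is unchanged. A standalone sketch of that mapping, assuming the SD 1.x value `block_out_channels = (320, 640, 1280, 1280)` (an assumption, not read from the commit):

```python
# Sketch of the name -> hidden_size mapping re-wrapped above, assuming the
# SD 1.x value block_out_channels = (320, 640, 1280, 1280).
block_out_channels = (320, 640, 1280, 1280)

def hidden_size_for(name: str) -> int:
    if name.startswith("mid_block"):
        return block_out_channels[-1]
    if name.startswith("up_blocks"):
        # up blocks mirror the down blocks, hence the reversed list
        return list(reversed(block_out_channels))[int(name[len("up_blocks.")])]
    if name.startswith("down_blocks"):
        return block_out_channels[int(name[len("down_blocks.")])]
    raise ValueError(f"unexpected processor name: {name}")

print(hidden_size_for("down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor"))  # 320
print(hidden_size_for("mid_block.attentions.0.transformer_blocks.0.attn2.processor"))      # 1280
```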
@@ -163,13 +176,19 @@ def main(_):
         if config.use_lora and isinstance(models[0], AttnProcsLayers):
             # pipeline.unet.load_attn_procs(input_dir)
             tmp_unet = UNet2DConditionModel.from_pretrained(
-                config.pretrained.model, revision=config.pretrained.revision, subfolder="unet"
+                config.pretrained.model,
+                revision=config.pretrained.revision,
+                subfolder="unet",
             )
             tmp_unet.load_attn_procs(input_dir)
-            models[0].load_state_dict(AttnProcsLayers(tmp_unet.attn_processors).state_dict())
+            models[0].load_state_dict(
+                AttnProcsLayers(tmp_unet.attn_processors).state_dict()
+            )
             del tmp_unet
         elif not config.use_lora and isinstance(models[0], UNet2DConditionModel):
-            load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
+            load_model = UNet2DConditionModel.from_pretrained(
+                input_dir, subfolder="unet"
+            )
             models[0].register_to_config(**load_model.config)
             models[0].load_state_dict(load_model.state_dict())
             del load_model
@@ -243,20 +262,32 @@ def main(_):
     executor = futures.ThreadPoolExecutor(max_workers=2)

     # Train!
-    samples_per_epoch = config.sample.batch_size * accelerator.num_processes * config.sample.num_batches_per_epoch
+    samples_per_epoch = (
+        config.sample.batch_size
+        * accelerator.num_processes
+        * config.sample.num_batches_per_epoch
+    )
     total_train_batch_size = (
-        config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
+        config.train.batch_size
+        * accelerator.num_processes
+        * config.train.gradient_accumulation_steps
     )

     logger.info("***** Running training *****")
     logger.info(f"  Num Epochs = {config.num_epochs}")
     logger.info(f"  Sample batch size per device = {config.sample.batch_size}")
     logger.info(f"  Train batch size per device = {config.train.batch_size}")
-    logger.info(f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}")
+    logger.info(
+        f"  Gradient Accumulation steps = {config.train.gradient_accumulation_steps}"
+    )
     logger.info("")
     logger.info(f"  Total number of samples per epoch = {samples_per_epoch}")
-    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}")
-    logger.info(f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
+    logger.info(
+        f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size}"
+    )
+    logger.info(
+        f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}"
+    )
     logger.info(f"  Number of inner epochs = {config.train.num_inner_epochs}")

     assert config.sample.batch_size >= config.train.batch_size
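Note: a toy walkthrough of the batch-size bookkeeping these log lines report, with hypothetical values:

```python
# Toy walkthrough of the quantities logged above; all values hypothetical.
sample_batch_size, train_batch_size = 8, 2
num_processes, num_batches_per_epoch, grad_accum = 4, 2, 4

samples_per_epoch = sample_batch_size * num_processes * num_batches_per_epoch  # 64
total_train_batch_size = train_batch_size * num_processes * grad_accum         # 32

# the assert holds: every sample batch can be re-chunked into train batches
assert sample_batch_size >= train_batch_size
print(samples_per_epoch // total_train_batch_size)  # 2 gradient updates per inner epoch
```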
@@ -284,7 +315,10 @@ def main(_):
         ):
             # generate prompts
             prompts, prompt_metadata = zip(
-                *[prompt_fn(**config.prompt_fn_kwargs) for _ in range(config.sample.batch_size)]
+                *[
+                    prompt_fn(**config.prompt_fn_kwargs)
+                    for _ in range(config.sample.batch_size)
+                ]
             )

             # encode prompts
@@ -309,9 +343,13 @@ def main(_):
                     output_type="pt",
                 )

-            latents = torch.stack(latents, dim=1)  # (batch_size, num_steps + 1, 4, 64, 64)
+            latents = torch.stack(
+                latents, dim=1
+            )  # (batch_size, num_steps + 1, 4, 64, 64)
             log_probs = torch.stack(log_probs, dim=1)  # (batch_size, num_steps, 1)
-            timesteps = pipeline.scheduler.timesteps.repeat(config.sample.batch_size, 1)  # (batch_size, num_steps)
+            timesteps = pipeline.scheduler.timesteps.repeat(
+                config.sample.batch_size, 1
+            )  # (batch_size, num_steps)

             # compute rewards asynchronously
             rewards = executor.submit(reward_fn, images, prompts, prompt_metadata)
@@ -323,8 +361,12 @@ def main(_):
                     "prompt_ids": prompt_ids,
                     "prompt_embeds": prompt_embeds,
                     "timesteps": timesteps,
-                    "latents": latents[:, :-1],  # each entry is the latent before timestep t
-                    "next_latents": latents[:, 1:],  # each entry is the latent after timestep t
+                    "latents": latents[
+                        :, :-1
+                    ],  # each entry is the latent before timestep t
+                    "next_latents": latents[
+                        :, 1:
+                    ],  # each entry is the latent after timestep t
                     "log_probs": log_probs,
                     "rewards": rewards,
                 }
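Note: the re-wrapped `latents[:, :-1]` / `latents[:, 1:]` slices pair each stored latent with its successor, giving one (state, next state) transition per denoising step. A minimal sketch with toy shapes:

```python
import torch

# Toy shapes: 2 samples, 5 denoising steps, so 6 stored latents per sample.
# The real tensors in the diff are (batch_size, num_steps + 1, 4, 64, 64).
latents = torch.randn(2, 6, 3)

states = latents[:, :-1]      # latent *before* each timestep t -> (2, 5, 3)
next_states = latents[:, 1:]  # latent *after* each timestep t  -> (2, 5, 3)

# transition t ends where transition t + 1 begins
assert torch.equal(next_states[:, 0], states[:, 1])
```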
@@ -347,14 +389,21 @@ def main(_):
             # this is a hack to force wandb to log the images as JPEGs instead of PNGs
             with tempfile.TemporaryDirectory() as tmpdir:
                 for i, image in enumerate(images):
-                    pil = Image.fromarray((image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8))
+                    pil = Image.fromarray(
+                        (image.cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)
+                    )
                     pil = pil.resize((256, 256))
                     pil.save(os.path.join(tmpdir, f"{i}.jpg"))
                 accelerator.log(
                     {
                         "images": [
-                            wandb.Image(os.path.join(tmpdir, f"{i}.jpg"), caption=f"{prompt:.25} | {reward:.2f}")
-                            for i, (prompt, reward) in enumerate(zip(prompts, rewards))  # only log rewards from process 0
+                            wandb.Image(
+                                os.path.join(tmpdir, f"{i}.jpg"),
+                                caption=f"{prompt:.25} | {reward:.2f}",
+                            )
+                            for i, (prompt, reward) in enumerate(
+                                zip(prompts, rewards)
+                            )  # only log rewards from process 0
                         ],
                     },
                     step=global_step,
@@ -365,7 +414,12 @@ def main(_):

         # log rewards and images
         accelerator.log(
-            {"reward": rewards, "epoch": epoch, "reward_mean": rewards.mean(), "reward_std": rewards.std()},
+            {
+                "reward": rewards,
+                "epoch": epoch,
+                "reward_mean": rewards.mean(),
+                "reward_std": rewards.std(),
+            },
             step=global_step,
         )

@@ -373,7 +427,9 @@ def main(_):
         if config.per_prompt_stat_tracking:
             # gather the prompts across processes
             prompt_ids = accelerator.gather(samples["prompt_ids"]).cpu().numpy()
-            prompts = pipeline.tokenizer.batch_decode(prompt_ids, skip_special_tokens=True)
+            prompts = pipeline.tokenizer.batch_decode(
+                prompt_ids, skip_special_tokens=True
+            )
             advantages = stat_tracker.update(prompts, rewards)
         else:
             advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
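Note: when per-prompt tracking is disabled, advantages are just rewards standardized over the gathered batch. A sketch of that `else` branch with toy rewards:

```python
import torch

rewards = torch.tensor([0.1, 0.5, 0.9, 0.3])  # hypothetical gathered rewards

# Global standardization as in the else branch above; the 1e-8 guards
# against a zero std when every reward in the batch is identical.
advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
print(advantages.mean(), advantages.std())  # ~0 and ~1
```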
@@ -389,7 +445,10 @@ def main(_):
         del samples["prompt_ids"]

         total_batch_size, num_timesteps = samples["timesteps"].shape
-        assert total_batch_size == config.sample.batch_size * config.sample.num_batches_per_epoch
+        assert (
+            total_batch_size
+            == config.sample.batch_size * config.sample.num_batches_per_epoch
+        )
         assert num_timesteps == config.sample.num_steps

         #################### TRAINING ####################
@@ -400,16 +459,27 @@ def main(_):

             # shuffle along time dimension independently for each sample
             perms = torch.stack(
-                [torch.randperm(num_timesteps, device=accelerator.device) for _ in range(total_batch_size)]
+                [
+                    torch.randperm(num_timesteps, device=accelerator.device)
+                    for _ in range(total_batch_size)
+                ]
             )
             for key in ["timesteps", "latents", "next_latents", "log_probs"]:
-                samples[key] = samples[key][torch.arange(total_batch_size, device=accelerator.device)[:, None], perms]
+                samples[key] = samples[key][
+                    torch.arange(total_batch_size, device=accelerator.device)[:, None],
+                    perms,
+                ]

             # rebatch for training
-            samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
+            samples_batched = {
+                k: v.reshape(-1, config.train.batch_size, *v.shape[1:])
+                for k, v in samples.items()
+            }

             # dict of lists -> list of dicts for easier iteration
-            samples_batched = [dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())]
+            samples_batched = [
+                dict(zip(samples_batched, x)) for x in zip(*samples_batched.values())
+            ]

             # train
             pipeline.unet.train()
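Note: the advanced indexing re-wrapped above applies a different timestep permutation to each row. A self-contained sketch of the same indexing pattern:

```python
import torch

total_batch_size, num_timesteps = 3, 4
values = torch.arange(total_batch_size * num_timesteps).reshape(
    total_batch_size, num_timesteps
)

# one independent permutation per sample, as in the hunk above
perms = torch.stack([torch.randperm(num_timesteps) for _ in range(total_batch_size)])

# row indices broadcast against perms, so row i is reordered by perms[i]
shuffled = values[torch.arange(total_batch_size)[:, None], perms]

# each row keeps exactly its own elements, just in a new order
assert torch.equal(shuffled.sort(dim=1).values, values)
```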
@@ -422,7 +492,9 @@ def main(_):
             ):
                 if config.train.cfg:
                     # concat negative prompts to sample prompts to avoid two forward passes
-                    embeds = torch.cat([train_neg_prompt_embeds, sample["prompt_embeds"]])
+                    embeds = torch.cat(
+                        [train_neg_prompt_embeds, sample["prompt_embeds"]]
+                    )
                 else:
                     embeds = sample["prompt_embeds"]

@@ -442,8 +514,10 @@ def main(_):
                                 embeds,
                             ).sample
                             noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                            noise_pred = noise_pred_uncond + config.sample.guidance_scale * (
-                                noise_pred_text - noise_pred_uncond
+                            noise_pred = (
+                                noise_pred_uncond
+                                + config.sample.guidance_scale
+                                * (noise_pred_text - noise_pred_uncond)
                             )
                         else:
                             noise_pred = unet(
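Note: the re-wrapped expression is standard classifier-free guidance: start from the unconditional prediction and push it along the text-minus-unconditional direction. A toy sketch (hypothetical guidance scale):

```python
import torch

guidance_scale = 5.0  # hypothetical value for config.sample.guidance_scale

# Doubled batch as produced by the torch.cat([...] * 2) inputs above:
# unconditional predictions first, text-conditioned predictions second.
noise_pred = torch.randn(4, 8)
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)

# classifier-free guidance: move the unconditional prediction toward, and
# past, the text-conditioned one
guided = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
assert guided.shape == noise_pred_uncond.shape
```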
@@ -463,12 +537,16 @@ def main(_):

                         # ppo logic
                         advantages = torch.clamp(
-                            sample["advantages"], -config.train.adv_clip_max, config.train.adv_clip_max
+                            sample["advantages"],
+                            -config.train.adv_clip_max,
+                            config.train.adv_clip_max,
                         )
                         ratio = torch.exp(log_prob - sample["log_probs"][:, j])
                         unclipped_loss = -advantages * ratio
                         clipped_loss = -advantages * torch.clamp(
-                            ratio, 1.0 - config.train.clip_range, 1.0 + config.train.clip_range
+                            ratio,
+                            1.0 - config.train.clip_range,
+                            1.0 + config.train.clip_range,
                         )
                         loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))

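Note: this hunk re-wraps the PPO clipped-surrogate objective without changing it. A self-contained sketch with hypothetical clip settings:

```python
import torch

clip_range, adv_clip_max = 1e-4, 5.0  # hypothetical config.train values

log_prob = torch.randn(8)      # fresh log-probs for this timestep
old_log_prob = torch.randn(8)  # log-probs stored during sampling
advantages = torch.randn(8) * 10.0

# clamp advantages, form the importance ratio, then take the pessimistic
# (elementwise maximum) of the unclipped and clipped surrogate losses
advantages = torch.clamp(advantages, -adv_clip_max, adv_clip_max)
ratio = torch.exp(log_prob - old_log_prob)
unclipped_loss = -advantages * ratio
clipped_loss = -advantages * torch.clamp(ratio, 1.0 - clip_range, 1.0 + clip_range)
loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))
print(loss.item())
```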
@@ -476,14 +554,25 @@ def main(_):
                         # John Schulman says that (ratio - 1) - log(ratio) is a better
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
-                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["approx_kl"].append(
+                            0.5
+                            * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
+                        )
+                        info["clipfrac"].append(
+                            torch.mean(
+                                (
+                                    torch.abs(ratio - 1.0) > config.train.clip_range
+                                ).float()
+                            )
+                        )
                         info["loss"].append(loss)

                         # backward pass
                         accelerator.backward(loss)
                         if accelerator.sync_gradients:
-                            accelerator.clip_grad_norm_(unet.parameters(), config.train.max_grad_norm)
+                            accelerator.clip_grad_norm_(
+                                unet.parameters(), config.train.max_grad_norm
+                            )
                         optimizer.step()
                         optimizer.zero_grad()

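Note: the comment above cites John Schulman's KL note; a sketch comparing the squared-difference estimator used here with the (ratio - 1) - log(ratio) form from that post, on the same toy samples:

```python
import torch

log_prob = torch.randn(1000) * 0.1
old_log_prob = log_prob + torch.randn(1000) * 0.05

# estimator used in the hunk above: 0.5 * E[(delta log-prob)^2]
approx_kl_sq = 0.5 * torch.mean((log_prob - old_log_prob) ** 2)

# alternative from the linked post: E[(ratio - 1) - log(ratio)]
ratio = torch.exp(log_prob - old_log_prob)
approx_kl_schulman = torch.mean((ratio - 1.0) - torch.log(ratio))

print(approx_kl_sq.item(), approx_kl_schulman.item())  # both small and positive
```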