Working on DGX
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from absl import app, flags, logging
 from ml_collections import config_flags
 from accelerate import Accelerator
@@ -6,6 +7,7 @@ from accelerate.logging import get_logger
 from diffusers import StableDiffusionPipeline, DDIMScheduler
 from diffusers.loaders import AttnProcsLayers
 from diffusers.models.attention_processor import LoRAAttnProcessor
+import numpy as np
 import ddpo_pytorch.prompts
 import ddpo_pytorch.rewards
 from ddpo_pytorch.stat_tracking import PerPromptStatTracker
@@ -20,7 +22,7 @@ tqdm = partial(tqdm.tqdm, dynamic_ncols=True)


 FLAGS = flags.FLAGS
-config_flags.DEFINE_config_file("config", "config/base.py", "Training configuration.")
+config_flags.DEFINE_config_file("config", "ddpo_pytorch/config/base.py", "Training configuration.")

 logger = get_logger(__name__)

@@ -32,9 +34,10 @@ def main(_):
         log_with="wandb",
         mixed_precision=config.mixed_precision,
         project_dir=config.logdir,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps * config.sample.num_steps,
     )
     if accelerator.is_main_process:
-        accelerator.init_trackers(project_name="ddpo-pytorch", config=config)
+        accelerator.init_trackers(project_name="ddpo-pytorch", config=config.to_dict())
     logger.info(config)

     # set seed
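
Note: the training loop below treats every denoising timestep as its own micro-batch, so scaling gradient_accumulation_steps by config.sample.num_steps makes a single optimizer update span complete sampling trajectories; with, say, gradient_accumulation_steps = 4 and sample.num_steps = 50, the optimizer would step once per 200 backward passes. Passing config.to_dict() hands the wandb tracker a plain dict, likely because an ml_collections ConfigDict does not serialize cleanly.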
@@ -93,14 +96,6 @@ def main(_):
     if config.allow_tf32:
         torch.backends.cuda.matmul.allow_tf32 = True

-    if config.train.scale_lr:
-        config.train.learning_rate = (
-            config.train.learning_rate
-            * config.train.gradient_accumulation_steps
-            * config.train.batch_size
-            * accelerator.num_processes
-        )
-
     # Initialize the optimizer
     if config.train.use_8bit_adam:
         try:
@@ -135,9 +130,6 @@ def main(_):
         config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
     )

-    assert config.sample.batch_size % config.train.batch_size == 0
-    assert samples_per_epoch % total_train_batch_size == 0
-
     logger.info("***** Running training *****")
     logger.info(f" Num Epochs = {config.num_epochs}")
     logger.info(f" Sample batch size per device = {config.sample.batch_size}")
@@ -149,6 +141,9 @@ def main(_):
     logger.info(f" Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
     logger.info(f" Number of inner epochs = {config.train.num_inner_epochs}")

+    assert config.sample.batch_size % config.train.batch_size == 0
+    assert samples_per_epoch % total_train_batch_size == 0
+
     neg_prompt_embed = pipeline.text_encoder(
         pipeline.tokenizer(
             [""],
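
Note: the two divisibility asserts are moved below the logging block rather than deleted; they still run before the negative-prompt embedding is computed and before any sampling or training.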
@@ -237,6 +232,8 @@ def main(_):
             {"images": [wandb.Image(image, caption=prompt) for image, prompt in zip(images, prompts)]},
             step=global_step,
         )
+        # from PIL import Image
+        # Image.fromarray((images[0].cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)).save(f"test.png")

         # per-prompt mean/std tracking
         if config.per_prompt_stat_tracking:
@@ -267,12 +264,6 @@ def main(_):
             indices = torch.randperm(total_batch_size, device=accelerator.device)
             samples = {k: v[indices] for k, v in samples.items()}

-            # shuffle along time dimension, independently for each sample
-            for i in range(total_batch_size):
-                indices = torch.randperm(num_timesteps, device=accelerator.device)
-                for key in ["timesteps", "latents", "next_latents"]:
-                    samples[key][i] = samples[key][i][indices]
-
             # rebatch for training
             samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}

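
Note: with the per-sample time shuffle removed, each trajectory's timesteps are now visited in their original order during training; only whole trajectories are still shuffled along the batch dimension.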
@@ -292,6 +283,7 @@ def main(_):
                 else:
                     embeds = sample["prompt_embeds"]

+                info = defaultdict(list)
                 for j in tqdm(
                     range(num_timesteps),
                     desc=f"Timestep",
@@ -335,18 +327,12 @@ def main(_):
                         loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))

                         # debugging values
-                        info = {}
                         # John Schulman says that (ratio - 1) - log(ratio) is a better
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"] = 0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
-                        info["clipfrac"] = torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float())
-                        info["loss"] = loss
-
-                        # log training-related stuff
-                        info.update({"epoch": epoch, "inner_epoch": inner_epoch, "timestep": j})
-                        accelerator.log(info, step=global_step)
-                        global_step += 1
+                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
+                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["loss"].append(loss)

                         # backward pass
                         accelerator.backward(loss)
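
The Schulman comment above contrasts two Monte Carlo estimators of the KL divergence between the old and new policies. A minimal runnable sketch of both, with random tensors standing in for the real log-probabilities:

    import torch

    log_prob = torch.randn(8)       # stand-in for the new per-sample log-probs
    old_log_prob = torch.randn(8)   # stand-in for sample["log_probs"][:, j]
    logr = log_prob - old_log_prob  # log of the importance ratio
    k2 = 0.5 * torch.mean(logr ** 2)           # estimator used in this diff
    k3 = torch.mean(torch.expm1(logr) - logr)  # (ratio - 1) - log(ratio)

Per the linked blog post, k3 is unbiased and pointwise nonnegative, which is why the comment calls it the better estimator; k2 is biased but low-variance and is what most PPO code uses.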
@@ -355,6 +341,14 @@ def main(_):
                         optimizer.step()
                         optimizer.zero_grad()

+                    if accelerator.sync_gradients:
+                        # log training-related stuff
+                        info = {k: torch.mean(torch.stack(v)) for k, v in info.items()}
+                        info.update({"epoch": epoch, "inner_epoch": inner_epoch})
+                        accelerator.log(info, step=global_step)
+                        global_step += 1
+                        info = defaultdict(list)
+

 if __name__ == "__main__":
     app.run(main)
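
The new sync_gradients block is an accumulate-then-log pattern: per-timestep metrics pile up in a defaultdict(list) and are reduced and logged once per real optimizer update. A self-contained sketch of the same pattern with a toy model and synthetic losses (accelerator.log is a no-op here because no tracker was initialized):

    from collections import defaultdict

    import torch
    from accelerate import Accelerator

    accelerator = Accelerator(gradient_accumulation_steps=4)
    model = torch.nn.Linear(2, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    model, optimizer = accelerator.prepare(model, optimizer)

    info, global_step = defaultdict(list), 0
    for step in range(8):
        with accelerator.accumulate(model):
            loss = model(torch.randn(2)).pow(2).mean()
            info["loss"].append(loss.detach())
            accelerator.backward(loss)
            optimizer.step()       # skipped by accelerate until the cycle completes
            optimizer.zero_grad()
        if accelerator.sync_gradients:  # True once every 4 iterations here
            logs = {k: torch.mean(torch.stack(v)).item() for k, v in info.items()}
            accelerator.log(logs, step=global_step)
            global_step += 1
            info = defaultdict(list)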