From c680890d5c9de3ee13188136696cb9604041e6cb Mon Sep 17 00:00:00 2001
From: Kevin Black
Date: Sat, 24 Jun 2023 00:07:55 -0700
Subject: [PATCH] Working on DGX

---
 {config => ddpo_pytorch/config}/base.py       |  7 ++-
 ddpo_pytorch/config/dgx.py                    | 20 ++++++++
 .../diffusers_patch/ddim_with_logprob.py      | 15 ++++--
 scripts/train.py                              | 50 ++++++++-----------
 setup.py                                      | 14 ++++--
 5 files changed, 67 insertions(+), 39 deletions(-)
 rename {config => ddpo_pytorch/config}/base.py (90%)
 create mode 100644 ddpo_pytorch/config/dgx.py

diff --git a/config/base.py b/ddpo_pytorch/config/base.py
similarity index 90%
rename from config/base.py
rename to ddpo_pytorch/config/base.py
index 8a3118b..b70042f 100644
--- a/config/base.py
+++ b/ddpo_pytorch/config/base.py
@@ -18,7 +18,6 @@ def get_config():
 
     # training
     config.train = train = ml_collections.ConfigDict()
-    train.mixed_precision = "fp16"
     train.batch_size = 1
     train.use_8bit_adam = False
     train.scale_lr = False
@@ -27,7 +26,7 @@ def get_config():
     train.adam_beta2 = 0.999
     train.adam_weight_decay = 1e-4
     train.adam_epsilon = 1e-8
-    train.gradient_accumulation_steps = 32
+    train.gradient_accumulation_steps = 1
     train.max_grad_norm = 1.0
     train.num_inner_epochs = 1
     train.cfg = True
@@ -39,8 +38,8 @@ def get_config():
     sample.num_steps = 30
     sample.eta = 1.0
     sample.guidance_scale = 5.0
-    sample.batch_size = 4
-    sample.num_batches_per_epoch = 8
+    sample.batch_size = 1
+    sample.num_batches_per_epoch = 1
 
     # prompting
     config.prompt_fn = "imagenet_animals"
diff --git a/ddpo_pytorch/config/dgx.py b/ddpo_pytorch/config/dgx.py
new file mode 100644
index 0000000..80bc342
--- /dev/null
+++ b/ddpo_pytorch/config/dgx.py
@@ -0,0 +1,20 @@
+import ml_collections
+from ddpo_pytorch.config import base
+
+def get_config():
+    config = base.get_config()
+
+    config.mixed_precision = "bf16"
+    config.allow_tf32 = True
+
+    config.train.batch_size = 8
+    config.train.gradient_accumulation_steps = 4
+
+    # sampling
+    config.sample.num_steps = 50
+    config.sample.batch_size = 8
+    config.sample.num_batches_per_epoch = 4
+
+    config.per_prompt_stat_tracking = None
+
+    return config
\ No newline at end of file
diff --git a/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py b/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
index be5f421..0581a8a 100644
--- a/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
+++ b/ddpo_pytorch/diffusers_patch/ddim_with_logprob.py
@@ -14,6 +14,11 @@ from diffusers.utils import randn_tensor
 from diffusers.schedulers.scheduling_ddim import DDIMSchedulerOutput, DDIMScheduler
 
 
+def _left_broadcast(t, shape):
+    assert t.ndim <= len(shape)
+    return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)
+
+
 def _get_variance(self, timestep, prev_timestep):
     alpha_prod_t = torch.gather(self.alphas_cumprod, 0, timestep.cpu()).to(timestep.device)
     alpha_prod_t_prev = torch.where(
@@ -82,13 +87,16 @@ ddim_step_with_logprob(
 
     # 1. get previous step value (=t-1)
     prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps
+    # to prevent OOB on gather
     prev_timestep = torch.clamp(prev_timestep, 0, self.config.num_train_timesteps - 1)
 
     # 2. compute alphas, betas
-    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu()).to(timestep.device)
+    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu())
     alpha_prod_t_prev = torch.where(
         prev_timestep.cpu() >= 0, self.alphas_cumprod.gather(0, prev_timestep.cpu()), self.final_alpha_cumprod
-    ).to(timestep.device)
+    )
+    alpha_prod_t = _left_broadcast(alpha_prod_t, sample.shape).to(sample.device)
+    alpha_prod_t_prev = _left_broadcast(alpha_prod_t_prev, sample.shape).to(sample.device)
 
     beta_prod_t = 1 - alpha_prod_t
 
@@ -121,6 +129,7 @@ ddim_step_with_logprob(
     # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
     variance = _get_variance(self, timestep, prev_timestep)
     std_dev_t = eta * variance ** (0.5)
+    std_dev_t = _left_broadcast(std_dev_t, sample.shape).to(sample.device)
 
     if use_clipped_model_output:
         # the pred_epsilon is always re-derived from the clipped x_0 in Glide
@@ -153,4 +162,4 @@ ddim_step_with_logprob(
     # mean along all but batch dimension
     log_prob = log_prob.mean(dim=tuple(range(1, log_prob.ndim)))
 
-    return prev_sample, log_prob
+    return prev_sample.type(sample.dtype), log_prob
diff --git a/scripts/train.py b/scripts/train.py
index 86f66ea..6cba4a2 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from absl import app, flags, logging
 from ml_collections import config_flags
 from accelerate import Accelerator
@@ -6,6 +7,7 @@ from accelerate.logging import get_logger
 from diffusers import StableDiffusionPipeline, DDIMScheduler
 from diffusers.loaders import AttnProcsLayers
 from diffusers.models.attention_processor import LoRAAttnProcessor
+import numpy as np
 import ddpo_pytorch.prompts
 import ddpo_pytorch.rewards
 from ddpo_pytorch.stat_tracking import PerPromptStatTracker
@@ -20,7 +22,7 @@ tqdm = partial(tqdm.tqdm, dynamic_ncols=True)
 
 
 FLAGS = flags.FLAGS
-config_flags.DEFINE_config_file("config", "config/base.py", "Training configuration.")
+config_flags.DEFINE_config_file("config", "ddpo_pytorch/config/base.py", "Training configuration.")
 
 logger = get_logger(__name__)
 
@@ -32,9 +34,10 @@ def main(_):
         log_with="wandb",
         mixed_precision=config.mixed_precision,
         project_dir=config.logdir,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps * config.sample.num_steps,
     )
     if accelerator.is_main_process:
-        accelerator.init_trackers(project_name="ddpo-pytorch", config=config)
+        accelerator.init_trackers(project_name="ddpo-pytorch", config=config.to_dict())
     logger.info(config)
 
     # set seed
@@ -93,14 +96,6 @@ def main(_):
     if config.allow_tf32:
         torch.backends.cuda.matmul.allow_tf32 = True
 
-    if config.train.scale_lr:
-        config.train.learning_rate = (
-            config.train.learning_rate
-            * config.train.gradient_accumulation_steps
-            * config.train.batch_size
-            * accelerator.num_processes
-        )
-
     # Initialize the optimizer
     if config.train.use_8bit_adam:
         try:
@@ -135,9 +130,6 @@ def main(_):
         config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
     )
 
-    assert config.sample.batch_size % config.train.batch_size == 0
-    assert samples_per_epoch % total_train_batch_size == 0
-
    logger.info("***** Running training *****")
     logger.info(f"  Num Epochs = {config.num_epochs}")
     logger.info(f"  Sample batch size per device = {config.sample.batch_size}")
@@ -149,6 +141,9 @@ def main(_):
     logger.info(f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
     logger.info(f"  Number of inner epochs = {config.train.num_inner_epochs}")
 
+    assert config.sample.batch_size % config.train.batch_size == 0
+    assert samples_per_epoch % total_train_batch_size == 0
+
     neg_prompt_embed = pipeline.text_encoder(
         pipeline.tokenizer(
             [""],
@@ -237,6 +232,8 @@ def main(_):
                 {"images": [wandb.Image(image, caption=prompt) for image, prompt in zip(images, prompts)]},
                 step=global_step,
             )
+            # from PIL import Image
+            # Image.fromarray((images[0].cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)).save(f"test.png")
 
         # per-prompt mean/std tracking
         if config.per_prompt_stat_tracking:
@@ -267,12 +264,6 @@ def main(_):
             indices = torch.randperm(total_batch_size, device=accelerator.device)
             samples = {k: v[indices] for k, v in samples.items()}
 
-            # shuffle along time dimension, independently for each sample
-            for i in range(total_batch_size):
-                indices = torch.randperm(num_timesteps, device=accelerator.device)
-                for key in ["timesteps", "latents", "next_latents"]:
-                    samples[key][i] = samples[key][i][indices]
-
             # rebatch for training
             samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
 
@@ -292,6 +283,7 @@ def main(_):
                 else:
                     embeds = sample["prompt_embeds"]
 
+                info = defaultdict(list)
                 for j in tqdm(
                     range(num_timesteps),
                     desc=f"Timestep",
@@ -335,18 +327,12 @@ def main(_):
                         loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))
 
                         # debugging values
-                        info = {}
                         # John Schulman says that (ratio - 1) - log(ratio) is a better
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"] = 0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
-                        info["clipfrac"] = torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float())
-                        info["loss"] = loss
-
-                        # log training-related stuff
-                        info.update({"epoch": epoch, "inner_epoch": inner_epoch, "timestep": j})
-                        accelerator.log(info, step=global_step)
-                        global_step += 1
+                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
+                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["loss"].append(loss)
 
                         # backward pass
                         accelerator.backward(loss)
@@ -355,6 +341,14 @@ def main(_):
                         optimizer.step()
                         optimizer.zero_grad()
 
+                    if accelerator.sync_gradients:
+                        # log training-related stuff
+                        info = {k: torch.mean(torch.stack(v)) for k, v in info.items()}
+                        info.update({"epoch": epoch, "inner_epoch": inner_epoch})
+                        accelerator.log(info, step=global_step)
+                        global_step += 1
+                        info = defaultdict(list)
+
 
 if __name__ == "__main__":
     app.run(main)
diff --git a/setup.py b/setup.py
index 076cb2a..dee0576 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='ddpo-pytorch',
-    version='0.0.1',
+    name="ddpo-pytorch",
+    version="0.0.1",
     packages=["ddpo_pytorch"],
     install_requires=[
-        "ml-collections", "absl-py"
+        "ml-collections",
+        "absl-py",
+        "diffusers[torch]==0.17.1",
+        "wandb",
+        "torchvision",
+        "inflect==6.0.4",
+        "transformers==4.30.2",
     ],
-)
\ No newline at end of file
+)
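-- 
Note on the new _left_broadcast helper (an illustrative sketch for review, not part of the commit): alphas_cumprod.gather(0, timestep.cpu()) yields one scalar per sample, shape (B,), while `sample` is image-shaped (B, C, H, W). PyTorch broadcasting aligns trailing dimensions, so a (B,) tensor would be matched against W rather than the batch dimension. The helper pads singleton dimensions on the right so per-sample values broadcast across the image dimensions; the values below are made up for demonstration:

    import torch

    def _left_broadcast(t, shape):
        # pad singleton dims on the right: (B,) -> (B, 1, ..., 1), then expand
        assert t.ndim <= len(shape)
        return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)

    alpha = torch.tensor([0.9, 0.5])   # per-sample scalars, shape (2,)
    latents = torch.randn(2, 4, 8, 8)  # (B, C, H, W)

    # naive `alpha * latents` raises: trailing dims (2,) vs (..., 8) don't broadcast
    alpha_full = _left_broadcast(alpha, latents.shape)
    print((alpha_full * latents).shape)  # torch.Size([2, 4, 8, 8])

Related design note, as far as the diff shows: the Accelerator is constructed with gradient_accumulation_steps multiplied by sample.num_steps, since each trajectory contributes num_steps forward/backward passes. accelerator.sync_gradients then fires only on a true optimizer step, which appears to be why the per-timestep metrics are accumulated in a defaultdict and only averaged and logged (with global_step incremented) inside the sync_gradients branch.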