Working on DGX
@@ -18,7 +18,6 @@ def get_config():
 
     # training
     config.train = train = ml_collections.ConfigDict()
-    train.mixed_precision = "fp16"
     train.batch_size = 1
     train.use_8bit_adam = False
     train.scale_lr = False
@@ -27,7 +26,7 @@ def get_config():
     train.adam_beta2 = 0.999
     train.adam_weight_decay = 1e-4
     train.adam_epsilon = 1e-8
-    train.gradient_accumulation_steps = 32
+    train.gradient_accumulation_steps = 1
     train.max_grad_norm = 1.0
     train.num_inner_epochs = 1
     train.cfg = True
@@ -39,8 +38,8 @@ def get_config():
     sample.num_steps = 30
     sample.eta = 1.0
     sample.guidance_scale = 5.0
-    sample.batch_size = 4
-    sample.num_batches_per_epoch = 8
+    sample.batch_size = 1
+    sample.num_batches_per_epoch = 1
 
     # prompting
    config.prompt_fn = "imagenet_animals"
							
								
								
									
ddpo_pytorch/config/dgx.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+import ml_collections
+from ddpo_pytorch.config import base
+
+def get_config():
+    config = base.get_config()
+
+    config.mixed_precision = "bf16"
+    config.allow_tf32 = True
+
+    config.train.batch_size = 8
+    config.train.gradient_accumulation_steps = 4
+
+    # sampling
+    config.sample.num_steps = 50
+    config.sample.batch_size = 8
+    config.sample.num_batches_per_epoch = 4
+
+    config.per_prompt_stat_tracking = None
+
+    return config
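Note: a quick sanity check of the resolved DGX settings, as a minimal sketch (assumes the package is installed, e.g. via pip install -e .; the derived numbers are per GPU, before multiplying by the number of processes):

    from ddpo_pytorch.config import dgx

    config = dgx.get_config()
    assert config.mixed_precision == "bf16"
    # per-GPU throughput implied by this file
    samples_per_epoch = config.sample.batch_size * config.sample.num_batches_per_epoch        # 8 * 4 = 32
    samples_per_update = config.train.batch_size * config.train.gradient_accumulation_steps   # 8 * 4 = 32
    print(samples_per_epoch, samples_per_update)  # 32 32 -> one optimizer update per pass over the data, per GPU

The config is selected at runtime through the --config flag defined further down in this diff, e.g. --config ddpo_pytorch/config/dgx.py (the exact launch command is not part of this commit).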
@@ -14,6 +14,11 @@ from diffusers.utils import randn_tensor
 from diffusers.schedulers.scheduling_ddim import DDIMSchedulerOutput, DDIMScheduler
 
 
+def _left_broadcast(t, shape):
+    assert t.ndim <= len(shape)
+    return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)
+
+
 def _get_variance(self, timestep, prev_timestep):
     alpha_prod_t = torch.gather(self.alphas_cumprod, 0, timestep.cpu()).to(timestep.device)
     alpha_prod_t_prev = torch.where(
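For reference, a minimal standalone check of what the _left_broadcast helper added above does: it right-pads a per-batch tensor with singleton dimensions so it broadcasts against the full [B, C, H, W] sample shape (the shapes below are illustrative, not from the commit):

    import torch

    def _left_broadcast(t, shape):
        assert t.ndim <= len(shape)
        return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)

    alpha = torch.rand(4)               # one alphas_cumprod value per batch element
    sample = torch.randn(4, 3, 64, 64)  # latents shaped [B, C, H, W]
    out = _left_broadcast(alpha, sample.shape)
    assert out.shape == sample.shape
    assert (out[2] == alpha[2]).all()   # each value is repeated across its own batch slice

Plain PyTorch broadcasting aligns shapes from the right, so shape (4,) against (4, 3, 64, 64) would fail; padding with trailing singleton dims lines the batch dimension up on the left instead.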
@@ -82,13 +87,16 @@ def ddim_step_with_logprob(
 
     # 1. get previous step value (=t-1)
     prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps
+    # to prevent OOB on gather
     prev_timestep = torch.clamp(prev_timestep, 0, self.config.num_train_timesteps - 1)
 
     # 2. compute alphas, betas
-    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu()).to(timestep.device)
+    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu())
     alpha_prod_t_prev = torch.where(
         prev_timestep.cpu() >= 0, self.alphas_cumprod.gather(0, prev_timestep.cpu()), self.final_alpha_cumprod
-    ).to(timestep.device)
+    )
+    alpha_prod_t = _left_broadcast(alpha_prod_t, sample.shape).to(sample.device)
+    alpha_prod_t_prev = _left_broadcast(alpha_prod_t_prev, sample.shape).to(sample.device)
 
     beta_prod_t = 1 - alpha_prod_t
 
@@ -121,6 +129,7 @@ def ddim_step_with_logprob(
     # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
     variance = _get_variance(self, timestep, prev_timestep)
     std_dev_t = eta * variance ** (0.5)
+    std_dev_t = _left_broadcast(std_dev_t, sample.shape).to(sample.device)
 
     if use_clipped_model_output:
         # the pred_epsilon is always re-derived from the clipped x_0 in Glide
@@ -153,4 +162,4 @@ def ddim_step_with_logprob(
     # mean along all but batch dimension
     log_prob = log_prob.mean(dim=tuple(range(1, log_prob.ndim)))
 
-    return prev_sample, log_prob
+    return prev_sample.type(sample.dtype), log_prob
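The new return value casts prev_sample back to sample.dtype, presumably because the broadcast alpha/std tensors stay in float32 and mixing them with an fp16/bf16 latent silently promotes the result to float32. A toy illustration of that promotion (not code from the commit):

    import torch

    sample = torch.randn(2, 4, 8, 8, dtype=torch.float16)
    std_dev_t = torch.rand(2, 1, 1, 1)                      # float32, as after _left_broadcast
    prev_sample = sample + std_dev_t * torch.randn_like(sample, dtype=torch.float32)
    print(prev_sample.dtype)                                # torch.float32 (promoted)
    print(prev_sample.type(sample.dtype).dtype)             # torch.float16 (restored)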
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from absl import app, flags, logging
 from ml_collections import config_flags
 from accelerate import Accelerator
@@ -6,6 +7,7 @@ from accelerate.logging import get_logger
 from diffusers import StableDiffusionPipeline, DDIMScheduler
 from diffusers.loaders import AttnProcsLayers
 from diffusers.models.attention_processor import LoRAAttnProcessor
+import numpy as np
 import ddpo_pytorch.prompts
 import ddpo_pytorch.rewards
 from ddpo_pytorch.stat_tracking import PerPromptStatTracker
@@ -20,7 +22,7 @@ tqdm = partial(tqdm.tqdm, dynamic_ncols=True)
 
 
 FLAGS = flags.FLAGS
-config_flags.DEFINE_config_file("config", "config/base.py", "Training configuration.")
+config_flags.DEFINE_config_file("config", "ddpo_pytorch/config/base.py", "Training configuration.")
 
 logger = get_logger(__name__)
 
@@ -32,9 +34,10 @@ def main(_):
         log_with="wandb",
         mixed_precision=config.mixed_precision,
         project_dir=config.logdir,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps * config.sample.num_steps,
     )
     if accelerator.is_main_process:
-        accelerator.init_trackers(project_name="ddpo-pytorch", config=config)
+        accelerator.init_trackers(project_name="ddpo-pytorch", config=config.to_dict())
     logger.info(config)
 
     # set seed
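The accumulation count handed to Accelerate is train.gradient_accumulation_steps multiplied by sample.num_steps, presumably because the training loop (later in this diff) treats every denoising timestep of a micro-batch as its own accumulation step, so gradients should only sync after all timesteps of all accumulated micro-batches. A rough sketch of the resulting update cadence under the dgx.py values (the per-timestep accumulation is an assumption about loop structure not shown in full here):

    train_grad_accum = 4           # config.train.gradient_accumulation_steps (dgx.py)
    num_steps = 50                 # config.sample.num_steps (dgx.py)
    micro_steps_per_update = train_grad_accum * num_steps
    print(micro_steps_per_update)  # 200 backward passes between optimizer steps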
@@ -93,14 +96,6 @@ def main(_):
     if config.allow_tf32:
         torch.backends.cuda.matmul.allow_tf32 = True
 
-    if config.train.scale_lr:
-        config.train.learning_rate = (
-            config.train.learning_rate
-            * config.train.gradient_accumulation_steps
-            * config.train.batch_size
-            * accelerator.num_processes
-        )
-
     # Initialize the optimizer
     if config.train.use_8bit_adam:
         try:
@@ -135,9 +130,6 @@ def main(_):
         config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
     )
 
-    assert config.sample.batch_size % config.train.batch_size == 0
-    assert samples_per_epoch % total_train_batch_size == 0
-
     logger.info("***** Running training *****")
     logger.info(f"  Num Epochs = {config.num_epochs}")
     logger.info(f"  Sample batch size per device = {config.sample.batch_size}")
@@ -149,6 +141,9 @@ def main(_):
     logger.info(f"  Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
     logger.info(f"  Number of inner epochs = {config.train.num_inner_epochs}")
 
+    assert config.sample.batch_size % config.train.batch_size == 0
+    assert samples_per_epoch % total_train_batch_size == 0
+
     neg_prompt_embed = pipeline.text_encoder(
         pipeline.tokenizer(
             [""],
@@ -237,6 +232,8 @@ def main(_):
             {"images": [wandb.Image(image, caption=prompt) for image, prompt in zip(images, prompts)]},
             step=global_step,
         )
+        # from PIL import Image
+        # Image.fromarray((images[0].cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)).save(f"test.png")
 
         # per-prompt mean/std tracking
         if config.per_prompt_stat_tracking:
@@ -267,12 +264,6 @@ def main(_):
             indices = torch.randperm(total_batch_size, device=accelerator.device)
             samples = {k: v[indices] for k, v in samples.items()}
 
-            # shuffle along time dimension, independently for each sample
-            for i in range(total_batch_size):
-                indices = torch.randperm(num_timesteps, device=accelerator.device)
-                for key in ["timesteps", "latents", "next_latents"]:
-                    samples[key][i] = samples[key][i][indices]
-
             # rebatch for training
             samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
 
@@ -292,6 +283,7 @@ def main(_):
                 else:
                     embeds = sample["prompt_embeds"]
 
+                info = defaultdict(list)
                 for j in tqdm(
                     range(num_timesteps),
                     desc=f"Timestep",
@@ -335,18 +327,12 @@ def main(_):
                         loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))
 
                         # debugging values
-                        info = {}
                         # John Schulman says that (ratio - 1) - log(ratio) is a better
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"] = 0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
-                        info["clipfrac"] = torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float())
-                        info["loss"] = loss
-
-                        # log training-related stuff
-                        info.update({"epoch": epoch, "inner_epoch": inner_epoch, "timestep": j})
-                        accelerator.log(info, step=global_step)
-                        global_step += 1
+                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
+                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["loss"].append(loss)
 
                         # backward pass
                         accelerator.backward(loss)
@@ -355,6 +341,14 @@ def main(_):
                         optimizer.step()
                         optimizer.zero_grad()
 
+                    if accelerator.sync_gradients:
+                        # log training-related stuff
+                        info = {k: torch.mean(torch.stack(v)) for k, v in info.items()}
+                        info.update({"epoch": epoch, "inner_epoch": inner_epoch})
+                        accelerator.log(info, step=global_step)
+                        global_step += 1
+                        info = defaultdict(list)
+
 
 if __name__ == "__main__":
     app.run(main)
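The logging change above stops logging on every timestep; instead, per-micro-step metrics are appended to a defaultdict(list) and reduced once per real optimizer step, keyed off accelerator.sync_gradients. A self-contained sketch of that pattern with toy numbers (the loss and sync-flag stand-ins are assumptions for illustration):

    from collections import defaultdict
    import torch

    info = defaultdict(list)
    global_step = 0
    for j in range(8):                        # pretend micro-steps
        info["loss"].append(torch.rand(()))   # stand-in for the PPO loss
        sync = (j + 1) % 4 == 0               # stand-in for accelerator.sync_gradients
        if sync:
            reduced = {k: torch.mean(torch.stack(v)).item() for k, v in info.items()}
            print(global_step, reduced)       # one log entry per optimizer step
            global_step += 1
            info = defaultdict(list)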
							
								
								
									
setup.py
@@ -1,10 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='ddpo-pytorch',
-    version='0.0.1',
+    name="ddpo-pytorch",
+    version="0.0.1",
     packages=["ddpo_pytorch"],
     install_requires=[
-        "ml-collections", "absl-py"
+        "ml-collections",
+        "absl-py",
+        "diffusers[torch]==0.17.1",
+        "wandb",
+        "torchvision",
+        "inflect==6.0.4",
+        "transformers==4.30.2",
     ],
 )