Working on DGX
commit c680890d5c (parent 92fc030123)
ddpo_pytorch/config/base.py

@@ -18,7 +18,6 @@ def get_config():
 
     # training
     config.train = train = ml_collections.ConfigDict()
-    train.mixed_precision = "fp16"
     train.batch_size = 1
     train.use_8bit_adam = False
     train.scale_lr = False
@@ -27,7 +26,7 @@ def get_config():
     train.adam_beta2 = 0.999
     train.adam_weight_decay = 1e-4
     train.adam_epsilon = 1e-8
-    train.gradient_accumulation_steps = 32
+    train.gradient_accumulation_steps = 1
     train.max_grad_norm = 1.0
     train.num_inner_epochs = 1
     train.cfg = True
@@ -39,8 +38,8 @@ def get_config():
     sample.num_steps = 30
     sample.eta = 1.0
     sample.guidance_scale = 5.0
-    sample.batch_size = 4
-    sample.num_batches_per_epoch = 8
+    sample.batch_size = 1
+    sample.num_batches_per_epoch = 1
 
     # prompting
     config.prompt_fn = "imagenet_animals"
ddpo_pytorch/config/dgx.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+import ml_collections
+from ddpo_pytorch.config import base
+
+def get_config():
+    config = base.get_config()
+
+    config.mixed_precision = "bf16"
+    config.allow_tf32 = True
+
+    config.train.batch_size = 8
+    config.train.gradient_accumulation_steps = 4
+
+    # sampling
+    config.sample.num_steps = 50
+    config.sample.batch_size = 8
+    config.sample.num_batches_per_epoch = 4
+
+    config.per_prompt_stat_tracking = None
+
+    return config
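
The new DGX config is a thin layer over the base config: it calls base.get_config() and then overrides a handful of fields. A minimal sketch of how the layering behaves, assuming the ddpo_pytorch package is installed; the asserted values are the ones set in this commit:

    from ddpo_pytorch.config import base, dgx

    cfg = dgx.get_config()                # starts from base.get_config(), then overrides
    assert cfg.mixed_precision == "bf16"  # set in dgx.py
    assert cfg.train.batch_size == 8      # overridden from the base value of 1
    assert cfg.train.adam_beta2 == 0.999  # inherited unchanged from base.py
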
@@ -14,6 +14,11 @@ from diffusers.utils import randn_tensor
 from diffusers.schedulers.scheduling_ddim import DDIMSchedulerOutput, DDIMScheduler
 
 
+def _left_broadcast(t, shape):
+    assert t.ndim <= len(shape)
+    return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)
+
+
 def _get_variance(self, timestep, prev_timestep):
     alpha_prod_t = torch.gather(self.alphas_cumprod, 0, timestep.cpu()).to(timestep.device)
     alpha_prod_t_prev = torch.where(
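
The _left_broadcast helper added above appends trailing singleton dimensions and then broadcasts to a target shape, so a per-batch scalar such as an alpha value can be applied elementwise to a (B, C, H, W) latent. A standalone sketch of the behavior; the shapes are illustrative, not taken from the diff:

    import torch

    def _left_broadcast(t, shape):
        assert t.ndim <= len(shape)
        return t.reshape(t.shape + (1,) * (len(shape) - t.ndim)).broadcast_to(shape)

    alphas = torch.rand(4)                        # one value per batch element
    latents = torch.randn(4, 3, 64, 64)           # (B, C, H, W)
    out = _left_broadcast(alphas, latents.shape)  # (4,) -> (4, 1, 1, 1) -> (4, 3, 64, 64)
    print(out.shape)                              # torch.Size([4, 3, 64, 64])
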
@@ -82,13 +87,16 @@ def ddim_step_with_logprob(
 
     # 1. get previous step value (=t-1)
     prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps
+    # to prevent OOB on gather
     prev_timestep = torch.clamp(prev_timestep, 0, self.config.num_train_timesteps - 1)
 
     # 2. compute alphas, betas
-    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu()).to(timestep.device)
+    alpha_prod_t = self.alphas_cumprod.gather(0, timestep.cpu())
     alpha_prod_t_prev = torch.where(
         prev_timestep.cpu() >= 0, self.alphas_cumprod.gather(0, prev_timestep.cpu()), self.final_alpha_cumprod
-    ).to(timestep.device)
+    )
+    alpha_prod_t = _left_broadcast(alpha_prod_t, sample.shape).to(sample.device)
+    alpha_prod_t_prev = _left_broadcast(alpha_prod_t_prev, sample.shape).to(sample.device)
 
     beta_prod_t = 1 - alpha_prod_t
 
@@ -121,6 +129,7 @@ def ddim_step_with_logprob(
     # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
     variance = _get_variance(self, timestep, prev_timestep)
     std_dev_t = eta * variance ** (0.5)
+    std_dev_t = _left_broadcast(std_dev_t, sample.shape).to(sample.device)
 
     if use_clipped_model_output:
         # the pred_epsilon is always re-derived from the clipped x_0 in Glide
@@ -153,4 +162,4 @@ def ddim_step_with_logprob(
     # mean along all but batch dimension
     log_prob = log_prob.mean(dim=tuple(range(1, log_prob.ndim)))
 
-    return prev_sample, log_prob
+    return prev_sample.type(sample.dtype), log_prob
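
The return value is now cast back to the input's dtype, presumably so that callers under mixed precision get prev_sample in the same precision as the sample they passed in even when intermediate math promotes to fp32. A minimal illustration of the cast; the fp16 promotion scenario is an assumption, not shown in the diff:

    import torch

    sample = torch.randn(2, 4, 8, 8, dtype=torch.float16)
    prev_sample = sample.float() * 0.9 + 0.1      # intermediate math promoted to fp32
    prev_sample = prev_sample.type(sample.dtype)  # cast back, as in the change above
    assert prev_sample.dtype == torch.float16
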
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from absl import app, flags, logging
 from ml_collections import config_flags
 from accelerate import Accelerator
@@ -6,6 +7,7 @@ from accelerate.logging import get_logger
 from diffusers import StableDiffusionPipeline, DDIMScheduler
 from diffusers.loaders import AttnProcsLayers
 from diffusers.models.attention_processor import LoRAAttnProcessor
+import numpy as np
 import ddpo_pytorch.prompts
 import ddpo_pytorch.rewards
 from ddpo_pytorch.stat_tracking import PerPromptStatTracker
@@ -20,7 +22,7 @@ tqdm = partial(tqdm.tqdm, dynamic_ncols=True)
 
 
 FLAGS = flags.FLAGS
-config_flags.DEFINE_config_file("config", "config/base.py", "Training configuration.")
+config_flags.DEFINE_config_file("config", "ddpo_pytorch/config/base.py", "Training configuration.")
 
 logger = get_logger(__name__)
 
@@ -32,9 +34,10 @@ def main(_):
         log_with="wandb",
         mixed_precision=config.mixed_precision,
         project_dir=config.logdir,
+        gradient_accumulation_steps=config.train.gradient_accumulation_steps * config.sample.num_steps,
     )
     if accelerator.is_main_process:
-        accelerator.init_trackers(project_name="ddpo-pytorch", config=config)
+        accelerator.init_trackers(project_name="ddpo-pytorch", config=config.to_dict())
     logger.info(config)
 
     # set seed
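
The accumulation count handed to the Accelerator is now scaled by sample.num_steps. Reading the intent of the change (the diff itself does not spell it out): each sampled trajectory contributes one backward pass per denoising timestep, so an optimizer step should only fire after train.gradient_accumulation_steps whole trajectories' worth of timesteps have accumulated. A small arithmetic sketch with the DGX values from this commit:

    train_grad_accum = 4   # config.train.gradient_accumulation_steps in dgx.py
    num_steps = 50         # config.sample.num_steps in dgx.py

    # value handed to accelerate: backward() calls per optimizer.step()
    accumulation_window = train_grad_accum * num_steps
    print(accumulation_window)  # 200
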
@@ -93,14 +96,6 @@ def main(_):
     if config.allow_tf32:
         torch.backends.cuda.matmul.allow_tf32 = True
 
-    if config.train.scale_lr:
-        config.train.learning_rate = (
-            config.train.learning_rate
-            * config.train.gradient_accumulation_steps
-            * config.train.batch_size
-            * accelerator.num_processes
-        )
-
     # Initialize the optimizer
     if config.train.use_8bit_adam:
         try:
@@ -135,9 +130,6 @@ def main(_):
         config.train.batch_size * accelerator.num_processes * config.train.gradient_accumulation_steps
     )
 
-    assert config.sample.batch_size % config.train.batch_size == 0
-    assert samples_per_epoch % total_train_batch_size == 0
-
     logger.info("***** Running training *****")
     logger.info(f" Num Epochs = {config.num_epochs}")
     logger.info(f" Sample batch size per device = {config.sample.batch_size}")
@@ -149,6 +141,9 @@ def main(_):
     logger.info(f" Number of gradient updates per inner epoch = {samples_per_epoch // total_train_batch_size}")
     logger.info(f" Number of inner epochs = {config.train.num_inner_epochs}")
 
+    assert config.sample.batch_size % config.train.batch_size == 0
+    assert samples_per_epoch % total_train_batch_size == 0
+
     neg_prompt_embed = pipeline.text_encoder(
         pipeline.tokenizer(
             [""],
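
The two divisibility asserts, moved here from earlier in the file, guarantee that sampled batches re-chunk evenly into training batches and that an epoch's samples split evenly into optimizer steps. A sketch of the check using the DGX config values from this commit; num_processes = 8 is an assumption for a single DGX node, and the samples_per_epoch formula mirrors the total_train_batch_size computation shown above rather than being quoted from the diff:

    num_processes = 8                                 # assumed: one 8-GPU DGX node
    sample_batch_size, num_batches_per_epoch = 8, 4   # config.sample.* (dgx.py)
    train_batch_size, grad_accum = 8, 4               # config.train.* (dgx.py)

    samples_per_epoch = sample_batch_size * num_processes * num_batches_per_epoch
    total_train_batch_size = train_batch_size * num_processes * grad_accum

    assert sample_batch_size % train_batch_size == 0         # 8 % 8 == 0
    assert samples_per_epoch % total_train_batch_size == 0   # 256 % 256 == 0
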
@@ -237,6 +232,8 @@ def main(_):
             {"images": [wandb.Image(image, caption=prompt) for image, prompt in zip(images, prompts)]},
             step=global_step,
         )
+        # from PIL import Image
+        # Image.fromarray((images[0].cpu().numpy().transpose(1, 2, 0) * 255).astype(np.uint8)).save(f"test.png")
 
         # per-prompt mean/std tracking
         if config.per_prompt_stat_tracking:
@@ -267,12 +264,6 @@ def main(_):
             indices = torch.randperm(total_batch_size, device=accelerator.device)
             samples = {k: v[indices] for k, v in samples.items()}
 
-            # shuffle along time dimension, independently for each sample
-            for i in range(total_batch_size):
-                indices = torch.randperm(num_timesteps, device=accelerator.device)
-                for key in ["timesteps", "latents", "next_latents"]:
-                    samples[key][i] = samples[key][i][indices]
-
             # rebatch for training
             samples_batched = {k: v.reshape(-1, config.train.batch_size, *v.shape[1:]) for k, v in samples.items()}
 
@@ -292,6 +283,7 @@ def main(_):
                 else:
                     embeds = sample["prompt_embeds"]
 
+                info = defaultdict(list)
                 for j in tqdm(
                     range(num_timesteps),
                     desc=f"Timestep",
@@ -335,18 +327,12 @@ def main(_):
                         loss = torch.mean(torch.maximum(unclipped_loss, clipped_loss))
 
                         # debugging values
-                        info = {}
                         # John Schulman says that (ratio - 1) - log(ratio) is a better
                         # estimator, but most existing code uses this so...
                         # http://joschu.net/blog/kl-approx.html
-                        info["approx_kl"] = 0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2)
-                        info["clipfrac"] = torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float())
-                        info["loss"] = loss
-
-                        # log training-related stuff
-                        info.update({"epoch": epoch, "inner_epoch": inner_epoch, "timestep": j})
-                        accelerator.log(info, step=global_step)
-                        global_step += 1
+                        info["approx_kl"].append(0.5 * torch.mean((log_prob - sample["log_probs"][:, j]) ** 2))
+                        info["clipfrac"].append(torch.mean((torch.abs(ratio - 1.0) > config.train.clip_range).float()))
+                        info["loss"].append(loss)
 
                         # backward pass
                         accelerator.backward(loss)
@@ -355,6 +341,14 @@ def main(_):
                         optimizer.step()
                         optimizer.zero_grad()
 
+                    if accelerator.sync_gradients:
+                        # log training-related stuff
+                        info = {k: torch.mean(torch.stack(v)) for k, v in info.items()}
+                        info.update({"epoch": epoch, "inner_epoch": inner_epoch})
+                        accelerator.log(info, step=global_step)
+                        global_step += 1
+                        info = defaultdict(list)
+
 
 if __name__ == "__main__":
     app.run(main)
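
With this change the per-timestep metrics accumulate in a defaultdict of lists and are only reduced and logged when accelerate reports that gradients actually synced, i.e. once per optimizer step rather than once per micro-batch. A simplified, self-contained sketch of that pattern; the window size of 4 and the dummy loss are stand-ins, not values from the commit:

    import torch
    from collections import defaultdict

    ACCUM_STEPS = 4              # stand-in accumulation window
    info = defaultdict(list)
    global_step = 0

    for j in range(8):           # stand-in for the inner timestep loop
        loss = torch.rand(())    # stand-in for the PPO loss
        info["loss"].append(loss)

        sync_gradients = (j + 1) % ACCUM_STEPS == 0  # accelerator.sync_gradients analogue
        if sync_gradients:
            logged = {k: torch.mean(torch.stack(v)) for k, v in info.items()}
            logged.update({"epoch": 0, "inner_epoch": 0})
            print(global_step, logged["loss"].item())
            global_step += 1
            info = defaultdict(list)  # reset for the next accumulation window
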
setup.py (12 lines changed)
@@ -1,10 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='ddpo-pytorch',
-    version='0.0.1',
+    name="ddpo-pytorch",
+    version="0.0.1",
     packages=["ddpo_pytorch"],
     install_requires=[
-        "ml-collections", "absl-py"
+        "ml-collections",
+        "absl-py",
+        "diffusers[torch]==0.17.1",
+        "wandb",
+        "torchvision",
+        "inflect==6.0.4",
+        "transformers==4.30.2",
     ],
 )