# ddpo-pytorch/config/base.py

import ml_collections


def get_config():
    config = ml_collections.ConfigDict()

    ###### General ######
    # run name for wandb logging and checkpoint saving -- if not provided, will be auto-generated based on the datetime.
    config.run_name = ""
    # random seed for reproducibility.
    config.seed = 42
    # top-level logging directory for checkpoint saving.
    config.logdir = "logs"
    # number of epochs to train for. each epoch is one round of sampling from the model followed by training on those
    # samples.
    config.num_epochs = 100
    # number of epochs between saving model checkpoints.
    config.save_freq = 20
    # number of checkpoints to keep before overwriting old ones.
    config.num_checkpoint_limit = 5
    # mixed precision training. options are "fp16", "bf16", and "no". half-precision speeds up training significantly.
    config.mixed_precision = "fp16"
    # allow tf32 on Ampere GPUs, which can speed up training.
    config.allow_tf32 = True
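    # (the training script typically applies this as torch.backends.cuda.matmul.allow_tf32 = True; tf32 only
    # affects Ampere and newer GPUs.)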
    # resume training from a checkpoint. either an exact checkpoint directory (e.g. checkpoint_50), or a directory
    # containing checkpoints, in which case the latest one will be used. `config.use_lora` must be set to the same value
    # as the run that generated the saved checkpoint.
    config.resume_from = ""
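    # for example (hypothetical paths): "logs/my_run/checkpoints/checkpoint_50" resumes from that specific
    # checkpoint, while "logs/my_run/checkpoints" resumes from the latest checkpoint found in that directory.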
    # whether or not to use LoRA. LoRA reduces memory usage significantly by injecting small weight matrices into the
    # attention layers of the UNet. with LoRA, fp16, and a batch size of 1, finetuning Stable Diffusion should take
    # about 10GB of GPU memory. beware that if LoRA is disabled, training will take a lot of memory and saved checkpoint
    # files will also be large.
    config.use_lora = True

    ###### Pretrained Model ######
    config.pretrained = pretrained = ml_collections.ConfigDict()
    # base model to load. either a path to a local directory, or a model name from the HuggingFace model hub.
    pretrained.model = "runwayml/stable-diffusion-v1-5"
    # revision of the model to load.
    pretrained.revision = "main"

    ###### Sampling ######
    config.sample = sample = ml_collections.ConfigDict()
    # number of sampler inference steps.
    sample.num_steps = 50
    # eta parameter for the DDIM sampler. this controls the amount of noise injected into the sampling process, with 0.0
    # being fully deterministic and 1.0 being equivalent to the DDPM sampler.
    sample.eta = 1.0
    # classifier-free guidance weight. 1.0 is no guidance.
    sample.guidance_scale = 5.0
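    # (under standard classifier-free guidance the noise prediction is
    # noise_uncond + guidance_scale * (noise_cond - noise_uncond), so 1.0 reduces to the conditional prediction.)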
    # batch size (per GPU!) to use for sampling.
    sample.batch_size = 1
    # number of batches to sample per epoch. the total number of samples per epoch is `num_batches_per_epoch *
    # batch_size * num_gpus`.
    sample.num_batches_per_epoch = 2
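    # worked example (hypothetical GPU count): with batch_size = 1, num_batches_per_epoch = 2, and 4 GPUs,
    # each epoch collects 2 * 1 * 4 = 8 samples.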

    ###### Training ######
    config.train = train = ml_collections.ConfigDict()
    # batch size (per GPU!) to use for training.
    train.batch_size = 1
    # whether to use the 8bit Adam optimizer from bitsandbytes.
    train.use_8bit_adam = False
    # learning rate.
    train.learning_rate = 3e-4
    # Adam beta1.
    train.adam_beta1 = 0.9
    # Adam beta2.
    train.adam_beta2 = 0.999
    # Adam weight decay.
    train.adam_weight_decay = 1e-4
    # Adam epsilon.
    train.adam_epsilon = 1e-8
    # number of gradient accumulation steps. the effective batch size is `batch_size * num_gpus *
    # gradient_accumulation_steps`.
    train.gradient_accumulation_steps = 1
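    # worked example (hypothetical GPU count): with batch_size = 1, 4 GPUs, and gradient_accumulation_steps = 8,
    # the effective batch size is 1 * 4 * 8 = 32.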
    # maximum gradient norm for gradient clipping.
    train.max_grad_norm = 1.0
    # number of inner epochs per outer epoch. each inner epoch is one iteration through the data collected during one
    # outer epoch's round of sampling.
    train.num_inner_epochs = 1
    # whether or not to use classifier-free guidance during training. if enabled, the same guidance scale used during
    # sampling will be used during training.
    train.cfg = True
    # clip advantages to the range [-adv_clip_max, adv_clip_max].
    train.adv_clip_max = 5
    # the PPO clip range.
    train.clip_range = 1e-4
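    # (as in standard PPO, the importance-sampling ratio is clipped to [1 - clip_range, 1 + clip_range] when
    # computing the surrogate objective.)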
    # the fraction of timesteps to train on. if set to less than 1.0, the model will be trained on a subset of the
    # timesteps for each sample. this will speed up training but reduce the accuracy of policy gradient estimates.
    train.timestep_fraction = 1.0
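    # for example: with sample.num_steps = 50 and timestep_fraction = 0.5, roughly 25 of the 50 denoising
    # timesteps per trajectory would be used for training.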

    ###### Prompt Function ######
    # prompt function to use. see `prompts.py` for available prompt functions.
    config.prompt_fn = "imagenet_animals"
    # kwargs to pass to the prompt function.
    config.prompt_fn_kwargs = {}

    ###### Reward Function ######
    # reward function to use. see `rewards.py` for available reward functions.
    config.reward_fn = "jpeg_compressibility"

    ###### Per-Prompt Stat Tracking ######
    # when enabled, the model will track the mean and std of reward on a per-prompt basis and use that to compute
    # advantages. set `config.per_prompt_stat_tracking` to None to disable per-prompt stat tracking, in which case
    # advantages will be calculated using the mean and std of the entire batch.
    config.per_prompt_stat_tracking = ml_collections.ConfigDict()
    # number of reward values to store in the buffer for each prompt. the buffer persists across epochs.
    config.per_prompt_stat_tracking.buffer_size = 16
    # the minimum number of reward values to store in the buffer before using the per-prompt mean and std. if the buffer
    # contains fewer than `min_count` values, the mean and std of the entire batch will be used instead.
    config.per_prompt_stat_tracking.min_count = 16
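    # (a rough sketch of the mechanism: once a prompt's buffer holds at least min_count rewards, its advantage is
    # computed as roughly (reward - prompt_mean) / prompt_std over the buffered rewards for that prompt.)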

    return config
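

# a minimal usage sketch (assumption: this file is run directly; the training script normally consumes the
# config through ml_collections config flags instead). it just prints a couple of derived quantities from the
# comments above, for a single-GPU run with the defaults.
if __name__ == "__main__":
    cfg = get_config()
    samples_per_epoch = cfg.sample.num_batches_per_epoch * cfg.sample.batch_size
    effective_train_batch = cfg.train.batch_size * cfg.train.gradient_accumulation_steps
    print(f"samples per epoch (1 GPU): {samples_per_epoch}")
    print(f"effective train batch size (1 GPU): {effective_train_batch}")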