# ddpo-pytorch/config/base.py

import ml_collections


def get_config():
    config = ml_collections.ConfigDict()

    ###### General ######
    # run name for wandb logging and checkpoint saving -- if not provided, will be auto-generated based on the datetime.
    config.run_name = ""
    # random seed for reproducibility.
    config.seed = 42
    # top-level logging directory for checkpoint saving.
    config.logdir = "logs"
    # number of epochs to train for. each epoch is one round of sampling from the model followed by training on those
    # samples.
    config.num_epochs = 100
    # number of epochs between saving model checkpoints.
    config.save_freq = 20
    # number of checkpoints to keep before overwriting old ones.
    config.num_checkpoint_limit = 5
    # mixed precision training. options are "fp16", "bf16", and "no". half-precision speeds up training significantly.
    config.mixed_precision = "fp16"
    # allow tf32 on Ampere GPUs, which can speed up training.
    config.allow_tf32 = True
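    # (the training script typically applies this as torch.backends.cuda.matmul.allow_tf32 = True; tf32 only
    # affects Ampere and newer GPUs.)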
    # resume training from a checkpoint. either an exact checkpoint directory (e.g. checkpoint_50), or a directory
    # containing checkpoints, in which case the latest one will be used. `config.use_lora` must be set to the same value
    # as the run that generated the saved checkpoint.
    config.resume_from = ""
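    # for example (hypothetical paths): "logs/my_run/checkpoints/checkpoint_50" resumes from that specific
    # checkpoint, while "logs/my_run/checkpoints" resumes from the latest checkpoint found in that directory.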
    # whether or not to use LoRA. LoRA reduces memory usage significantly by injecting small weight matrices into the
    # attention layers of the UNet. with LoRA, fp16, and a batch size of 1, finetuning Stable Diffusion should take
    # about 10GB of GPU memory. beware that if LoRA is disabled, training will take a lot of memory and saved checkpoint
    # files will also be large.
    config.use_lora = True

    ###### Pretrained Model ######
    config.pretrained = pretrained = ml_collections.ConfigDict()
    # base model to load. either a path to a local directory, or a model name from the HuggingFace model hub.
    pretrained.model = "runwayml/stable-diffusion-v1-5"
    # revision of the model to load.
    pretrained.revision = "main"

    ###### Sampling ######
    config.sample = sample = ml_collections.ConfigDict()
    # number of sampler inference steps.
    sample.num_steps = 50
    # eta parameter for the DDIM sampler. this controls the amount of noise injected into the sampling process, with 0.0
    # being fully deterministic and 1.0 being equivalent to the DDPM sampler.
    sample.eta = 1.0
    # classifier-free guidance weight. 1.0 is no guidance.
    sample.guidance_scale = 5.0
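    # (under standard classifier-free guidance the noise prediction is
    # noise_uncond + guidance_scale * (noise_cond - noise_uncond), so 1.0 reduces to the conditional prediction.)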
    # batch size (per GPU!) to use for sampling.
    sample.batch_size = 1
    # number of batches to sample per epoch. the total number of samples per epoch is `num_batches_per_epoch *
    # batch_size * num_gpus`.
    sample.num_batches_per_epoch = 2
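    # worked example (hypothetical GPU count): with batch_size = 1, num_batches_per_epoch = 2, and 4 GPUs,
    # each epoch collects 2 * 1 * 4 = 8 samples.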

    ###### Training ######
    config.train = train = ml_collections.ConfigDict()
    # batch size (per GPU!) to use for training.
    train.batch_size = 1
    # whether to use the 8bit Adam optimizer from bitsandbytes.
    train.use_8bit_adam = False
    # learning rate.
    train.learning_rate = 3e-4
    # Adam beta1.
    train.adam_beta1 = 0.9
    # Adam beta2.
    train.adam_beta2 = 0.999
    # Adam weight decay.
    train.adam_weight_decay = 1e-4
    # Adam epsilon.
    train.adam_epsilon = 1e-8
    # number of gradient accumulation steps. the effective batch size is `batch_size * num_gpus *
    # gradient_accumulation_steps`.
    train.gradient_accumulation_steps = 1
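    # worked example (hypothetical GPU count): with batch_size = 1, 4 GPUs, and gradient_accumulation_steps = 8,
    # the effective batch size is 1 * 4 * 8 = 32.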
    # maximum gradient norm for gradient clipping.
    train.max_grad_norm = 1.0
    # number of inner epochs per outer epoch. each inner epoch is one iteration through the data collected during one
    # outer epoch's round of sampling.
    train.num_inner_epochs = 1
    # whether or not to use classifier-free guidance during training. if enabled, the same guidance scale used during
    # sampling will be used during training.
    train.cfg = True
    # clip advantages to the range [-adv_clip_max, adv_clip_max].
    train.adv_clip_max = 5
    # the PPO clip range.
    train.clip_range = 1e-4
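    # (as in standard PPO, the importance-sampling ratio is clipped to [1 - clip_range, 1 + clip_range] when
    # computing the surrogate objective.)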
    # the fraction of timesteps to train on. if set to less than 1.0, the model will be trained on a subset of the
    # timesteps for each sample. this will speed up training but reduce the accuracy of policy gradient estimates.
    train.timestep_fraction = 1.0
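    # for example: with sample.num_steps = 50 and timestep_fraction = 0.5, roughly 25 of the 50 denoising
    # timesteps per trajectory would be used for training.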

    ###### Prompt Function ######
    # prompt function to use. see `prompts.py` for available prompt functions.
    config.prompt_fn = "imagenet_animals"
    # kwargs to pass to the prompt function.
    config.prompt_fn_kwargs = {}

    ###### Reward Function ######
    # reward function to use. see `rewards.py` for available reward functions.
    config.reward_fn = "jpeg_compressibility"

    ###### Per-Prompt Stat Tracking ######
    # when enabled, the model will track the mean and std of reward on a per-prompt basis and use that to compute
    # advantages. set `config.per_prompt_stat_tracking` to None to disable per-prompt stat tracking, in which case
    # advantages will be calculated using the mean and std of the entire batch.
    config.per_prompt_stat_tracking = ml_collections.ConfigDict()
    # number of reward values to store in the buffer for each prompt. the buffer persists across epochs.
    config.per_prompt_stat_tracking.buffer_size = 16
    # the minimum number of reward values to store in the buffer before using the per-prompt mean and std. if the buffer
    # contains fewer than `min_count` values, the mean and std of the entire batch will be used instead.
    config.per_prompt_stat_tracking.min_count = 16
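    # (a rough sketch of the mechanism: once a prompt's buffer holds at least min_count rewards, its advantage is
    # computed as roughly (reward - prompt_mean) / prompt_std over the buffered rewards for that prompt.)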

    return config
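

# a minimal usage sketch (assumption: this file is run directly; the training script normally consumes the
# config through ml_collections config flags instead). it just prints a couple of derived quantities from the
# comments above, for a single-GPU run with the defaults.
if __name__ == "__main__":
    cfg = get_config()
    samples_per_epoch = cfg.sample.num_batches_per_epoch * cfg.sample.batch_size
    effective_train_batch = cfg.train.batch_size * cfg.train.gradient_accumulation_steps
    print(f"samples per epoch (1 GPU): {samples_per_epoch}")
    print(f"effective train batch size (1 GPU): {effective_train_batch}")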