#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Configuration file (powered by YACS)."""

import argparse
import os
import sys

from pycls.core.io import cache_url
from yacs.config import CfgNode


# Global config object
_C = CfgNode()

# Example usage:
#   from core.config import cfg
cfg = _C


# ------------------------------------------------------------------------------------ #
# Model options
# ------------------------------------------------------------------------------------ #
_C.MODEL = CfgNode()

# Model type
_C.MODEL.TYPE = ""

# Number of weight layers
_C.MODEL.DEPTH = 0

# Number of input channels
_C.MODEL.INPUT_CHANNELS = 3

# Number of classes
_C.MODEL.NUM_CLASSES = 10

# Loss function (see pycls/core/builders.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"

# Label smoothing eps
_C.MODEL.LABEL_SMOOTHING_EPS = 0.0

# ASPP channels
_C.MODEL.ASPP_CHANNELS = 256

# ASPP dilation rates
_C.MODEL.ASPP_RATES = [6, 12, 18]


# ------------------------------------------------------------------------------------ #
# ResNet options
# ------------------------------------------------------------------------------------ #
_C.RESNET = CfgNode()

# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"

# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1

# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64

# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True


# ------------------------------------------------------------------------------------ #
# AnyNet options
# ------------------------------------------------------------------------------------ #
_C.ANYNET = CfgNode()

# Stem type
_C.ANYNET.STEM_TYPE = "simple_stem_in"

# Stem width
_C.ANYNET.STEM_W = 32

# Block type
_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block"

# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []

# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []

# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []

# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []

# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False

# SE ratio
_C.ANYNET.SE_R = 0.25


# ------------------------------------------------------------------------------------ #
# RegNet options
# ------------------------------------------------------------------------------------ #
_C.REGNET = CfgNode()

# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"

# Stem width
_C.REGNET.STEM_W = 32

# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"

# Stride of each stage
_C.REGNET.STRIDE = 2

# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25

# Depth
_C.REGNET.DEPTH = 10

# Initial width
_C.REGNET.W0 = 32

# Slope
_C.REGNET.WA = 5.0

# Quantization
_C.REGNET.WM = 2.5

# Group width
_C.REGNET.GROUP_W = 16

# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
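
# The RegNet keys above (W0, WA, WM, DEPTH) follow the linear width
# parameterization from "Designing Network Design Spaces": block j has a
# continuous width u_j = W0 + WA * j, which is then quantized to a power of WM.
# A minimal illustrative sketch of that computation follows (the actual logic
# lives in pycls/models/regnet.py; the function name and the rounding step q
# here are our own assumptions, not pycls API):
def _example_regnet_widths(w_a, w_0, w_m, depth, q=8):
    """Illustrative only: per-block widths from the RegNet parameters."""
    import numpy as np

    ws_cont = w_0 + w_a * np.arange(depth)  # continuous widths u_j = w_0 + w_a * j
    ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))  # per-block quantization exponents
    ws = w_0 * np.power(w_m, ks)  # widths quantized to powers of w_m
    return (np.round(ws / q) * q).astype(int).tolist()  # round to multiples of q
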
# ------------------------------------------------------------------------------------ #
# EfficientNet options
# ------------------------------------------------------------------------------------ #
_C.EN = CfgNode()

# Stem width
_C.EN.STEM_W = 32

# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []

# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []

# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25

# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []

# Kernel sizes for each stage
_C.EN.KERNELS = []

# Head width
_C.EN.HEAD_W = 1280

# Drop connect ratio
_C.EN.DC_RATIO = 0.0

# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0


# ------------------------------------------------------------------------------------ #
# NAS options
# ------------------------------------------------------------------------------------ #
_C.NAS = CfgNode()

# Cell genotype
_C.NAS.GENOTYPE = "nas"

# Custom genotype
_C.NAS.CUSTOM_GENOTYPE = []

# Base NAS width
_C.NAS.WIDTH = 16

# Total number of cells
_C.NAS.DEPTH = 20

# Auxiliary heads
_C.NAS.AUX = False

# Weight for auxiliary heads
_C.NAS.AUX_WEIGHT = 0.4

# Drop path probability
_C.NAS.DROP_PROB = 0.0

# Matrix in NAS Bench
_C.NAS.MATRIX = []

# Operations in NAS Bench
_C.NAS.OPS = []

# Number of stacks in NAS Bench
_C.NAS.NUM_STACKS = 3

# Number of modules per stack in NAS Bench
_C.NAS.NUM_MODULES_PER_STACK = 3


# ------------------------------------------------------------------------------------ #
# Batch norm options
# ------------------------------------------------------------------------------------ #
_C.BN = CfgNode()

# BN epsilon
_C.BN.EPS = 1e-5

# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1

# Precise BN stats
_C.BN.USE_PRECISE_STATS = False
_C.BN.NUM_SAMPLES_PRECISE = 1024

# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False

# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0


# ------------------------------------------------------------------------------------ #
# Optimizer options
# ------------------------------------------------------------------------------------ #
_C.OPTIM = CfgNode()

# Base learning rate
_C.OPTIM.BASE_LR = 0.1

# Learning rate policy (select from 'cos', 'exp', 'steps')
_C.OPTIM.LR_POLICY = "cos"

# Exponential decay factor
_C.OPTIM.GAMMA = 0.1

# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []

# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1

# Maximum number of epochs
_C.OPTIM.MAX_EPOCH = 200

# Momentum
_C.OPTIM.MOMENTUM = 0.9

# Momentum dampening
_C.OPTIM.DAMPENING = 0.0

# Nesterov momentum
_C.OPTIM.NESTEROV = True

# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4

# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1

# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_EPOCHS = 0

# Update the learning rate per iter
_C.OPTIM.ITER_LR = False

# Base learning rate for arch
_C.OPTIM.ARCH_BASE_LR = 0.0003

# L2 regularization for arch
_C.OPTIM.ARCH_WEIGHT_DECAY = 0.001

# Optimizer for arch
_C.OPTIM.ARCH_OPTIM = "adam"

# Epoch to start optimizing arch
_C.OPTIM.ARCH_EPOCH = 0.0
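
# A minimal illustrative sketch (not pycls's scheduler code) of how the OPTIM
# keys above interact under the default 'cos' policy: a half-cosine decay over
# MAX_EPOCH epochs, linearly warmed up from BASE_LR * WARMUP_FACTOR to BASE_LR
# during the first WARMUP_EPOCHS epochs. The function name is our own.
def _example_lr_at_epoch(cfg, cur_epoch):
    """Illustrative only: learning rate for the 'cos' policy with warmup."""
    import math

    # Half-cosine decay from BASE_LR at epoch 0 to ~0 at MAX_EPOCH
    lr = 0.5 * cfg.OPTIM.BASE_LR * (1.0 + math.cos(math.pi * cur_epoch / cfg.OPTIM.MAX_EPOCH))
    if cur_epoch < cfg.OPTIM.WARMUP_EPOCHS:
        # Linear warmup: scale by WARMUP_FACTOR at epoch 0, by 1.0 at WARMUP_EPOCHS
        alpha = cur_epoch / cfg.OPTIM.WARMUP_EPOCHS
        lr *= cfg.OPTIM.WARMUP_FACTOR * (1.0 - alpha) + alpha
    return lr
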
# ------------------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------------------ #
_C.TRAIN = CfgNode()

# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"

# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128

# Image size
_C.TRAIN.IM_SIZE = 224

# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1

# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1

# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True

# Weights to start training from
_C.TRAIN.WEIGHTS = ""

# Percentage of gray images in jig
_C.TRAIN.GRAY_PERCENTAGE = 0.0

# Portion to create trainA/trainB split
_C.TRAIN.PORTION = 1.0


# ------------------------------------------------------------------------------------ #
# Testing options
# ------------------------------------------------------------------------------------ #
_C.TEST = CfgNode()

# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"

# Total mini-batch size
_C.TEST.BATCH_SIZE = 200

# Image size
_C.TEST.IM_SIZE = 256

# Weights to use for testing
_C.TEST.WEIGHTS = ""


# ------------------------------------------------------------------------------------ #
# Common train/test data loader options
# ------------------------------------------------------------------------------------ #
_C.DATA_LOADER = CfgNode()

# Number of data loader workers per process
_C.DATA_LOADER.NUM_WORKERS = 8

# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
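
# A minimal illustrative sketch (assumed wiring, not pycls's loader code) of
# how the options above are typically consumed: TRAIN.BATCH_SIZE is the
# *total* mini-batch size, so each of the NUM_GPUS processes constructs a
# loader yielding BATCH_SIZE // NUM_GPUS samples per iteration. The function
# name is our own.
def _example_build_train_loader(dataset, cfg):
    """Illustrative only: build a training DataLoader from the options above."""
    from torch.utils.data import DataLoader

    return DataLoader(
        dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE // cfg.NUM_GPUS,  # per-process batch size
        shuffle=True,
        num_workers=cfg.DATA_LOADER.NUM_WORKERS,
        pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
        drop_last=True,
    )
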
# ------------------------------------------------------------------------------------ #
# Memory options
# ------------------------------------------------------------------------------------ #
_C.MEM = CfgNode()

# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True


# ------------------------------------------------------------------------------------ #
# CUDNN options
# ------------------------------------------------------------------------------------ #
_C.CUDNN = CfgNode()

# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True


# ------------------------------------------------------------------------------------ #
# Precise timing options
# ------------------------------------------------------------------------------------ #
_C.PREC_TIME = CfgNode()

# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3

# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30


# ------------------------------------------------------------------------------------ #
# Misc options
# ------------------------------------------------------------------------------------ #

# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1

# Task (cls, seg, rot, col, jig)
_C.TASK = "cls"

# Grid in Jigsaw (2, 3); no effect if TASK is not jig
_C.JIGSAW_GRID = 3

# Output directory
_C.OUT_DIR = "/tmp"

# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"

# RNG seed
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1

# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"

# Log period in iters
_C.LOG_PERIOD = 10

# Distributed backend
_C.DIST_BACKEND = "nccl"

# Hostname and port for initializing multi-process groups
_C.HOST = "localhost"
_C.PORT = 10001

# Model weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"


# ------------------------------------------------------------------------------------ #
# Deprecated keys
# ------------------------------------------------------------------------------------ #
_C.register_deprecated_key("PREC_TIME.BATCH_SIZE")
_C.register_deprecated_key("PREC_TIME.ENABLED")
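
# A minimal illustrative sketch (assumed usage, not pycls's distributed setup
# code) of how HOST, PORT, DIST_BACKEND, and NUM_GPUS above are typically
# combined to initialize the process group for multi-GPU runs. The function
# name is our own.
def _example_init_process_group(proc_rank):
    """Illustrative only: join the default distributed process group."""
    import torch.distributed as dist

    dist.init_process_group(
        backend=_C.DIST_BACKEND,
        init_method="tcp://{}:{}".format(_C.HOST, _C.PORT),
        world_size=_C.NUM_GPUS,
        rank=proc_rank,
    )
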
def assert_and_infer_cfg(cache_urls=True):
    """Checks config value invariants."""
    err_str = "The first lr step must start at 0"
    assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str
    data_splits = ["train", "val", "test"]
    err_str = "Data split '{}' not supported"
    assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT)
    assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT)
    err_str = "Mini-batch size should be a multiple of NUM_GPUS."
    assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
    assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
    err_str = "Precise BN stats computation not verified for > 1 GPU"
    assert not _C.BN.USE_PRECISE_STATS or _C.NUM_GPUS == 1, err_str
    err_str = "Log destination '{}' not supported"
    assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST)
    if cache_urls:
        cache_cfg_urls()


def cache_cfg_urls():
    """Downloads URLs in config, caches them, and rewrites cfg to use cached files."""
    _C.TRAIN.WEIGHTS = cache_url(_C.TRAIN.WEIGHTS, _C.DOWNLOAD_CACHE)
    _C.TEST.WEIGHTS = cache_url(_C.TEST.WEIGHTS, _C.DOWNLOAD_CACHE)


def dump_cfg():
    """Dumps the config to the output directory."""
    cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
    with open(cfg_file, "w") as f:
        _C.dump(stream=f)


def load_cfg(out_dir, cfg_dest="config.yaml"):
    """Loads config from the specified output directory."""
    cfg_file = os.path.join(out_dir, cfg_dest)
    _C.merge_from_file(cfg_file)


def load_cfg_fom_args(description="Config file options."):
    """Loads config from command line arguments and sets any specified options."""
    parser = argparse.ArgumentParser(description=description)
    help_s = "Config file location"
    parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str)
    help_s = "See pycls/core/config.py for all options"
    parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    _C.merge_from_file(args.cfg_file)
    _C.merge_from_list(args.opts)
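
# A typical entry point (illustrative; the tool script name and flow below are
# assumptions) loads a YAML config, applies command-line overrides, validates,
# and freezes the config before use:
#
#   from pycls.core.config import assert_and_infer_cfg, cfg, load_cfg_fom_args
#
#   load_cfg_fom_args("Train a classification model.")
#   assert_and_infer_cfg()
#   cfg.freeze()
#
# Command-line overrides follow YACS's KEY VALUE pair convention, e.g.:
#
#   python tools/train_net.py --cfg configs/example.yaml OPTIM.BASE_LR 0.05 NUM_GPUS 2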