501 lines
13 KiB
Python
501 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
"""Configuration file (powered by YACS)."""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
|
|
from pycls.core.io import cache_url
|
|
from yacs.config import CfgNode as CfgNode
|
|
|
|
|
|
# Global config object; every option below is attached to this node
_C = CfgNode()

# Public alias: `cfg` and `_C` refer to the same CfgNode instance.
# Example usage:
#   from pycls.core.config import cfg
cfg = _C
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Model options
# ------------------------------------------------------------------------------------ #
_C.MODEL = CfgNode()

# Model type (empty string by default)
_C.MODEL.TYPE = ""

# Number of weight layers
_C.MODEL.DEPTH = 0

# Number of input channels
_C.MODEL.INPUT_CHANNELS = 3

# Number of classes
_C.MODEL.NUM_CLASSES = 10

# Loss function (see pycls/core/builders.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"

# Label smoothing eps
_C.MODEL.LABEL_SMOOTHING_EPS = 0.0

# ASPP channels
# NOTE(review): ASPP presumably means atrous spatial pyramid pooling for the
# "seg" task -- confirm against the model code.
_C.MODEL.ASPP_CHANNELS = 256

# ASPP dilation rates (one entry per rate)
_C.MODEL.ASPP_RATES = [6, 12, 18]
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# ResNet options
# ------------------------------------------------------------------------------------ #
_C.RESNET = CfgNode()

# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"

# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1

# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64

# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# AnyNet options
# ------------------------------------------------------------------------------------ #
_C.ANYNET = CfgNode()

# Stem type
_C.ANYNET.STEM_TYPE = "simple_stem_in"

# Stem width
_C.ANYNET.STEM_W = 32

# Block type
_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block"

# The five per-stage lists below are empty by default and hold one entry per stage.

# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []

# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []

# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []

# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []

# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False

# SE ratio
_C.ANYNET.SE_R = 0.25
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# RegNet options
# ------------------------------------------------------------------------------------ #
_C.REGNET = CfgNode()

# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"

# Stem width
_C.REGNET.STEM_W = 32

# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"

# Stride of each stage
_C.REGNET.STRIDE = 2

# Squeeze-and-Excitation (RegNetY): on/off switch and SE ratio
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25

# Depth
_C.REGNET.DEPTH = 10

# Initial width
_C.REGNET.W0 = 32

# Slope
_C.REGNET.WA = 5.0

# Quantization
_C.REGNET.WM = 2.5

# Group width
_C.REGNET.GROUP_W = 16

# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# EfficientNet options
# ------------------------------------------------------------------------------------ #
_C.EN = CfgNode()

# Stem width
_C.EN.STEM_W = 32

# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []

# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []

# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []

# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25

# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []

# Kernel sizes for each stage
_C.EN.KERNELS = []

# Head width
_C.EN.HEAD_W = 1280

# Drop connect ratio (0.0 by default)
_C.EN.DC_RATIO = 0.0

# Dropout ratio (0.0 by default)
_C.EN.DROPOUT_RATIO = 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------- #
# NAS options
# ---------------------------------------------------------------------------- #
_C.NAS = CfgNode()

# Cell genotype
_C.NAS.GENOTYPE = 'nas'

# Custom genotype (empty by default)
# NOTE(review): presumably only consulted for a particular GENOTYPE value --
# confirm against the NAS model code.
_C.NAS.CUSTOM_GENOTYPE = []

# Base NAS width
_C.NAS.WIDTH = 16

# Total number of cells
_C.NAS.DEPTH = 20

# Auxiliary heads (on/off switch)
_C.NAS.AUX = False

# Weight for auxiliary heads
_C.NAS.AUX_WEIGHT = 0.4

# Drop path probability
_C.NAS.DROP_PROB = 0.0

# Matrix in NAS Bench
_C.NAS.MATRIX = []

# Operations in NAS Bench
_C.NAS.OPS = []

# Number of stacks in NAS Bench
_C.NAS.NUM_STACKS = 3

# Number of modules per stack in NAS Bench
_C.NAS.NUM_MODULES_PER_STACK = 3
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Batch norm options
# ------------------------------------------------------------------------------------ #
_C.BN = CfgNode()

# BN epsilon
_C.BN.EPS = 1e-5

# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1

# Precise BN stats; assert_and_infer_cfg() only accepts USE_PRECISE_STATS=True
# when NUM_GPUS == 1.
# NOTE(review): NUM_SAMPLES_PRECISE is presumably the number of samples used to
# compute the precise stats -- confirm against the training code.
_C.BN.USE_PRECISE_STATS = False
_C.BN.NUM_SAMPLES_PRECISE = 1024

# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False

# Use a different weight decay for BN layers (switch, then the decay value)
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Optimizer options
# ------------------------------------------------------------------------------------ #
_C.OPTIM = CfgNode()

# Base learning rate
_C.OPTIM.BASE_LR = 0.1

# Learning rate policy; select from {'cos', 'exp', 'steps'}
_C.OPTIM.LR_POLICY = "cos"

# Exponential decay factor
_C.OPTIM.GAMMA = 0.1

# Steps for 'steps' policy (in epochs); when non-empty the first entry must be 0
# (checked by assert_and_infer_cfg())
_C.OPTIM.STEPS = []

# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1

# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200

# Momentum
_C.OPTIM.MOMENTUM = 0.9

# Momentum dampening
_C.OPTIM.DAMPENING = 0.0

# Nesterov momentum
_C.OPTIM.NESTEROV = True

# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4

# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1

# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_EPOCHS = 0

# Update the learning rate per iter
_C.OPTIM.ITER_LR = False

# Base learning rate for arch
_C.OPTIM.ARCH_BASE_LR = 0.0003

# L2 regularization for arch
_C.OPTIM.ARCH_WEIGHT_DECAY = 0.001

# Optimizer for arch
_C.OPTIM.ARCH_OPTIM = 'adam'

# Epoch to start optimizing arch (note: the default is a float, not an int)
_C.OPTIM.ARCH_EPOCH = 0.0
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------------------ #
_C.TRAIN = CfgNode()

# Dataset and split; SPLIT must be one of "train", "val", "test"
# (checked by assert_and_infer_cfg())
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"

# Total mini-batch size; must be a multiple of NUM_GPUS
# (checked by assert_and_infer_cfg())
_C.TRAIN.BATCH_SIZE = 128

# Image size
_C.TRAIN.IM_SIZE = 224

# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1

# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1

# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True

# Weights to start training from; cache_cfg_urls() replaces this value with the
# result of cache_url()
_C.TRAIN.WEIGHTS = ""

# Percentage of gray images in jig
_C.TRAIN.GRAY_PERCENTAGE = 0.0

# Portion to create trainA/trainB split
_C.TRAIN.PORTION = 1.0
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Testing options
# ------------------------------------------------------------------------------------ #
_C.TEST = CfgNode()

# Dataset and split; SPLIT must be one of "train", "val", "test"
# (checked by assert_and_infer_cfg())
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"

# Total mini-batch size; must be a multiple of NUM_GPUS
# (checked by assert_and_infer_cfg())
_C.TEST.BATCH_SIZE = 200

# Image size
_C.TEST.IM_SIZE = 256

# Weights to use for testing; cache_cfg_urls() replaces this value with the
# result of cache_url()
_C.TEST.WEIGHTS = ""
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Common train/test data loader options
# ------------------------------------------------------------------------------------ #
_C.DATA_LOADER = CfgNode()

# Number of data loader workers per process
_C.DATA_LOADER.NUM_WORKERS = 8

# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Memory options
# ------------------------------------------------------------------------------------ #
_C.MEM = CfgNode()

# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# CUDNN options
# ------------------------------------------------------------------------------------ #
_C.CUDNN = CfgNode()

# Perform benchmarking to select the fastest CUDNN algorithms to use.
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training).
_C.CUDNN.BENCHMARK = True
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Precise timing options
# ------------------------------------------------------------------------------------ #
_C.PREC_TIME = CfgNode()

# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3

# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Misc options
# ------------------------------------------------------------------------------------ #

# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1

# Task (cls, seg, rot, col, jig)
_C.TASK = "cls"

# Grid in Jigsaw (2, 3); no effect if TASK is not jig
_C.JIGSAW_GRID = 3

# Output directory
_C.OUT_DIR = "/tmp"

# Config destination (in OUT_DIR); dump_cfg() writes the config to this file
_C.CFG_DEST = "config.yaml"

# RNG seed.
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1

# Log destination ('stdout' or 'file'); checked by assert_and_infer_cfg()
_C.LOG_DEST = "stdout"

# Log period in iters
_C.LOG_PERIOD = 10

# Distributed backend
_C.DIST_BACKEND = "nccl"

# Hostname and port for initializing multi-process groups
_C.HOST = "localhost"
_C.PORT = 10001

# Model weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"
|
|
|
|
|
|
# ------------------------------------------------------------------------------------ #
# Deprecated keys
# ------------------------------------------------------------------------------------ #

# Keys that no longer exist under PREC_TIME but are registered as deprecated.
# NOTE(review): per the yacs documentation, deprecated keys met while merging
# are skipped with a warning instead of raising -- confirm against the
# installed yacs version.
_C.register_deprecated_key("PREC_TIME.BATCH_SIZE")
_C.register_deprecated_key("PREC_TIME.ENABLED")
|
|
|
|
|
|
def assert_and_infer_cfg(cache_urls=True):
    """Check invariants of the global config and optionally cache weight URLs.

    The checks use explicit raises rather than ``assert`` statements so that
    they still run when Python is started with ``-O`` (which strips asserts).

    Args:
        cache_urls: If true, call ``cache_cfg_urls()`` after all checks pass.

    Raises:
        AssertionError: If any invariant is violated. This is the same
            exception type the previous ``assert`` statements raised.
    """

    def _require(condition, message):
        # Raise explicitly so the check cannot be optimized away.
        if not condition:
            raise AssertionError(message)

    # An empty STEPS list is accepted; otherwise the schedule must begin at 0.
    steps = _C.OPTIM.STEPS
    _require(not steps or steps[0] == 0, "The first lr step must start at 0")

    data_splits = ["train", "val", "test"]
    err_str = "Data split '{}' not supported"
    _require(_C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT))
    _require(_C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT))

    # Guard the modulo checks below: NUM_GPUS == 0 would otherwise surface as
    # an unexplained ZeroDivisionError.
    _require(_C.NUM_GPUS > 0, "NUM_GPUS must be a positive integer")
    err_str = "Mini-batch size should be a multiple of NUM_GPUS."
    _require(_C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str)
    _require(_C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str)

    err_str = "Precise BN stats computation not verified for > 1 GPU"
    _require(not _C.BN.USE_PRECISE_STATS or _C.NUM_GPUS == 1, err_str)

    err_str = "Log destination '{}' not supported"
    _require(_C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST))

    if cache_urls:
        cache_cfg_urls()
|
|
|
|
|
|
def cache_cfg_urls():
    """Pass the train and test weight settings through ``cache_url``.

    Each of ``TRAIN.WEIGHTS`` and ``TEST.WEIGHTS`` (in that order) is handed to
    ``cache_url`` together with ``DOWNLOAD_CACHE`` and overwritten with the
    value it returns.
    """
    for section in (_C.TRAIN, _C.TEST):
        section.WEIGHTS = cache_url(section.WEIGHTS, _C.DOWNLOAD_CACHE)
|
|
|
|
|
|
def dump_cfg():
    """Write the global config to the file ``CFG_DEST`` inside ``OUT_DIR``."""
    destination = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
    with open(destination, "w") as stream:
        _C.dump(stream=stream)
|
|
|
|
|
|
def load_cfg(out_dir, cfg_dest="config.yaml"):
    """Merge the config file ``cfg_dest`` found in ``out_dir`` into the global config."""
    _C.merge_from_file(os.path.join(out_dir, cfg_dest))
|
|
|
|
|
|
def load_cfg_fom_args(description="Config file options."):
    """Populate the global config from the process command line.

    ``--cfg`` (required) names a config file that is merged first; all
    remaining command-line tokens are then merged via ``merge_from_list``.
    If the program was started with no arguments at all, the help text is
    printed and the process exits with status 1.
    """
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument(
        "--cfg",
        dest="cfg_file",
        help="Config file location",
        required=True,
        type=str,
    )
    arg_parser.add_argument(
        "opts",
        help="See pycls/core/config.py for all options",
        default=None,
        nargs=argparse.REMAINDER,
    )
    # Bare invocation: show usage rather than argparse's "missing --cfg" error.
    if len(sys.argv) == 1:
        arg_parser.print_help()
        sys.exit(1)
    parsed = arg_parser.parse_args()
    _C.merge_from_file(parsed.cfg_file)
    _C.merge_from_list(parsed.opts)
|