update ImageNet training
This commit is contained in:
parent
666c105f51
commit
4121d1719f
@ -24,8 +24,8 @@ CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_V1 cifar100 cut
|
||||
|
||||
Train the searched CNN on ImageNet
|
||||
```
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14 B128 -1
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14 B128 -1
|
||||
```
|
||||
|
||||
Evaluate a trained CNN model
|
||||
|
15
configs/nas-imagenet-B128.config
Normal file
15
configs/nas-imagenet-B128.config
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"type" : ["str", "steplr"],
|
||||
"batch_size": ["int", 128],
|
||||
"epochs" : ["int", 250],
|
||||
"decay_period": ["int", 1],
|
||||
"gamma" : ["float", 0.97],
|
||||
"momentum" : ["float", 0.9],
|
||||
"decay" : ["float", 0.00003],
|
||||
"LR" : ["float", 0.1],
|
||||
"label_smooth": ["float", 0.1],
|
||||
"auxiliary" : ["bool", 1],
|
||||
"auxiliary_weight" : ["float", 0.4],
|
||||
"grad_clip" : ["float", 5],
|
||||
"drop_path_prob" : ["float", 0]
|
||||
}
|
15
configs/nas-imagenet-B256.config
Normal file
15
configs/nas-imagenet-B256.config
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"type" : ["str", "steplr"],
|
||||
"batch_size": ["int", 256],
|
||||
"epochs" : ["int", 250],
|
||||
"decay_period": ["int", 1],
|
||||
"gamma" : ["float", 0.97],
|
||||
"momentum" : ["float", 0.9],
|
||||
"decay" : ["float", 0.00003],
|
||||
"LR" : ["float", 0.1],
|
||||
"label_smooth": ["float", 0.1],
|
||||
"auxiliary" : ["bool", 1],
|
||||
"auxiliary_weight" : ["float", 0.4],
|
||||
"grad_clip" : ["float", 5],
|
||||
"drop_path_prob" : ["float", 0]
|
||||
}
|
@ -42,7 +42,7 @@ else : print('Find CUDA_VISIBLE_DEVICES={:
|
||||
assert torch.cuda.is_available(), 'torch.cuda is not available'
|
||||
|
||||
|
||||
if args.manualSeed is None:
|
||||
if args.manualSeed is None or args.manualSeed < 0:
|
||||
args.manualSeed = random.randint(1, 10000)
|
||||
random.seed(args.manualSeed)
|
||||
cudnn.benchmark = True
|
||||
@ -54,10 +54,10 @@ torch.cuda.manual_seed_all(args.manualSeed)
|
||||
def main():
|
||||
|
||||
# Init logger
|
||||
args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
#args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
|
||||
if not os.path.isdir(args.save_path):
|
||||
os.makedirs(args.save_path)
|
||||
log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
|
||||
log = open(os.path.join(args.save_path, 'seed-{:}-log.txt'.format(args.manualSeed)), 'w')
|
||||
print_log('Save Path : {:}'.format(args.save_path), log)
|
||||
state = {k: v for k, v in args._get_kwargs()}
|
||||
print_log(state, log)
|
||||
|
@ -59,8 +59,8 @@ def main_procedure(config, dataset, data_path, args, genotype, init_channels, la
|
||||
raise ValueError('Can not find the schedular type : {:}'.format(config.type))
|
||||
|
||||
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
|
||||
checkpoint_best = os.path.join(args.save_path, 'checkpoint-{:}-best.pth'.format(dataset))
|
||||
checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-model.pth'.format(args.manualSeed, dataset))
|
||||
checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-best.pth'.format(args.manualSeed, dataset))
|
||||
if pure_evaluate:
|
||||
print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
|
||||
basemodel.load_state_dict( pure_evaluate )
|
||||
|
@ -81,8 +81,8 @@ def main_procedure_imagenet(config, data_path, args, genotype, init_channels, la
|
||||
raise ValueError('Can not find the schedular type : {:}'.format(config.type))
|
||||
|
||||
|
||||
checkpoint_path = os.path.join(args.save_path, 'checkpoint-imagenet-model.pth')
|
||||
checkpoint_best = os.path.join(args.save_path, 'checkpoint-imagenet-best.pth')
|
||||
checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-model.pth'.format(args.manualSeed))
|
||||
checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-best.pth'.format(args.manualSeed))
|
||||
|
||||
if pure_evaluate:
|
||||
print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
|
||||
|
@ -1,15 +1,16 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
echo "CHECK-DATA-DIR START"
|
||||
#sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
|
||||
# COMM_KM_Data COMM_km_2018 \
|
||||
# `pwd`/hadoop-data \
|
||||
# afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
|
||||
sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
|
||||
COMM_KM_Data COMM_km_2018 \
|
||||
`pwd`/hadoop-data \
|
||||
afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
|
||||
|
||||
export TORCH_HOME="./data/data/"
|
||||
wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
|
||||
tar xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
|
||||
rm ${TORCH_HOME}/cifar.python.tar
|
||||
#wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
|
||||
#tar -xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
|
||||
tar -xf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
|
||||
#rm ${TORCH_HOME}/cifar.python.tar
|
||||
#tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
|
||||
|
||||
cifar_dir="${TORCH_HOME}/cifar.python"
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env sh
|
||||
if [ "$#" -ne 3 ] ;then
|
||||
if [ "$#" -ne 5 ] ;then
|
||||
echo "Input illegal number of parameters " $#
|
||||
echo "Need 3 parameters for the architecture, and the channel and the layers"
|
||||
echo "Need 5 parameters for the architecture, and the channel, and the layers, and the batch-size, and the seed"
|
||||
exit 1
|
||||
fi
|
||||
if [ "$TORCH_HOME" = "" ]; then
|
||||
@ -15,7 +15,9 @@ arch=$1
|
||||
dataset=imagenet
|
||||
channels=$2
|
||||
layers=$3
|
||||
SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-E250
|
||||
BATCH=$4
|
||||
seed=$5
|
||||
SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-${BATCH}-E250
|
||||
|
||||
PY_C="./env/bin/python"
|
||||
#PY_C="$CONDA_PYTHON_EXE"
|
||||
@ -27,8 +29,8 @@ else
|
||||
echo "Cluster Run with Python: "${PY_C}
|
||||
echo "Unzip ILSVRC2012"
|
||||
tar --version
|
||||
#tar xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
|
||||
commands="./data/data/get_imagenet.sh"
|
||||
tar -xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
|
||||
#commands="./data/data/get_imagenet.sh"
|
||||
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
|
||||
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
|
||||
#bash ./data/data/get_imagenet.sh
|
||||
@ -42,16 +44,16 @@ else
|
||||
# free -g
|
||||
#done < "${commands}"
|
||||
#wget http://10.127.2.44:8000/ILSVRC2012.tar --directory-prefix=${TORCH_HOME}
|
||||
${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
|
||||
count=0
|
||||
while read -r line; do
|
||||
temp_file="./data/data/TEMP-${count}.sh"
|
||||
echo "${line}" > ${temp_file}
|
||||
bash ${temp_file}
|
||||
count=$((count+1))
|
||||
#${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
|
||||
#count=0
|
||||
#while read -r line; do
|
||||
# temp_file="./data/data/TEMP-${count}.sh"
|
||||
# echo "${line}" > ${temp_file}
|
||||
# bash ${temp_file}
|
||||
# count=$((count+1))
|
||||
#${PY_C} ./data/ps_mem.py -p $$
|
||||
# free -g
|
||||
done < "${commands}"
|
||||
#done < "${commands}"
|
||||
#echo "Copy ILSVRC2012 done"
|
||||
#tar -xvf ${TORCH_HOME}/ILSVRC2012.tar -C ${TORCH_HOME}
|
||||
#rm ${TORCH_HOME}/ILSVRC2012.tar
|
||||
@ -66,5 +68,6 @@ ${PY_C} ./exps-cnn/train_base.py \
|
||||
--save_path ${SAVED} \
|
||||
--grad_clip 5 \
|
||||
--init_channels ${channels} --layers ${layers} \
|
||||
--model_config ./configs/nas-imagenet.config \
|
||||
--model_config ./configs/nas-imagenet-${BATCH}.config \
|
||||
--manualSeed ${seed} \
|
||||
--print_freq 200 --workers 20
|
||||
|
Loading…
Reference in New Issue
Block a user