update ImageNet training
parent 666c105f51, commit 4121d1719f
@@ -24,8 +24,8 @@ CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_V1 cifar100 cut

 Train the searched CNN on ImageNet
 ```
-CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14
-CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14 B128 -1
+CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14 B128 -1
 ```

 Evaluate a trained CNN model
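Note: the two extra arguments are new in this commit. As the script changes below show, the fourth argument (`B128`) names the batch-size config to load (`./configs/nas-imagenet-${BATCH}.config`), and the fifth is the manual seed, where a negative value such as `-1` asks `train_base.py` to draw a random seed.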
configs/nas-imagenet-B128.config (new file, 15 lines)
@@ -0,0 +1,15 @@
+{
+  "type" : ["str", "steplr"],
+  "batch_size": ["int", 128],
+  "epochs" : ["int", 250],
+  "decay_period": ["int", 1],
+  "gamma" : ["float", 0.97],
+  "momentum" : ["float", 0.9],
+  "decay" : ["float", 0.00003],
+  "LR" : ["float", 0.1],
+  "label_smooth": ["float", 0.1],
+  "auxiliary" : ["bool", 1],
+  "auxiliary_weight" : ["float", 0.4],
+  "grad_clip" : ["float", 5],
+  "drop_path_prob" : ["float", 0]
+}
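Each entry in these config files is a `["type", value]` pair rather than a bare value. A minimal sketch of how such a file could be parsed, assuming a loader that casts every value through its declared type tag (`load_config` here is a hypothetical helper, not necessarily the repo's own loader):

```python
import json
from collections import namedtuple

# Cast helpers for the type tags used in these configs.
_CASTS = {'str': str, 'int': int, 'float': float,
          'bool': lambda v: bool(int(v))}

def load_config(path):
    """Read a {key: [type, value]} config file into a namedtuple."""
    with open(path) as f:
        raw = json.load(f)
    data = {key: _CASTS[typ](val) for key, (typ, val) in raw.items()}
    return namedtuple('Config', data.keys())(**data)

config = load_config('./configs/nas-imagenet-B128.config')
print(config.batch_size, config.LR, config.auxiliary)  # 128 0.1 True
```

Keeping the type tag next to the value makes the JSON self-describing, so `"auxiliary" : ["bool", 1]` deserializes to `True` rather than the integer 1.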
configs/nas-imagenet-B256.config (new file, 15 lines)
@@ -0,0 +1,15 @@
+{
+  "type" : ["str", "steplr"],
+  "batch_size": ["int", 256],
+  "epochs" : ["int", 250],
+  "decay_period": ["int", 1],
+  "gamma" : ["float", 0.97],
+  "momentum" : ["float", 0.9],
+  "decay" : ["float", 0.00003],
+  "LR" : ["float", 0.1],
+  "label_smooth": ["float", 0.1],
+  "auxiliary" : ["bool", 1],
+  "auxiliary_weight" : ["float", 0.4],
+  "grad_clip" : ["float", 5],
+  "drop_path_prob" : ["float", 0]
+}
@@ -42,7 +42,7 @@ else : print('Find CUDA_VISIBLE_DEVICES={:
 assert torch.cuda.is_available(), 'torch.cuda is not available'


-if args.manualSeed is None:
+if args.manualSeed is None or args.manualSeed < 0:
   args.manualSeed = random.randint(1, 10000)
 random.seed(args.manualSeed)
 cudnn.benchmark = True
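This is the hook the updated launch scripts rely on: `--manualSeed -1` (the scripts' new fifth argument) now falls through to a random seed instead of being used literally. A minimal sketch of the resulting behavior:

```python
import random

def resolve_seed(manual_seed):
    # None or any negative value (e.g. the -1 passed by the
    # shell scripts) triggers a freshly drawn random seed.
    if manual_seed is None or manual_seed < 0:
        manual_seed = random.randint(1, 10000)
    return manual_seed
```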
@@ -54,10 +54,10 @@ torch.cuda.manual_seed_all(args.manualSeed)
 def main():

   # Init logger
-  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
+  #args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
   if not os.path.isdir(args.save_path):
     os.makedirs(args.save_path)
-  log = open(os.path.join(args.save_path, 'log-seed-{:}.txt'.format(args.manualSeed)), 'w')
+  log = open(os.path.join(args.save_path, 'seed-{:}-log.txt'.format(args.manualSeed)), 'w')
   print_log('Save Path : {:}'.format(args.save_path), log)
   state = {k: v for k, v in args._get_kwargs()}
   print_log(state, log)
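With the per-seed subdirectory commented out, runs with different seeds now share one `save_path` and are distinguished by file name instead (`seed-1234-log.txt` rather than `seed-1234/log-seed-1234.txt`, to pick a hypothetical seed).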
@@ -59,8 +59,8 @@ def main_procedure(config, dataset, data_path, args, genotype, init_channels, la
     raise ValueError('Can not find the schedular type : {:}'.format(config.type))


-  checkpoint_path = os.path.join(args.save_path, 'checkpoint-{:}-model.pth'.format(dataset))
-  checkpoint_best = os.path.join(args.save_path, 'checkpoint-{:}-best.pth'.format(dataset))
+  checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-model.pth'.format(args.manualSeed, dataset))
+  checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-{:}-best.pth'.format(args.manualSeed, dataset))
   if pure_evaluate:
     print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
     basemodel.load_state_dict( pure_evaluate )
@@ -81,8 +81,8 @@ def main_procedure_imagenet(config, data_path, args, genotype, init_channels, la
     raise ValueError('Can not find the schedular type : {:}'.format(config.type))


-  checkpoint_path = os.path.join(args.save_path, 'checkpoint-imagenet-model.pth')
-  checkpoint_best = os.path.join(args.save_path, 'checkpoint-imagenet-best.pth')
+  checkpoint_path = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-model.pth'.format(args.manualSeed))
+  checkpoint_best = os.path.join(args.save_path, 'seed-{:}-checkpoint-imagenet-best.pth'.format(args.manualSeed))

   if pure_evaluate:
     print_log('-'*20 + 'Pure Evaluation' + '-'*20, log)
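The checkpoint files get the same seed prefix for the same reason as the logs: several seeds writing into one shared `save_path` would otherwise overwrite each other's `checkpoint-imagenet-model.pth`.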
@@ -1,15 +1,16 @@
 #!/bin/bash
 #
 echo "CHECK-DATA-DIR START"
-#sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
-# COMM_KM_Data COMM_km_2018 \
-# `pwd`/hadoop-data \
-# afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
+sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
+  COMM_KM_Data COMM_km_2018 \
+  `pwd`/hadoop-data \
+  afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets

 export TORCH_HOME="./data/data/"
-wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
-tar xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
-rm ${TORCH_HOME}/cifar.python.tar
+#wget -q http://10.127.2.44:8000/cifar.python.tar --directory-prefix=${TORCH_HOME}
+#tar -xvf ${TORCH_HOME}/cifar.python.tar -C ${TORCH_HOME}
+tar -xf ./hadoop-data/cifar.python.tar -C ${TORCH_HOME}
+#rm ${TORCH_HOME}/cifar.python.tar
 #tar xvf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}

 cifar_dir="${TORCH_HOME}/cifar.python"
@@ -1,7 +1,7 @@
 #!/usr/bin/env sh
-if [ "$#" -ne 3 ] ;then
+if [ "$#" -ne 5 ] ;then
   echo "Input illegal number of parameters " $#
-  echo "Need 3 parameters for the architecture, and the channel and the layers"
+  echo "Need 5 parameters for the architecture, and the channel, and the layers, and the batch-size, and the seed"
   exit 1
 fi
 if [ "$TORCH_HOME" = "" ]; then
@@ -15,7 +15,9 @@ arch=$1
 dataset=imagenet
 channels=$2
 layers=$3
-SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-E250
+BATCH=$4
+seed=$5
+SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-${BATCH}-E250

 PY_C="./env/bin/python"
 #PY_C="$CONDA_PYTHON_EXE"
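The batch tag also lands in the output directory, so runs with different batch sizes no longer collide; the README example above would save under `./output/NAS-CNN/GDAS_V1-imagenet-C50-L14-B128-E250`.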
@ -27,8 +29,8 @@ else
|
|||||||
echo "Cluster Run with Python: "${PY_C}
|
echo "Cluster Run with Python: "${PY_C}
|
||||||
echo "Unzip ILSVRC2012"
|
echo "Unzip ILSVRC2012"
|
||||||
tar --version
|
tar --version
|
||||||
#tar xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
|
tar -xf ./hadoop-data/ILSVRC2012.tar -C ${TORCH_HOME}
|
||||||
commands="./data/data/get_imagenet.sh"
|
#commands="./data/data/get_imagenet.sh"
|
||||||
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
|
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-TAR ./data/data/ILSVRC2012 tar > ${commands}
|
||||||
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
|
#${PY_C} ./data/decompress.py ./hadoop-data/ILSVRC2012-ZIP ./data/data/ILSVRC2012 zip > ./data/data/get_imagenet.sh
|
||||||
#bash ./data/data/get_imagenet.sh
|
#bash ./data/data/get_imagenet.sh
|
||||||
@@ -42,16 +44,16 @@ else
   # free -g
   #done < "${commands}"
   #wget http://10.127.2.44:8000/ILSVRC2012.tar --directory-prefix=${TORCH_HOME}
-  ${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
-  count=0
-  while read -r line; do
-    temp_file="./data/data/TEMP-${count}.sh"
-    echo "${line}" > ${temp_file}
-    bash ${temp_file}
-    count=$((count+1))
+  #${PY_C} ./data/decompress.py ./data/classes.txt ${TORCH_HOME}/ILSVRC2012 wget > ${commands}
+  #count=0
+  #while read -r line; do
+  #  temp_file="./data/data/TEMP-${count}.sh"
+  #  echo "${line}" > ${temp_file}
+  #  bash ${temp_file}
+  #  count=$((count+1))
   #${PY_C} ./data/ps_mem.py -p $$
   # free -g
-  done < "${commands}"
+  #done < "${commands}"
   #echo "Copy ILSVRC2012 done"
   #tar -xvf ${TORCH_HOME}/ILSVRC2012.tar -C ${TORCH_HOME}
   #rm ${TORCH_HOME}/ILSVRC2012.tar
@@ -66,5 +68,6 @@ ${PY_C} ./exps-cnn/train_base.py \
   --save_path ${SAVED} \
   --grad_clip 5 \
   --init_channels ${channels} --layers ${layers} \
-  --model_config ./configs/nas-imagenet.config \
+  --model_config ./configs/nas-imagenet-${BATCH}.config \
+  --manualSeed ${seed} \
   --print_freq 200 --workers 20
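This closes the loop: the fourth positional argument picks between the two new config files (`B128` → `configs/nas-imagenet-B128.config`, `B256` → `configs/nas-imagenet-B256.config`), and the fifth is forwarded as `--manualSeed`, where `-1` means a random seed per the `train_base.py` change above.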