diff --git a/README.md b/README.md index 4eea8c4..7c1984f 100644 --- a/README.md +++ b/README.md @@ -15,15 +15,15 @@ conda install pytorch torchvision cuda100 -c pytorch Train the searched CNN on CIFAR ``` -bash ./scripts-cnn/train-cifar.sh 0 GDAS_FG cifar10 cut -bash ./scripts-cnn/train-cifar.sh 0 GDAS_F1 cifar10 cut -bash ./scripts-cnn/train-cifar.sh 0 GDAS_V1 cifar100 cut +CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_FG cifar10 cut +CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut +CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-cifar.sh GDAS_V1 cifar100 cut ``` Train the searched CNN on ImageNet ``` -bash ./scripts-cnn/train-imagenet.sh 0 GDAS_F1 52 14 -bash ./scripts-cnn/train-imagenet.sh 0 GDAS_V1 50 14 +CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14 +CUDA_VISIBLE_DEVICES=0 bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14 ``` diff --git a/scripts-cluster/README.md b/scripts-cluster/README.md index 9c9d714..4db4a7b 100644 --- a/scripts-cluster/README.md +++ b/scripts-cluster/README.md @@ -7,3 +7,6 @@ bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 PTB-GDAS 1 "bash ./scripts- ``` ## CNN +``` +bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 CIFAR10-CUT-GDAS-F1 1 "bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut" +``` diff --git a/scripts-cluster/job-script.sh b/scripts-cluster/job-script.sh index 28b5b7b..d770ed1 100644 --- a/scripts-cluster/job-script.sh +++ b/scripts-cluster/job-script.sh @@ -16,6 +16,7 @@ else exit 1 fi echo "CHECK-DATA-DIR DONE" +export TORCH_HOME="./data/data/" # config python @@ -27,7 +28,7 @@ echo "JOB-PWD : " `pwd` echo "JOB-files : " `ls` echo "JOB-CUDA_VISIBLE_DEVICES: " ${CUDA_VISIBLE_DEVICES} -echo `./env/bin/python --version` +./env/bin/python --version +echo "JOB-TORCH_HOME: "${TORCH_HOME} # real commands -bash ./scripts-rnn/train-WT2.sh GDAS diff --git a/scripts-cluster/submit.sh b/scripts-cluster/submit.sh index 1298d16..531d078 100644 --- a/scripts-cluster/submit.sh +++ b/scripts-cluster/submit.sh @@ -3,9 +3,9 @@ #find -name "._*" | xargs rm -rf ODIR=$(pwd) FDIR=$(cd $(dirname $0); pwd) -echo "Bash-Dir : "${ODIR} -echo "File-Dir : "${FDIR} -echo "File-Name: "${0} +echo "Bash-Dir : "${ODIR} +echo "File-Dir : "${FDIR} +echo "File-Name : "${0} if [ "$#" -ne 4 ] ;then echo "Input illegal number of parameters " $# @@ -22,7 +22,7 @@ TIME=$(date +"%Y-%h-%d--%T") TIME="${TIME//:/-}" JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh" -echo "JOB-SCRIPT: " ${JOB_SCRIPT} +echo "JOB-SCRIPT: "${JOB_SCRIPT} cat ${FDIR}/job-script.sh > ${JOB_SCRIPT} echo ${CMD} >> ${JOB_SCRIPT} diff --git a/scripts-cnn/train-cifar.sh b/scripts-cnn/train-cifar.sh index ad2e769..255fe96 100644 --- a/scripts-cnn/train-cifar.sh +++ b/scripts-cnn/train-cifar.sh @@ -1,8 +1,8 @@ #!/usr/bin/env sh -# bash scripts-cnn/train-cifar.sh 0 GDAS cifar10 cut -if [ "$#" -ne 4 ] ;then +# bash scripts-cnn/train-cifar.sh GDAS cifar10 cut +if [ "$#" -ne 3 ] ;then echo "Input illegal number of parameters " $# - echo "Need 4 parameters for the GPUs, the architecture, and the dataset-name, and the cutout" + echo "Need 3 parameters for the architecture, and the dataset-name, and the cutout" exit 1 fi if [ "$TORCH_HOME" = "" ]; then @@ -12,18 +12,27 @@ else echo "TORCH_HOME : $TORCH_HOME" fi -gpus=$1 -arch=$2 -dataset=$3 -cutout=$4 -SAVED=./snapshots/NAS/${arch}-${dataset}-${cutout}-E600 -#--data_path $TORCH_HOME/cifar.python \ +arch=$1 +dataset=$2 +cutout=$3 +SAVED=./output/NAS-CNN/${arch}-${dataset}-${cutout}-E600 -CUDA_VISIBLE_DEVICES=${gpus} python ./exps-cnn/train_base.py \ - --data_path ./data/data/cifar.python \ +PY_C="./env/bin/python" + +if [ ! -f ${PY_C} ]; then + echo "Local Run with Python: "`which python` + PY_C="python" +else + echo "Cluster Run with Python: "${PY_C} +fi + +${PY_C} --version + +${PY_C} ./exps-cnn/train_base.py \ + --data_path $TORCH_HOME/cifar.python \ --dataset ${dataset} --arch ${arch} \ --save_path ${SAVED} \ --grad_clip 5 \ --init_channels 36 --layers 20 \ --model_config ./configs/nas-cifar-cos-${cutout}.config \ - --print_freq 100 --workers 8 + --print_freq 100 --workers 6 diff --git a/scripts-cnn/train-imagenet.sh b/scripts-cnn/train-imagenet.sh index c1061dc..db1042a 100644 --- a/scripts-cnn/train-imagenet.sh +++ b/scripts-cnn/train-imagenet.sh @@ -1,7 +1,7 @@ #!/usr/bin/env sh -if [ "$#" -ne 4 ] ;then +if [ "$#" -ne 3 ] ;then echo "Input illegal number of parameters " $# - echo "Need 4 parameters for the GPUs, the architecture, and the channel and the layers" + echo "Need 3 parameters for the architecture, and the channel and the layers" exit 1 fi if [ "$TORCH_HOME" = "" ]; then @@ -11,14 +11,24 @@ else echo "TORCH_HOME : $TORCH_HOME" fi -gpus=$1 -arch=$2 +arch=$1 dataset=imagenet -channels=$3 -layers=$4 -SAVED=./snapshots/NAS/${arch}-${dataset}-C${channels}-L${layers}-E250 +channels=$2 +layers=$3 +SAVED=./output/NAS-CNN/${arch}-${dataset}-C${channels}-L${layers}-E250 -CUDA_VISIBLE_DEVICES=${gpus} python ./exps-cnn/train_base.py \ +PY_C="./env/bin/python" + +if [ ! -f ${PY_C} ]; then + echo "Local Run with Python: "`which python` + PY_C="python" +else + echo "Cluster Run with Python: "${PY_C} +fi + +${PY_C} --version + +${PY_C} ./exps-cnn/train_base.py \ --data_path $TORCH_HOME/ILSVRC2012 \ --dataset ${dataset} --arch ${arch} \ --save_path ${SAVED} \