update
This commit is contained in:
		
							
								
								
									
										29
									
								
								scripts-cluster/job-script.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								scripts-cluster/job-script.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
#!/bin/bash
#
# Job preamble executed on the cluster node before the user command.
#
# Steps:
#   1. Verify the CIFAR data directory is present in the job directory.
#   2. Mount the remote AFS dataset directory at ./hadoop-data.
#   3. Print a few diagnostics (cwd, files, visible GPUs).
#   4. Download and unpack the packaged Python environment and make its
#      interpreter the default `python`.
#
# submit.sh copies this file and appends the user command after the last
# line, so strict-mode options (`set -e` / `set -u`) are deliberately NOT
# enabled here: they would leak into the appended command. Critical steps
# are checked explicitly instead.

echo "CHECK-DATA-DIR START"
cifar_dir="./data/data/cifar.python"
if [ -d "${cifar_dir}" ]; then
  echo "Find cifar-dir: ${cifar_dir}"
else
  echo "Can not find cifar-dir: ${cifar_dir}"
  exit 1
fi
echo "CHECK-DATA-DIR DONE"

# NOTE(review): the two literals after the script path look like an account
# name and its password committed in plain text -- confirm, and consider
# reading them from the environment or a protected file instead.
# The mount stays best-effort as before (its status never aborted the job),
# but a failure is now reported so it is visible in the job log.
if ! sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
    COMM_KM_Data COMM_km_2018 \
    "$(pwd)/hadoop-data" \
    afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets; then
  echo "WARNING: afs_mount.sh returned a non-zero status" >&2
fi

echo "PWD: " "$(pwd)"
# The glob lists the same non-hidden entries `ls` would, without parsing ls.
echo "files::  " *
echo "CUDA_VISIBLE_DEVICES: " "${CUDA_VISIBLE_DEVICES:-}"

# config python
PYTHON_ENV=py36_pytorch1.0_env0.1.3.tar.gz
if ! wget -e "http_proxy=cp01-sys-hic-gpu-02.cp01:8888" \
    "http://cp01-sys-hic-gpu-02.cp01/HGCP_DEMO/${PYTHON_ENV}" > screen.log 2>&1; then
  echo "Failed to download ${PYTHON_ENV}; see screen.log" >&2
  exit 1
fi
if ! tar xzf "${PYTHON_ENV}"; then
  echo "Failed to unpack ${PYTHON_ENV}" >&2
  exit 1
fi
if [ ! -x "./env/bin/python" ]; then
  echo "Can not find ./env/bin/python after unpacking ${PYTHON_ENV}" >&2
  exit 1
fi

# An `alias` is not expanded by a non-interactive bash, so the command
# appended to this script would silently keep using the system interpreter.
# Putting the unpacked environment first on PATH makes `python` resolve to
# ./env/bin/python for this script and for every child process.
PATH="$(pwd)/env/bin:${PATH}"
export PATH

echo "Python:  " "$(command -v python)"
							
								
								
									
										44
									
								
								scripts-cluster/submit.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								scripts-cluster/submit.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
#!/bin/bash
# Build a one-off job script and submit it to the HGCP cluster.
#
# Usage:
#   bash ./scripts-cluster/submit.sh ${QUEUE} ${JOB-NAME} ${GPUs} ${CMD}
#
# Arguments:
#   $1  queue name
#   $2  job name
#   $3  number of GPUs per node
#   $4  command to run on the node (one quoted string); it is appended
#       verbatim to a copy of job-script.sh
#
# Environment:
#   DRY_RUN  when set to 1, generate the job script and stop without
#            submitting anything.
#
#find -name "._*" | xargs rm -rf
ODIR=$(pwd)
FDIR=$(cd "$(dirname "$0")" && pwd) || exit 1
echo "Bash-Dir : ${ODIR}"
echo "File-Dir : ${FDIR}"
echo "File-Name: ${0}"

if [ "$#" -ne 4 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 4 parameters for the queue-name, the job-name, the number-of-GPUs, and the command" >&2
  exit 1
fi

# Drop Python bytecode caches so they are not shipped with --file-dir.
# -prune stops find from descending into a directory it is about to delete;
# -exec ... + avoids piping file names through xargs.
find . -name "__pycache__" -prune -exec rm -rf -- {} +

QUEUE=$1
NAME=$2
GPUs=$3
CMD=$4
TIME=$(date +"%Y-%h-%d-%T")

JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh"

mkdir -p "${FDIR}/tmps" || exit 1
cat "${FDIR}/job-script.sh" > "${JOB_SCRIPT}" || exit 1
# printf with a quoted argument writes the command exactly as given; an
# unquoted echo would word-split it and expand globs at submit time.
printf '%s\n' "${CMD}" >> "${JOB_SCRIPT}" || exit 1

if [ "${DRY_RUN:-0}" = "1" ]; then
  echo "DRY_RUN=1, job script generated at ${JOB_SCRIPT}; not submitting"
  exit 0
fi

HGCP_CLIENT_BIN="${HOME}/.hgcp/software-install/HGCP_client/bin"

# NOTE(review): the HDFS password is committed in plain text and passed on
# the command line (visible in `ps`); consider moving it out of the repo.
"${HGCP_CLIENT_BIN}/submit" \
    --hdfs afs://xingtian.afs.baidu.com:9902 \
    --hdfs-user COMM_KM_Data \
    --hdfs-passwd COMM_km_2018 \
    --hdfs-path /user/COMM_KM_Data/dongxuanyi/logs \
    --file-dir ./ \
    --job-name "${NAME}" \
    --queue-name "${QUEUE}" \
    --num-nodes 1 \
    --num-task-pernode 1 \
    --gpu-pnode "${GPUs}" \
    --time-limit 0 \
    --job-script "${JOB_SCRIPT}"
							
								
								
									
										1
									
								
								scripts-cluster/tmps/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								scripts-cluster/tmps/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| * | ||||
		Reference in New Issue
	
	Block a user