xautodl/scripts-cluster/submit.sh

53 lines
1.3 KiB
Bash
Raw Normal View History

2019-03-29 17:50:18 +01:00
#!/bin/bash
# Submit a job through the HGCP client's `submit` tool.
# Usage: bash ./scripts-cluster/submit.sh ${QUEUE} ${JOB-NAME} ${GPUs} ${CMD}
#   QUEUE    - queue name (passed as --queue-name)
#   JOB-NAME - job name (passed as --job-name)
#   GPUs     - GPU count (passed as --gpu-pnode)
#   CMD      - command line appended to the generated job script
#find -name "._*" | xargs rm -rf
# ODIR: working directory the script was launched from.
ODIR=$(pwd)
# FDIR: absolute path of the directory holding this script. The nested
# expansions are quoted so paths containing whitespace survive, and `&&`
# keeps a failed `cd` from silently yielding the current directory instead.
FDIR=$(cd -- "$(dirname -- "$0")" && pwd) || {
  echo "Cannot resolve the directory of ${0}" >&2
  exit 1
}
2019-03-30 06:38:15 +01:00
# Report the launch directory, the script directory and the invoked script
# path. The expansions sit inside the quotes so values containing whitespace
# or glob characters are printed verbatim.
echo "Bash-Dir : ${ODIR}"
echo "File-Dir : ${FDIR}"
echo "File-Name : ${0}"
2019-03-29 17:50:18 +01:00
# Require exactly four positional parameters:
#   $1 queue name, $2 job name, $3 number of GPUs, $4 command to run.
# Diagnostics go to stderr so they do not mix with regular output.
if [ "$#" -ne 4 ]; then
  echo "Input illegal number of parameters " "$#" >&2
  echo "Need 4 parameters for the queue-name, the job-name, the number-of-GPUs, and the command" >&2
  exit 1
fi
# Delete every __pycache__ entry below the current directory.
# -prune stops find from descending into a directory it is about to remove,
# and -exec ... {} + hands names to rm directly, so paths containing
# whitespace are handled correctly (unlike a plain `find | xargs`).
find . -name "__pycache__" -prune -exec rm -rf -- {} +
QUEUE=$1
NAME=$2
GPUs=$3
CMD=$4
2019-03-29 19:10:20 +01:00
# Timestamp that tags this submission, e.g. 2019-Mar-29--17-50-18.
# %H-%M-%S spells out %T (%H:%M:%S) with dashes in place of the colons,
# so no separate substitution pass is needed.
TIME=$(date +"%Y-%h-%d--%H-%M-%S")
2019-03-29 17:50:18 +01:00
# Path of the per-submission job script: <FDIR>/tmps/job-<TIME>.sh
printf -v JOB_SCRIPT '%s/tmps/job-%s.sh' "${FDIR}" "${TIME}"
2019-04-02 08:58:25 +02:00
# HDFS directory for this job: one "<NAME>-<TIME>" entry under the
# current user's alljobs log area.
hdfs_jobs_root="/user/COMM_KM_Data/${USER}/logs/alljobs"
HDFS_DIR="${hdfs_jobs_root}/${NAME}-${TIME}"
2019-03-30 06:38:15 +01:00
# Show the job-script path; the expansion is quoted so a path containing
# whitespace or glob characters is printed verbatim.
echo "JOB-SCRIPT: ${JOB_SCRIPT}"
2019-03-29 17:50:18 +01:00
# Build the job script: the job-script.sh template followed by the user command.
# Make sure the target directory exists before redirecting into it.
mkdir -p -- "$(dirname -- "${JOB_SCRIPT}")"
if ! cat -- "${FDIR}/job-script.sh" > "${JOB_SCRIPT}"; then
  echo "Failed to write ${JOB_SCRIPT}" >&2
  exit 1
fi
# printf with a quoted argument appends CMD verbatim; an unquoted echo would
# word-split it and expand any glob characters against the current directory.
printf '%s\n' "${CMD}" >> "${JOB_SCRIPT}"
2019-03-31 18:19:43 +02:00
# Create the job's HDFS directory with the ${HDP} client.
# NOTE(review): HDP is not assigned in the visible part of this script;
# presumably it is exported by the environment — confirm.
if [ -z "${HDP:-}" ]; then
  echo "HDP is not set; cannot create ${HDFS_DIR}" >&2
  exit 1
fi
# ${HDP} is left unquoted on purpose in case it holds a command plus options.
# Success is only reported when the mkdir actually succeeded.
# shellcheck disable=SC2086
if ! ${HDP} -mkdir "${HDFS_DIR}"; then
  echo "Failed to create ${HDFS_DIR}" >&2
  exit 1
fi
echo "Create ${HDFS_DIR} done!"
# Plain seconds: the "1s" suffix form is a GNU extension.
sleep 1
2019-03-29 17:50:18 +01:00
2019-03-31 18:19:43 +02:00
# Directory that holds the HGCP client executables under the user's home.
hgcp_install_root="${HOME}/.hgcp/software-install"
HGCP_CLIENT_BIN="${hgcp_install_root}/HGCP_client/bin"
2019-03-29 17:50:18 +01:00
# Submit the job through the HGCP client. The arguments are collected in an
# array so every value stays a single word even if it contains whitespace,
# and so the option list can be read (and annotated) one entry per line.
# NOTE(review): the HDFS password is hardcoded here and passed on the command
# line, where it is visible in the process list — move it to a protected
# source (environment or credentials file) when possible.
submit_args=(
  --hdfs afs://xingtian.afs.baidu.com:9902
  --hdfs-user COMM_KM_Data
  --hdfs-passwd COMM_km_2018
  --hdfs-path "${HDFS_DIR}"
  --file-dir ./
  --job-name "${NAME}"
  --queue-name "${QUEUE}"
  --num-nodes 1
  --num-task-pernode 1
  --gpu-pnode "${GPUs}"
  --time-limit 0
  --job-script "${JOB_SCRIPT}"
)
"${HGCP_CLIENT_BIN}/submit" "${submit_args[@]}"
2019-03-29 19:10:20 +01:00
#--job-script ${FDIR}/job-script.sh
#echo "JOB-SCRIPT: " ${JOB_SCRIPT}