#!/bin/ksh
#------------------------------------------------------------------#
# This is a script for running MPL jobs under the "LoadLeveler"    #
# batch queuing system at NIST.                                    #
#------------------------------------------------------------------#
# Please report any difficulties you have in running this script   #
# to karin@cam.nist.gov                                            #
#------------------------------------------------------------------#
# Variables this script reads from its environment:
#   CLUSTER          job ID, echoed in the start-up banner
#   NPROCS           process count; copied to MP_PROCS
#   MP_SAVEHOSTFILE  host file checked after the bare "poe" run
#   MP_PGMMODEL      "mpmd" selects the command-file branch
#   PROGRAM          executable to run (must pass -x)
#   ARGUMENTS        arguments passed to PROGRAM
#   INPUT            file redirected to PROGRAM's standard input
#   DEBUG            optional prefix placed in front of each command
#   NODEPROGRAM      per-node command written to the MPMD command file
#   USERSETUP        optional executable run before the program
#   USERCLEAN        optional executable run after the program
#------------------------------------------------------------------#

#------------------------------------------------------------------#
# First, set some variables that will be used to manage jobs/files:
#------------------------------------------------------------------#
USER=$(whoami)
if [ -z "${USER}" ]; then
  # TMPDIR is derived from USER and is removed recursively during
  # clean-up; an empty USER would make that target /tmp/ itself.
  echo "${0##*/}: cannot determine user name... Exiting." >&2
  exit 1
fi
export USER
export TMPDIR="/tmp/${USER}"

# Look up the invoking user's own passwd entry, matching the login-name
# field exactly (a plain grep would also match substrings and other fields).
export MYUID=$(awk -F: -v u="${USER}" '$1 == u { print $3; exit }' /etc/passwd)
export MYSHELL=$(awk -F: -v u="${USER}" '$1 == u { print $7; exit }' /etc/passwd)

HOST=$(hostname)
export HOST=${HOST%%.*}        # short host name (strip the domain part)
export CMD="${0##*/}"          # script base name, used as a message prefix
export INITIALDIR=$(pwd)

# This umask needed to give access to MP_HOSTLIST - if user's umask is set
# to 077, this file can't be read, and the script fails.
umask 022

# Create the temporary directory on the current host:
if [ ! -d "${TMPDIR}" ]; then
  mkdir "${TMPDIR}"
fi

#------------------------------------------------------------------#
# Report to standard out and err which host is running the job:
#------------------------------------------------------------------#
date
date >&2
echo "$CMD: Running $CMD on node $HOST (Job ID: $CLUSTER)"
echo "$CMD: Running $CMD on node $HOST (Job ID: $CLUSTER)" >&2

#------------------------------------#
# MPL Program Environment set-up:
#------------------------------------#
export MP_PROCS=$NPROCS
export MP_RETRY=10
export MP_RETRYCOUNT=3
export MP_INFOLEVEL=1
unset MP_CMDFILE

# NOTE(review): the whitespace-collapsed source reads "unset MP_CMDFILE poe".
# It is reconstructed here as a separate bare "poe" invocation because the
# check that follows expects a host file to have just been generated -
# confirm against the original script.
poe

# Quoted so that an empty/unset MP_SAVEHOSTFILE is reported as missing
# instead of slipping through the test.
if [ ! -r "${MP_SAVEHOSTFILE}" ]; then
  echo "No hostfile generated... Exiting." >&2
  exit 1
else
  currentdir=$(pwd)
fi

# Extract the short names of the nist.gov hosts once: tee stores them in
# the node list file while the same text is captured in hostlist.
NODELIST="${TMPDIR}/nodelist"
hostlist=$(grep "nist.gov" "${MP_SAVEHOSTFILE}" | cut -f1 -d. | tee "${NODELIST}")
echo "Node pool: \n$hostlist"

#---------------------------------------------#
# Invoke a user setup script if available:
#---------------------------------------------#
if [ -n "${USERSETUP}" ] && [ -x "${USERSETUP}" ]; then
  echo "$CMD Invoking user set-up procedure ${USERSETUP}"
  "${USERSETUP}"
fi

#--------------------------------------------------#
# Execute the MPL program through a command file:
#--------------------------------------------------#
if [ -x "${PROGRAM}" ]; then
  echo "$CMD: Starting $PROGRAM $ARGUMENTS < ${INPUT} on $HOST" >&2
  if [ "$MP_PGMMODEL" = "mpmd" ]; then
    echo "$CMD: MPL program running in MPMD mode... \n" >&2

    # One line for PROGRAM plus NPROCS-1 lines for NODEPROGRAM.
    # Evaluated arithmetically so the exported value is a number.
    export SLAVE_PROCS=$((NPROCS - 1))

    cmdfile="${PROGRAM}.cmdfile"
    rm -f "${cmdfile}"                  # drop any stale command file

    exec 3>"${cmdfile}"                 # Open unit 3
    print -u3 "${DEBUG} ${PROGRAM} ${ARGUMENTS} < ${INPUT}"
    count=1
    while [ "${count}" -le "${SLAVE_PROCS}" ]; do
      print -u3 "${DEBUG} ${NODEPROGRAM}"
      count=$((count + 1))
    done
    exec 3>&-                           # Close unit 3

    echo "$CMD: poe -cmdfile ${PROGRAM}.cmdfile" >&2
    echo "$CMD: Contents of cmdfile:" >&2
    cat "${cmdfile}" >&2
    echo "\n\n<<<<<<<<<<<<<< Start of program output >>>>>>>>>>>>>>\n\n"
    poe -cmdfile "${cmdfile}"
  else
    echo "\n\n<<<<<<<<<<<<<< Start of program output >>>>>>>>>>>>>>\n\n"
    # DEBUG and ARGUMENTS are deliberately unquoted: DEBUG may be empty and
    # ARGUMENTS must split into separate words.
    (${DEBUG} "${PROGRAM}" ${ARGUMENTS} < "${INPUT}")
    echo "$CMD: (${DEBUG} ${PROGRAM} $ARGUMENTS < ${INPUT})\n" >&2
  fi
  echo "\n\n<<<<<<<<<<<<<< End of program output >>>>>>>>>>>>>>\n\n"
else
  print -u2 "$CMD: error $PROGRAM not found"
fi

#-------------------------------#
# Clean up node directories:
#-------------------------------#
if [ -x "${USERCLEAN}" ]; then
  echo "$CMD Invoking user clean-up procedure ${USERCLEAN}"
  "${USERCLEAN}"
fi

#----------------------------------------------------------#
# CD out of the directory where files will be removed...
#----------------------------------------------------------#
cd "${HOME}"
echo "$CMD: Cleaning tmp directories on the nodes..." >&2

# hostlist is intentionally unquoted: it holds one host name per line.
for host in ${hostlist}; do
  rsh "${host}" rm -fr "${TMPDIR}"
done
rm -rf "${TMPDIR:?}"

# Remove the host file and the command file. Paths that are already
# absolute must not be prefixed with the job's working directory.
case "${MP_SAVEHOSTFILE}" in
  /*) rm -f "${MP_SAVEHOSTFILE}" ;;
  *)  rm -f "${currentdir}/${MP_SAVEHOSTFILE}" ;;
esac
case "${PROGRAM}" in
  /*) rm -f "${PROGRAM}.cmdfile" ;;
  *)  rm -f "${currentdir}/${PROGRAM}.cmdfile" ;;
esac
echo "$CMD: Done." >&2

#------------------------------------#
# End clean-up; Exit shell program;
#------------------------------------#
exit 0