This shows you the differences between two versions of the page.
| — | wiki:an_equivalent_to_slurm_s_srun_for_oar [2017/09/14 12:55] (current) – created neyron | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | Slurm's srun command runs a script on allocated | ||
| + | resources, ties stdout and stderr to those of the parent process, and | ||
| + | exits with the exit code of the called script. | ||
| + | For example, you can do: | ||
| + | |||
| + | srun make | ||
| + | |||
| + | It's great for batch scripts that run on the frontend. | ||
| + | |||
| + | The following script mimics that behavior with OAR: | ||
| + | |||
| + | <code bash> | ||
| + | #!/bin/bash | ||
| + | # | ||
| + | # Credits to Emmanuel Thomé and Pierre Neyron | ||
| + | # | ||
| + | # Usage: oarrun.sh [[oar args]] -- [[child command]] | ||
| + | # | ||
| + | # If no oar args are needed, oarrun.sh [[child command]] works. | ||
| + | # | ||
| + | # By default stdout and stderr are not saved (just printed), but that can | ||
| + | # be adjusted with -O and -E. | ||
| + | # | ||
| + | # The command is run in $PWD remotely, at least if that directory exists. | ||
| + | # | ||
| + | # BUG: if $PWD does not exist remotely, the error message is totally | ||
| + | # unhelpful, as the command complains about some /dev/cpuset being | ||
| + | # absent. This is explained by the fact that the job is within its exit | ||
| + | # sequence by the time we do oarsh. | ||
| + | |||
| + | set -e | ||
| + | |||
| + | oar_args=() | ||
| + | child=() | ||
| + | |||
| + | while [ $# -gt 0 ] ; do | ||
| + | if [ " | ||
| + | oar_args=(" | ||
| + | shift | ||
| + | done | ||
| + | if [ $# -eq 0 ] ; then | ||
| + | child=(" | ||
| + | oar_args=() | ||
| + | else | ||
| + | while [ $# -gt 0 ] ; do | ||
| + | if [ " | ||
| + | child=(" | ||
| + | shift | ||
| + | done | ||
| + | fi | ||
| + | |||
| + | control=`mktemp -d / | ||
| + | |||
| + | on_exit() { | ||
| + | [ -n " | ||
| + | rm -rf $control | ||
| + | } | ||
| + | trap on_exit EXIT | ||
| + | |||
| + | FIFO=$control/ | ||
| + | NOTIFY_SCRIPT=$control/ | ||
| + | cat <<EOF > $NOTIFY_SCRIPT | ||
| + | #!/bin/bash | ||
| + | echo " | ||
| + | echo " | ||
| + | echo \$1 > $FIFO | ||
| + | EOF | ||
| + | chmod 755 $NOTIFY_SCRIPT | ||
| + | mkfifo $FIFO | ||
| + | exec 3<> | ||
| + | |||
| + | # -O and -E can be overridden by the user in the oar_args | ||
| + | oarsub -O /dev/null -E /dev/null " | ||
| + | |||
| + | read -u 3 OAR_JOB_ID | ||
| + | export OAR_JOB_ID | ||
| + | |||
| + | echo "Job $OAR_JOB_ID is running." | ||
| + | LEADER=$(oarstat -fj $OAR_JOB_ID | grep assigned_hostnames | cut -d\= -f2 | cut -d\+ -f1) | ||
| + | |||
| + | # In the same way oarsub does, we expect oarrun to preserve the current | ||
| + | # directory. Note that if $PWD does not exist remotely, oarsub will fail | ||
| + | # miserably, and we have practically zero chance to survive, so trapping | ||
| + | # errors is useless. | ||
| + | # | ||
| + | # The full oarsh command line is run as a bash command, so we're allowed | ||
| + | # to put arbitrary bash code in there. And this is what we need to do if | ||
| + | # we wish to avoid an extraneous level of quoting for commands with | ||
| + | # special characters (i.e. with this, oarrun.sh ls 'a\ b' works as | ||
| + | # intended). | ||
| + | oarsh -t $LEADER cd " | ||
| + | </code> | ||