This is an old revision of the document!


A rough simulation of the Grid'5000 OAR setup, built with oardocker.

This setup fakes the node boot stages (post-deploy or standby) using delayed `at` commands.

On the host
  • cd …/oar
  • oardocker -v$PWD:/mnt -n 50
On the frontend VM
  • cd /mnt/ ; make node-build node-install node-setup
  • apt-get install -y at
  • /etc/init.d/atd start
  • cp /usr/local/share/oar/oar-node/init.d/oar-node /etc/init.d
  • sed -i -e 's/-n -20//' /etc/init.d/oar-node # remove the nice -20 which causes an error
  • systemctl daemon-reload
  • cp /usr/local/share/oar/oar-node/default/oar-node /etc/default/
  • /etc/init.d/oar-node start
  • setup epilogue /etc/oar/epilogue:
<code bash>
#!/bin/bash
# Usage:
# Script is run under uid of oar who is sudo
# argv[1] is the jobid
# argv[2] is the user's name
# argv[3] is the file which contains the list of nodes used
# argv[4] is the job walltime in seconds

exec 1> /tmp/epilogue.$1.log
exec 2>&1
set -x

OARNODESETTINGCMD=/usr/local/sbin/oarnodesetting

# simulate the boot of the nodes with an at command -> put the node Alive asynchronously.
for n in $(uniq $3); do
  nodes="$nodes -h $n"
  echo "bash -xc '$OARNODESETTINGCMD -h $n -s Alive -p available_upto=2147483646' > /tmp/oarnodesetting.$1.$n 2>&1" | at now +1 minute;
done

#$OARNODESETTINGCMD $nodes -s Absent -p available_upto=0
# set the nodes down, using the command used on g5k:
$OARNODESETTINGCMD -n -s Absent -p available_upto=0 --sql "resource_id IN (select assigned_resources.resource_id from jobs,assigned_resources,resources where assigned_resource_index = 'CURRENT' AND jobs.state = 'Running' AND jobs.job_id = $1 and moldable_job_id = jobs.assigned_moldable_job AND (resources.resource_id = assigned_resources.resource_id AND resources.type='default'))"
exit 0
</code>
  • Changes to drawgantt-svg
<code php>
--- /mnt/sources/visualization_interfaces/DrawGantt-SVG/drawgantt-config.inc.php 2015-10-22 00:26:56.132034018 +0200
+++ /etc/oar/drawgantt-config.inc.php 2015-10-21 23:43:25.228911673 +0200
@@ -71,10 +71,10 @@
   'network_address' => '/^([^.]+)\..*$/',
   );
 $CONF['label_cmp_regex'] = array( // substring selection regex for comparing and sorting labels (resources)

-  'network_address' => '/^([^-]+)-(\d+)\..*$/',
+  'network_address' => '/^([^-]+)(\d+)$/',

 );

 $CONF['resource_properties'] = array( // properties to display in the pop-up on top of the resources labels (on the left)
-  'deploy', 'cpuset', 'besteffort', 'network_address', 'type', 'drain');
+  'deploy', 'cpuset', 'besteffort', 'network_address', 'type', 'drain', 'state', 'available_upto');
 $CONF['resource_hierarchy'] = array( // properties to use to build the resource hierarchy drawing

 'network_address','cpuset',
 ); 

@@ -156,7 +156,7 @@ EOT;

 // Standby job display options for the part shown in the future
-$CONF['standby_truncate_state_to_now'] = 1; // default: 1
+$CONF['standby_truncate_state_to_now'] = 0; // default: 1
 // Besteffort job display options for the part shown in the future
 $CONF['besteffort_truncate_job_to_now'] = 1; // default: 1
 $CONF['besteffort_pattern'] = <<<EOT
</code>

On the server VM
  • apply changes to oar.conf: set cosystem frontend, epilogue, energy saving
  • we use the cosystem type instead of deploy (or we need to add a deploy property)
<code bash>
--- /root/oar.conf 2015-10-21 22:45:52.973104493 +0200
+++ /etc/oar/oar.conf 2015-10-21 22:44:05.752902069 +0200
@@ -63,10 +63,10 @@
 LOG_FILE="/var/log/oar.log"

 # Specify where we are connected with a job of the deploy type
-DEPLOY_HOSTNAME="127.0.0.1"
+DEPLOY_HOSTNAME="frontend"

 # Specify where we are connected with a job of the cosystem type
-COSYSTEM_HOSTNAME="127.0.0.1"
+COSYSTEM_HOSTNAME="frontend"

 # Specify the database field to use to fill the file on the first node of the
 # job in $OAR_NODE_FILE (default is 'network_address'). Only resources with
@@ -197,8 +197,8 @@

 # Files to execute before and after each job on the first computing node
 # (by default nothing is executed)
-#PROLOGUE_EXEC_FILE="/path/to/prog"
-#EPILOGUE_EXEC_FILE="/path/to/prog"
+PROLOGUE_EXEC_FILE="/etc/oar/prologue"
+EPILOGUE_EXEC_FILE="/etc/oar/epilogue"

 # Set the timeout for the prologue and epilogue execution on the OAR server
 #SERVER_PROLOGUE_EPILOGUE_TIMEOUT="60"
@@ -374,16 +374,16 @@
 #
 # Parameter for the scheduler to decide when a node is idle.
 # Number of seconds since the last job was terminated on nodes
-#SCHEDULER_NODE_MANAGER_IDLE_TIME="600"
+SCHEDULER_NODE_MANAGER_IDLE_TIME="300"

 # Parameter for the scheduler to decide if a node will have enough time to sleep.
 # Number of seconds before the next job
-#SCHEDULER_NODE_MANAGER_SLEEP_TIME="600"
+SCHEDULER_NODE_MANAGER_SLEEP_TIME="600"

 # Parameter for the scheduler to know when a node has to be woken up before the
 # beginning of the job when a reservation is scheduled on a resource on this node
 # Number of seconds for a node to wake up
-#SCHEDULER_NODE_MANAGER_WAKEUP_TIME="600"
+SCHEDULER_NODE_MANAGER_WAKEUP_TIME="600"

 # When OAR scheduler wants some nodes to wake up then it launches this command
 # and puts on its STDIN the list of nodes to wake up (one hostname by line).
@@ -415,26 +415,26 @@
 # - the launching of wakeup/shutdown commands can be windowized to prevent
 # from electric peeks
 # Possible values are "yes" and "no"
-ENERGY_SAVING_INTERNAL="no"
+ENERGY_SAVING_INTERNAL="yes"

 # Path to the script used by the energy saving module to wake up nodes.
 # This command is executed from the oar server host.
 # OAR puts the node list on its STDIN (one hostname by line).
 # The scheduler looks at the available_upto field in the resources table to know
 # if the node will be started for enough time.
-#ENERGY_SAVING_NODE_MANAGER_WAKE_UP_CMD="/etc/oar/wake_up_nodes.sh"
+ENERGY_SAVING_NODE_MANAGER_WAKE_UP_CMD="/etc/oar/wake_up_nodes.sh"

 # Path to the script used by the energy saving module to shut down nodes.
 # This command is executed from the oar server host.
 # OAR puts the node list on its STDIN (one hostname by line).
-#ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD="/etc/oar/shut_down_nodes.sh"
+ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD="/etc/oar/shut_down_nodes.sh"

 # Timeout to consider a node broken (suspected) if it has not woken up
 # The value can be an integer of seconds or a set of pairs.
 # For example, "1:500 11:1000 21:2000" will produce a timeout of 500
 # seconds if 1 to 10 nodes have to wakeup, 1000 seconds if 11 t 20 nodes
 # have to wake up and 2000 seconds otherwise.
-#ENERGY_SAVING_NODE_MANAGER_WAKEUP_TIMEOUT="900"
+ENERGY_SAVING_NODE_MANAGER_WAKEUP_TIMEOUT="1:200 81:400 161:600 241:800"

 # You can set up a number of nodes that must always be on. You can use the
 # syntax in the examples if you want a number of alive nodes of different types
@@ -457,7 +457,7 @@
 # Possible values are "yes" and "no"
 # When set to "yes", the list of nodes to wake up or shut down is passed to
 # ENERGY_SAVING_NODE_MANAGER_*_CMD through stdin.
-#ENERGY_SAVING_WINDOW_FORKER_BYPASS="no"
+ENERGY_SAVING_WINDOW_FORKER_BYPASS="yes"

 # Time in second between execution of each window.
 # Minimum is 0 to set no delay between each window.
@@ -471,7 +471,7 @@
 # The energy saving module can be automatically restarted after reaching
 # this number of cycles. This is a workaround for some DBD modules that do
 # not always free memory correctly.
-#ENERGY_MAX_CYCLES_UNTIL_REFRESH=5000
+ENERGY_MAX_CYCLES_UNTIL_REFRESH=500

 ################################################################################
</code>
  • apt-get install -y at
  • /etc/init.d/atd start
  • add energy saving script /etc/oar/wake_up_nodes.sh
<code bash>
#!/bin/bash
# Sample script for energy saving (wake-up)
DATE=$(date +%F_%T)
OARNODESETTINGCMD=/usr/local/sbin/oarnodesetting
# stdout/err goes to oar.log
set -x
# simulate the boot time with an at command: will set the node Alive asynchronously
while read n; do
  for nn in $n; do
    echo "bash -xc 'echo $DATE; $OARNODESETTINGCMD -h $nn -s Alive -p available_upto=2147483646' > /tmp/wake-up.$DATE.$n 2>&1" | at now +1 minute;
  done
done
</code>
/etc/oar/shut_down_nodes.sh
<code bash>
#!/bin/bash
# Sample script for energy saving (shut-down)
# stdout/err goes to oar.log
set -x
# Nothing really needed here
while read n; do
  for nn in $n; do
    echo shutting down $n
  done
done
</code>

wiki/oardocker_setup_for_grid_5000.1537484858.txt.gz · Last modified: 2018/09/21 01:07 by neyron
Recent changes RSS feed GNU Free Documentation License 1.3 Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki