Patch notes (free-form description placed ahead of the first file header)

What the patch does:
  * tasks/installMungeFromSource.yml and tasks/main.yml: on RedHat-family hosts,
    run `rpm -e` on the listed munge/slurm packages before the from-source
    install; failures of that step are ignored.
  * tasks/main.yml: install the new slurm_setup.sh template into /etc/profile.d
    on RedHat-family hosts.
  * templates/munge.initd.j2: set DAEMON_ARGS so munged is started with
    `-S ${localstatedir}/run/munge/munge.socket.2`.
  * templates/slurm.conf.j2: emit Prolog/Epilog from `slurmjob`, comment out
    GresTypes=gpu, and drop State=UNKNOWN from the generated NodeName lines.
  * templates/slurm.initd.j2 and slurmdb/templates/slurmdbd.initd.j2: append the
    munge lib directory to LIBDIR.
  * templates/slurm_setup.sh.j2 (new): export PATH and LD_LIBRARY_PATH for the
    munge and slurm install directories.
  * slurm-provision/templates/slurm_provision.sh.j2: rename CVL_HOME to
    HOME_DIR, optionally restrict provisioning to users whose `id` output
    matches `project_check`, and use ${user} where ${username} was referenced.

Reviewer amendments folded into the added lines:
  * task name typo "envirnment" corrected to "environment";
  * duplicate slurm-perlapi / slurm-pam_slurm arguments removed from `rpm -e`;
  * `{% if slurmjob %}` guarded with `is defined`, like `project_check`;
  * LD_LIBRARY_PATH only appends the previous value when it is non-empty, so no
    trailing empty path element is exported;
  * provisioning script shebang changed to bash, since the script uses arrays
    and `[[ ... ]]`.

NOTE(review): the `rpm -e` steps pass the whole package list in one call and
ignore errors - confirm the removal still happens when only some of the
packages are installed.
NOTE(review): indentation and blank-line placement were reconstructed from a
whitespace-collapsed copy of this patch, and the index blob hashes are carried
over unchanged - verify against the working tree before applying.

diff --git a/roles/slurm-from-source/tasks/installMungeFromSource.yml b/roles/slurm-from-source/tasks/installMungeFromSource.yml
index 11d376a5e6c11db63673c9b52b37959292b580b7..e7a594498b7f49d15c5642fcab1a2898755d28a9 100644
--- a/roles/slurm-from-source/tasks/installMungeFromSource.yml
+++ b/roles/slurm-from-source/tasks/installMungeFromSource.yml
@@ -1,3 +1,9 @@
+- name: clean up system installation
+  shell: rpm -e munge-libs slurm-munge munge slurm-plugins munge-devel slurm slurm-slurmdbd slurm-perlapi slurm-devel slurm-pam_slurm slurm-sjstat slurm-slurmdb-direct slurm-torque slurm-sjobexit
+  ignore_errors: true
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
 - name: get munge
   shell: wget https://munge.googlecode.com/files/munge-{{ munge_version }}.tar.bz2
   args:
diff --git a/roles/slurm-from-source/tasks/main.yml b/roles/slurm-from-source/tasks/main.yml
index b9c468885d8c0ac14c9d1d10583770cb5a5d8f57..7549c00e2c6ed81c9ccfab4260062329f09d720a 100644
--- a/roles/slurm-from-source/tasks/main.yml
+++ b/roles/slurm-from-source/tasks/main.yml
@@ -1,4 +1,10 @@
 ---
+- name: clean up system installation
+  shell: rpm -e munge munge-libs munge-devel slurm slurm-sql
+  ignore_errors: true
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
 - name: create munge group
   group: name=munge system=yes gid=498
   sudo: true
@@ -97,3 +103,9 @@
   template: src=slurm-vpn.conf.j2 dest={{ slurm_dir }}/etc/slurm.conf
   sudo: true
   when: slurm_use_vpn==True
+
+- name: setup environment variables
+  template: src=slurm_setup.sh.j2 dest=/etc/profile.d/slurm_setup.sh
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
diff --git a/roles/slurm-from-source/templates/munge.initd.j2 b/roles/slurm-from-source/templates/munge.initd.j2
index 86acc09836ef456fc7d1e48084ba9cbcd2473380..10d63a1974bbcd1fc02ecca938900515ea79404c 100755
--- a/roles/slurm-from-source/templates/munge.initd.j2
+++ b/roles/slurm-from-source/templates/munge.initd.j2
@@ -34,7 +34,7 @@
 localstatedir="${prefix}/var"
 SERVICE_NAME="MUNGE"
 DAEMON_EXEC="$sbindir/munged"
-#DAEMON_ARGS=
+DAEMON_ARGS="-S ${localstatedir}/run/munge/munge.socket.2"
 #CONFIG=#_NOT_SUPPORTED_#
 PIDFILE="$localstatedir/run/munge/munged.pid"
 #NICE=
diff --git a/roles/slurm-from-source/templates/slurm.conf.j2 b/roles/slurm-from-source/templates/slurm.conf.j2
index 7832ac9a5be7ec84409c754f6eeed9f84c2d3143..b8d0753a9e4dd75d430da09933f153114ed51aa0 100644
--- a/roles/slurm-from-source/templates/slurm.conf.j2
+++ b/roles/slurm-from-source/templates/slurm.conf.j2
@@ -98,6 +98,11 @@ SlurmSchedLogFile={{ slurmschedlog.log }}
 JobCompType=jobcomp/none
 #JobCompLoc=
 #
+{% if slurmjob is defined and slurmjob %}
+Prolog={{ slurmjob.prolog }}
+Epilog={{ slurmjob.epilog }}
+{% endif %}
+#
 # ACCOUNTING
 #JobAcctGatherType=jobacct_gather/linux
 #JobAcctGatherFrequency=30
@@ -110,7 +115,7 @@ AccountingStorageHost={{ slurmctrl }}
 #AccountingStorageUser=
 #
 #GRES
-GresTypes=gpu
+#GresTypes=gpu
 
 # Fair share
 {% if slurmfairshare.def %}
@@ -128,7 +133,7 @@ MpiParams=ports=12000-12999
 {% endfor %}
 {% endfor %}
 {% for node in nodelist|unique %}
-NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} State=UNKNOWN
+NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }}
 {% endfor %}
 
 {% for queue in slurmqueues %}
diff --git a/roles/slurm-from-source/templates/slurm.initd.j2 b/roles/slurm-from-source/templates/slurm.initd.j2
index 8e0b0ce083689dfdee92efeb2b8c55142b48830a..a667fce9716f2ef220e7f2bcd4a565eccc8ad0b1 100644
--- a/roles/slurm-from-source/templates/slurm.initd.j2
+++ b/roles/slurm-from-source/templates/slurm.initd.j2
@@ -24,12 +24,12 @@
 # Short-Description: slurm daemon management
 # Description: Start slurm to provide resource management
 ### END INIT INFO
-
+munge_lib="{{ munge_dir }}/lib"
 exec_prefix="{{ slurm_dir }}"
 prefix="{{ slurm_dir }}"
 BINDIR="${exec_prefix}/bin"
 CONFDIR="${prefix}/etc"
-LIBDIR="${exec_prefix}/lib"
+LIBDIR="${exec_prefix}/lib:${munge_lib}"
 SBINDIR="${exec_prefix}/sbin"
 
 # Source function library.
diff --git a/roles/slurm-from-source/templates/slurm_setup.sh.j2 b/roles/slurm-from-source/templates/slurm_setup.sh.j2
new file mode 100644
index 0000000000000000000000000000000000000000..e999952f38698d24b8039fe3bc1715bbc5a976f7
--- /dev/null
+++ b/roles/slurm-from-source/templates/slurm_setup.sh.j2
@@ -0,0 +1,5 @@
+
+export PATH={{ munge_dir }}/bin:{{ slurm_dir }}/bin:{{ slurm_dir }}/sbin:$PATH
+
+export LD_LIBRARY_PATH={{ munge_dir }}/lib:{{ slurm_dir }}/lib:{{ slurm_dir }}/lib/slurm${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
diff --git a/roles/slurm-provision/templates/slurm_provision.sh.j2 b/roles/slurm-provision/templates/slurm_provision.sh.j2
index e1dda7c2196223c255da932fe42d1229ed27a973..6da159b8f9638a0e9397ab653ffc96c806bc4800 100644
--- a/roles/slurm-provision/templates/slurm_provision.sh.j2
+++ b/roles/slurm-provision/templates/slurm_provision.sh.j2
@@ -1,33 +1,39 @@
-#!/bin/sh
+#!/bin/bash
 
-CVL_HOME="/cvl/home"
+HOME_DIR="/cvl/home"
 user_list=($(getent passwd | cut -d ":" -f1))
 log_file="/root/slurm.log"
 
 for user in ${user_list[*]}; do
-    uid=$(id -u ${user})
-    gid=$(id -g ${user})
-    user_home=${CVL_HOME}/${user}
-    if [[ ! -d ${user_home} && ${uid} -gt 1000 ]]; then
-        mkdir -p ${user_home}
-        cp -r /etc/skel/* ${user_home}
-        chown -R ${uid}:${gid} ${user_home}
-        chmod 700 ${user_home}
+    {% if project_check is defined %}
+    run=$(id ${user} | grep {{ project_check }})
+    {% else %}
+    run="1"
+    {% endif %}
+    if [ ! -z "${run}" ]; then
+        uid=$(id -u ${user})
+        gid=$(id -g ${user})
+        user_home=${HOME_DIR}/${user}
+        if [[ ! -d ${user_home} && ${uid} -gt 1000 ]]; then
+            cp -r /etc/skel ${user_home}
+            chown -R ${uid}:${gid} ${user_home}
+            chmod 700 ${user_home}
 
-        account={{ projectname }}
-        cluster={{ clustername }}
+            account={{ projectname }}
+            cluster={{ clustername }}
 
-        find=$(sacctmgr list cluster ${cluster} | grep ${cluster})
-        if [ -z "${find}" ]; then
-            su slurm -c "sacctmgr -i add cluster ${cluster}" || { echo "error to create cluster ${cluster}" >> ${log_file} && exit 1; }
-        fi
-        find=$(sacctmgr list account ${account} | grep ${account})
-        if [ -z "${find}" ]; then
-            su slurm -c "sacctmgr -i add account ${account} Description=CVL Organization=monash cluster=${cluster}" || { echo "error to create account ${account}" >> ${log_file} && exit 1; }
-        fi
-        find=$(sacctmgr list user ${username} | grep ${username})
-        if [ -z "${find}" ]; then
-            su slurm -c "sacctmgr -i add user ${username} account=${account} cluster=${cluster}" || { echo "error to create user ${username}" >> ${log_file} && exit 1; }
+            find=$(sacctmgr list cluster ${cluster} | grep ${cluster})
+            if [ -z "${find}" ]; then
+                su slurm -c "sacctmgr -i add cluster ${cluster}" || { echo "error to create cluster ${cluster}" >> ${log_file} && exit 1; }
+            fi
+            find=$(sacctmgr list account ${account} | grep ${account})
+            if [ -z "${find}" ]; then
+                su slurm -c "sacctmgr -i add account ${account} Description=CVL Organization=monash cluster=${cluster}" || { echo "error to create account ${account}" >> ${log_file} && exit 1; }
+            fi
+            find=$(sacctmgr list user ${user} | grep ${user})
+            if [ -z "${find}" ]; then
+                su slurm -c "sacctmgr -i add user ${user} account=${account} cluster=${cluster}" || { echo "error to create user ${user}" >> ${log_file} && exit 1; }
+            fi
         fi
     fi
 done
diff --git a/roles/slurmdb/templates/slurmdbd.initd.j2 b/roles/slurmdb/templates/slurmdbd.initd.j2
index afeb6cab36371c3a773c206a1d57848cbe5b05cc..d9a7b2084f4340f4099f9bedafafa49fbb396968 100644
--- a/roles/slurmdb/templates/slurmdbd.initd.j2
+++ b/roles/slurmdb/templates/slurmdbd.initd.j2
@@ -21,10 +21,11 @@
 # Description: Start slurm to provide database server for SLURM
 ### END INIT INFO
 
+munge_lib="{{ munge_dir }}/lib"
 exec_prefix="{{ slurm_dir }}"
 prefix="{{ slurm_dir }}"
 CONFDIR="${prefix}/etc"
-LIBDIR="${exec_prefix}/lib"
+LIBDIR="${exec_prefix}/lib:${munge_lib}"
 SBINDIR="${exec_prefix}/sbin"
 
 #Source function library.