diff --git a/roles/MonashBioinformaticsPlatform_node_allocation/tasks/main.yml b/roles/MonashBioinformaticsPlatform_node_allocation/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..8a5bf2d56789282cc40e2bb5a47cd81f12b4a640 --- /dev/null +++ b/roles/MonashBioinformaticsPlatform_node_allocation/tasks/main.yml @@ -0,0 +1,4 @@ +--- +- name: Install tmux + apt: name=tmux state=latest + sudo: true diff --git a/roles/jasons_ssh_ca/files/server_ca.pub b/roles/jasons_ssh_ca/files/server_ca.pub new file mode 100644 index 0000000000000000000000000000000000000000..7e33e7cbf5a010dc6c9063e18d76017cddf09a39 --- /dev/null +++ b/roles/jasons_ssh_ca/files/server_ca.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDOzj+PSIuuGMOkMK4UO01Y1d8+6jGYELM+msVDpeSCZCAYCqYzXay6QDl5IFpdlxlhWXzcsfbC8WcHy3z+jW6kP6BcqZd7+eYrZVcWeO9A+p67OcsOHw5ixhCjFlXKxX/3D4JeppQeIUswI33zw90QViAlOPsTQuvIIuNNKQhUvfTVvkljduFXNT3xjLWai+isYKWaCbfmaiQ7EQIQyX9a3RrBKcEsLbghk3UkSq/j1OlMTbIuBKfPu26slPNRQFVBjJJfkx+kFF9ArgywHDN5dX3CxGOJhC2KIBemOC5cXjUbUI15a1UReDqShhb0m4p9pTkFOexGOB17lh1/4nUuYt2xzRahNyAEz9i02eIaVkhYFjVn1OuKJ7pa44YwoGx8RmFjRp8W/i3Crbp/IqBzMCfOZmub98b0I7H9ryg+taACRga6sLqWTDrEAbj7zFmRaaOHDIvrFj5ITO4YKYwSaWKL8w19NX4VJqzO3VVHbmUxFBoK4tGDAQ39w6BfRdxdKb+FIe+MOz68k4ADKHJSf9+LCQOFEikKNkKVUNh7FjLwi5Wz7K4S5wjnrjTUiqNC5imst262UJjtTeg7wE7ngPOlpSi1Mh4pV3/tcAboiRF8ABS/P8P0chln1YbA73x45ZF/Is9XQ2XUJiUwutrcY+upRdu2p9JAeKxGrt8i7w== root@autht diff --git a/roles/jasons_ssh_ca/handlers/main.yml b/roles/jasons_ssh_ca/handlers/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..875ea0a1df436812a61a5919059d0eb5b59e8884 --- /dev/null +++ b/roles/jasons_ssh_ca/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart ssh debian + service: name=ssh state=restarted + sudo: true + when: ansible_os_family == "Debian" diff --git a/roles/jasons_ssh_ca/tasks/main.yml b/roles/jasons_ssh_ca/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..4be9b14f7956c86fe584366c0df3dbd9d703fe97 --- /dev/null +++ b/roles/jasons_ssh_ca/tasks/main.yml @@ -0,0 +1,14 @@ +--- +- name: copy ca cert + copy: src=server_ca.pub dest=/etc/ssh/server_ca.pub owner=root group=root mode=644 + sudo: true + +- name: edit sshd_config + lineinfile: + args: + dest: /etc/ssh/sshd_config + line: TrustedUserCAKeys /etc/ssh/server_ca.pub + state: present + sudo: true + notify: restart ssh debian + when: ansible_os_family == "Debian" diff --git a/roles/modulefiles/tasks/main.yml b/roles/modulefiles/tasks/main.yml index 84ae26e4a27dc2ea7de433eb9c954099b4f77a75..7700d3b82cfd23a1e24f7eb73161bf89ed81458c 100644 --- a/roles/modulefiles/tasks/main.yml +++ b/roles/modulefiles/tasks/main.yml @@ -14,4 +14,14 @@ args: dest: /usr/share/Modules/init/.modulespath line: /usr/local/Modules/modulefiles + ignore_errors: true + sudo: true + +# for some reason ubuntu uses lowercase modules +- name: add /usr/local/Modules to the module file path + lineinfile: + args: + dest: /usr/share/modules/init/.modulespath + line: /usr/local/Modules/modulefiles + ignore_errors: true sudo: true diff --git a/roles/provision/tasks/main.yml b/roles/provision/tasks/main.yml index 3b71e4d40f287e6fefd0be5763aa05e8ec406b1a..27a2cbbd47537625c2e13efa54ff0132b0ab8c4a 100644 --- a/roles/provision/tasks/main.yml +++ b/roles/provision/tasks/main.yml @@ -4,5 +4,5 @@ sudo: true - name: provision cron job - cron: name=provision job={{ provision }} user=root minute=*/5 state=present + cron: name=provision job="/usr/bin/flock -x -n /tmp/provision.lck -c {{ provision }}" user=root minute=*/30 state=present sudo: true diff --git a/roles/provision/templates/provision.sh.j2 b/roles/provision/templates/provision.sh.j2 index 69483a265968419c02dc1715e55caa51821a54fe..d4082c8ae41b59824252396bbc178bdeaf7931ef 100644 --- a/roles/provision/templates/provision.sh.j2 +++ b/roles/provision/templates/provision.sh.j2 @@ -3,6 +3,8 @@ HOME_DIR={{ home_dir }} user_list=($(getent passwd | cut -d ":" -f1)) log_file="/root/slurm.log" +export PATH=$PATH:{{ slurm_dir }}/bin +sacctmgr=$( which sacctmgr ) for user in ${user_list[*]}; do {% if project_check is defined %} @@ -24,15 +26,15 @@ for user in ${user_list[*]}; do find=$(sacctmgr list cluster ${cluster} | grep ${cluster}) if [ -z "${find}" ]; then - su slurm -c "sacctmgr -i add cluster ${cluster}" || { echo "error to create cluster ${cluster}" >> ${log_file} && exit 1; } + su slurm -c "$sacctmgr -i add cluster ${cluster}" || { echo "error to create cluster ${cluster}" >> ${log_file} && exit 1; } fi find=$(sacctmgr list account ${account} | grep ${account}) if [ -z "${find}" ]; then - su slurm -c "sacctmgr -i add account ${account} Description=CVL Organization=monash cluster=${cluster}" || { echo "error to create account ${account}" >> ${log_file} && exit 1; } + su slurm -c "$sacctmgr -i add account ${account} Description=CVL Organization=monash cluster=${cluster}" || { echo "error to create account ${account}" >> ${log_file} && exit 1; } fi find=$(sacctmgr list user ${user} | grep ${user}) if [ -z "${find}" ]; then - su slurm -c "sacctmgr -i add user ${user} account=${account} cluster=${cluster}" || { echo "error to create user ${user}" >> ${log_file} && exit 1; } + su slurm -c "$sacctmgr -i add user ${user} account=${account} cluster=${cluster}" || { echo "error to create user ${user}" >> ${log_file} && exit 1; } fi fi fi diff --git a/roles/provision/vars/main.yml b/roles/provision/vars/main.yml index 2e084a92f49c7f6380db5cbc17aa8c2faff2f3cf..ed97d539c095cf1413af30cc23dea272095b97dd 100644 --- a/roles/provision/vars/main.yml +++ b/roles/provision/vars/main.yml @@ -1,2 +1 @@ --- - slurm_provision: /root/slurm_provision.sh diff --git a/roles/slurm-from-source/tasks/installCgroup.yml b/roles/slurm-from-source/tasks/installCgroup.yml index 247983df5f642e58d329495f24e58fef58c0f850..b81bb6b2fb8b6194744f62ab839b090355efe761 100644 --- a/roles/slurm-from-source/tasks/installCgroup.yml +++ b/roles/slurm-from-source/tasks/installCgroup.yml @@ -11,6 +11,7 @@ - cgmanager - cgmanager-utils - libcgmanager0 + sudo: true when: ansible_os_family == "Debian" - name: config cgroup.conf file diff --git a/roles/slurm-from-source/tasks/installNhc.yml b/roles/slurm-from-source/tasks/installNhc.yml index 628b10a9706a75c8947fa8bf434cec0c42735c26..9d48a4e7adf23c6e61f6e06a356b2bb43fa69acf 100644 --- a/roles/slurm-from-source/tasks/installNhc.yml +++ b/roles/slurm-from-source/tasks/installNhc.yml @@ -21,6 +21,10 @@ args: chdir: /tmp/warewulf-nhc-{{ nhc_version }} +- name: ensure sysconfig dir exists + file: dest=/etc/sysconfig state=directory owner=root group=root mode=755 + sudo: true + - name: copy nhc sysconfig script template: dest=/etc/sysconfig/nhc src=nhc.sysconfig.j2 mode=644 sudo: true @@ -39,7 +43,7 @@ register: generate_nhc_config_file - name: generate config file - shell: "{{ nhc_dir }}/sbin/nhc-genconf" + shell: "{{ nhc_dir }}/sbin/nhc-genconf -d -c {{ nhc_dir }}/etc/nhc/{{ nhc_config_file }} CONFDIR={{ nhc_dir }}/etc/nhc" sudo: true when: generate_nhc_config_file diff --git a/roles/slurm-from-source/tasks/main.yml b/roles/slurm-from-source/tasks/main.yml index 1fa874cbab3f8a106551190f33ce070447435880..3109758e85483a59295f5242ff797f226793959a 100644 --- a/roles/slurm-from-source/tasks/main.yml +++ b/roles/slurm-from-source/tasks/main.yml @@ -134,7 +134,7 @@ template: src=slurm.conf.j2 dest={{ slurm_dir }}/etc/slurm.conf sudo: true notify: restart slurm - when: slurm_use_vpn==False + when: slurm_use_vpn==False - name: install slurm.conf template: src=slurm-vpn.conf.j2 dest={{ slurm_dir }}/etc/slurm.conf diff --git a/roles/strudel_config/templates/generic_slurm_config.json.j2 b/roles/strudel_config/templates/generic_slurm_config.json.j2 index 626495b706e69a4f05d13dba270d50a69f3f8dbe..17ba6262ca1f5455c95b38aa6e174afcd627ffad 100644 --- a/roles/strudel_config/templates/generic_slurm_config.json.j2 +++ b/roles/strudel_config/templates/generic_slurm_config.json.j2 @@ -91,7 +91,7 @@ "__class__": "cmdRegEx", "__module__": "siteConfig", "async": false, - "cmd": "\"squeue -j {jobidNumber} -o \"%N\" | tail -n -1 | cut -f 1 -d ',' | xargs -iname getent hosts name | cut -f 1 -d ' ' \"", + "cmd": "\"{{ slurm_dir }}/bin/squeue -j {jobidNumber} -o \"%N\" | tail -n -1 | cut -f 1 -d ',' | xargs -iname getent hosts name | cut -f 1 -d ' ' \"", "failFatal": true, "formatFatal": false, "host": "login", @@ -121,7 +121,7 @@ "__class__": "cmdRegEx", "__module__": "siteConfig", "async": false, - "cmd": "squeue -u {username} -o \\\"%i %L\\\" | tail -n -1", + "cmd": "{{ slurm_dir }}/bin/squeue -u {username} -o \\\"%i %L\\\" | tail -n -1", "failFatal": true, "formatFatal": false, "host": "login", @@ -208,7 +208,7 @@ "__class__": "cmdRegEx", "__module__": "siteConfig", "async": false, - "cmd": "\"scontrol show job {jobidNumber}\"", + "cmd": "\"{{ slurm_dir }}/bin/scontrol show job {jobidNumber}\"", "failFatal": true, "formatFatal": false, "host": "login", @@ -268,7 +268,7 @@ "__class__": "cmdRegEx", "__module__": "siteConfig", "async": false, - "cmd": "\"mkdir ~/.vnc ; rm -f ~/.vnc/clearpass ; touch ~/.vnc/clearpass ; chmod 600 ~/.vnc/clearpass ; passwd=\"'$'\"( dd if=/dev/urandom bs=1 count=8 2>/dev/null | md5sum | cut -b 1-8 ) ; echo \"'$'\"passwd > ~/.vnc/clearpass ; cat ~/.vnc/clearpass | vncpasswd -f > ~/.vnc/passwd ; chmod 600 ~/.vnc/passwd ; echo -e '#!/bin/bash\\nvncserver ; sleep 36000000 ' | sbatch -p batch -N {nodes} -n {ppn} --time={hours}:00:00 -J desktop_{username} -o .vnc/slurm-%j.out \"", + "cmd": "\"mkdir ~/.vnc ; rm -f ~/.vnc/clearpass ; touch ~/.vnc/clearpass ; chmod 600 ~/.vnc/clearpass ; passwd=\"'$'\"( dd if=/dev/urandom bs=1 count=8 2>/dev/null | md5sum | cut -b 1-8 ) ; echo \"'$'\"passwd > ~/.vnc/clearpass ; cat ~/.vnc/clearpass | vncpasswd -f > ~/.vnc/passwd ; chmod 600 ~/.vnc/passwd ; echo -e '#!/bin/bash\\nvncserver ; sleep 36000000 ' | {{slurm_dir}}/bin/sbatch -p batch -N {nodes} -n {ppn} --time={hours}:00:00 -J desktop_{username} -o .vnc/slurm-%j.out \"", "failFatal": true, "formatFatal": false, "host": "login", @@ -282,7 +282,7 @@ "__class__": "cmdRegEx", "__module__": "siteConfig", "async": false, - "cmd": "\"scancel {jobidNumber}\"", + "cmd": "\"{{ slurm_dir }}/bin/scancel {jobidNumber}\"", "failFatal": true, "formatFatal": false, "host": "login",