diff --git a/roles/calculateSlurmConf/templates/slurm.conf.j2 b/roles/calculateSlurmConf/templates/slurm.conf.j2 index f8818eca480935c56a5974c7241d5f66f0d99155..4cc03ad73081b03a877d3ec7a488115f6c92bc70 100644 --- a/roles/calculateSlurmConf/templates/slurm.conf.j2 +++ b/roles/calculateSlurmConf/templates/slurm.conf.j2 @@ -30,7 +30,8 @@ SwitchType=switch/none MpiDefault=pmi2 SlurmctldPidFile={{ slurmpiddir }}/slurmctld.pid SlurmdPidFile={{ slurmpiddir }}/slurmd.pid -ProctrackType=proctrack/linuxproc +#ProctrackType=proctrack/linuxproc +ProctrackType=proctrack/cgroup #PluginDir= CacheGroups=0 #FirstJobId= @@ -78,16 +79,16 @@ SelectType={{ slurmselecttype }} SelectTypeParameters=CR_Core_Memory {% endif %} FastSchedule={{ slurmfastschedule }} -#PriorityType=priority/multifactor +PriorityType=priority/multifactor #PriorityFlags=Ticket_Based #PriorityCalcPeriod=5 #PriorityDecayHalfLife=0 #PriorityUsageResetPeriod=14-0 -##PriorityWeightFairshare=10000 -#PriorityWeightAge=10000 -#PriorityWeightPartition=10000 -#PriorityWeightJobSize=10000 -#PriorityMaxAge=14-0 +PriorityWeightFairshare=10000 +PriorityWeightAge=10000 +PriorityWeightPartition=10000 +PriorityWeightJobSize=10000 +PriorityMaxAge=14-0 # # LOGGING {% if slurmctlddebug %} @@ -117,24 +118,27 @@ JobCompType=jobcomp/none {% if slurmjob is defined %} Prolog={{ slurmjob.prolog }} Epilog={{ slurmjob.epilog }} +{% else %} +Prolog={{ slurm_dir }}/bin/slurm.prolog +Epilog={{ slurm_dir }}/bin/slurm.epilog {% endif %} # # ACCOUNTING -#JobAcctGatherType=jobacct_gather/linux -#JobAcctGatherFrequency=30 +JobAcctGatherType=jobacct_gather/linux +JobAcctGatherFrequency=30 # AccountingStorageType=accounting_storage/slurmdbd AccountingStorageHost={{ slurmdbd }} {% if slurmdbdbackup is defined %} AccountingStorageBackupHost={{ slurmdbdbackup }} {% endif %} -#AccountingStorageEnforce=limits,safe +AccountingStorageEnforce=limits,safe #AccountingStorageLoc= #AccountingStoragePass= #AccountingStorageUser= # #GRES -GresTypes=gpu 
+#GresTypes=gpu # Fair share {% if slurmfairshare.def %} @@ -155,6 +159,10 @@ MpiParams=ports=12000-12999 NodeName={{ hostvars[node]['ansible_hostname'] }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} RealMemory={{ hostvars[node].ansible_memory_mb.real.total }} Sockets={{ hostvars[node]['ansible_processor_vcpus'] }} CoresPerSocket=1 ThreadsPerCore={{ hostvars[node].ansible_processor_threads_per_core }} {% if hostvars[node].ansible_hostname.find('vis') != -1 %}Gres=gpu:1{% endif %} {% if hostvars[node]['ansible_processor_vcpus'] == 1 %}Weight=1{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 1 and hostvars[node]['ansible_processor_vcpus'] <= 16 %}Weight=3{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 16 and hostvars[node]['ansible_processor_vcpus'] <= 20 %}Weight=5{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 20 and hostvars[node]['ansible_processor_vcpus'] <= 40 %}Weight=7{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 40 and hostvars[node]['ansible_processor_vcpus'] <= 64 %}Weight=8{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 64 and hostvars[node]['ansible_processor_vcpus'] <= 128 %}Weight=9{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 128 %}Weight=10{% endif %} Feature=stage1 State=UNKNOWN {% endfor %} +#monarch specific to stop stupid warning messages +NodeName={{ hostvars[groups['LoginNodes'][0]]['ansible_hostname'] }} State=DOWN +NodeName={{ slurmctrl }} State=DOWN + {% for queue in slurmqueues %} {% set nodenames = [] %} {% for node in groups[queue.group] %} diff --git a/roles/karaage3.1.17/templates/index.html.j2 b/roles/karaage3.1.17/templates/index.html.j2 index 61c24b3954b21f5ebd0fa0f50abda8fd76611eac..6b65f306e0dd0f09477fc69c67ba87e09fead635 100644 --- a/roles/karaage3.1.17/templates/index.html.j2 +++ b/roles/karaage3.1.17/templates/index.html.j2 @@ -1,15 +1,4 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html> -<body><h3>HPC identity system (The landing page is under the construction)</h3> -<br> -<p>Monash HPC identity system is a new HPC access control system. Access to the HPC ID system is done through the Australian Access Federation (AAF). This allows you to login using your Institutional username and password. -<br> -<br> -If it is the first time you are using the system, it will give your options to select your existing HPC username for creating a new user account. You'll need to join projects before you can access HPC system. -<br> -<br> -If your organisation is not a member of the AAF or if you need helps, please send HPC email support: help@massive.org.au.</p> -<br> -<p>Click following link <a href=https://{{ ansible_fqdn }}/aafbootstrap>to continue.</a></p> -</body> -</html> +<html><body><h3>HPC identity management</h3> +<p>To log in via AAF authentication, connect to <a href=https://{{ hpchostname }}.erc.monash.edu.au/aafbootstrap>aafbootstrap</a></p> +<p>To log in without AAF authentication, connect to <a href=https://{{ hpchostname }}.erc.monash.edu.au/users>users</a></p> +</body></html> diff --git a/roles/setupKnownHosts/tasks/main.yml b/roles/setupKnownHosts/tasks/main.yml index 2f520d84659457471331e582e91827694fb7bfd8..ad1ebffe3ffe2c98ef37a7aecfa739af6f06bde1 100644 --- a/roles/setupKnownHosts/tasks/main.yml +++ b/roles/setupKnownHosts/tasks/main.yml @@ -1,10 +1,18 @@ - name: "Templating /etc/ssh/known_hosts" - template: src=known_hosts.j2 dest=/etc/ssh/known_hosts owner=root group=root mode=600 + template: src=known_hosts.j2 dest=/etc/ssh/ssh_known_hosts owner=root group=root mode=644 sudo: true - register: sshknowhost + register: sshknownhost -- name: ecrypt the hosts file - shell: ssh-keygen -H -f /etc/ssh/known_hosts +- name: encrypt the hosts file + shell: ssh-keygen -H -f /etc/ssh/ssh_known_hosts + sudo: true + when: sshknownhost.changed + +- name: set read permissions + file: 
path=/etc/ssh/ssh_known_hosts owner=root group=root mode=644 state=file + sudo: true + +- name: delete ssh_known_hosts.old + file: path=/etc/ssh/ssh_known_hosts.old state=absent sudo: true - when: sshknowhost.changed diff --git a/roles/setupKnownHosts/templates/known_hosts.j2 b/roles/setupKnownHosts/templates/known_hosts.j2 index 326660a1d7799932062e79ce44b9bc3a560c9207..b083b5ee6e037007b984b133264405c77a9340ff 100644 --- a/roles/setupKnownHosts/templates/known_hosts.j2 +++ b/roles/setupKnownHosts/templates/known_hosts.j2 @@ -2,14 +2,22 @@ {% for node in groups['all'] %} {% for interface in hostvars[node]['ansible_interfaces'] %} {% if interface != "lo" %} -{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'rsa': hostvars[node]['ansible_ssh_host_key_rsa_public']} %} +{% if hostvars[node]['ansible_ssh_host_key_rsa_public'] is defined %} +{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ssh-rsa', 'key': hostvars[node]['ansible_ssh_host_key_rsa_public']} %} {% if nodelist.append(host) %} {% endif %} {% endif %} +{% if hostvars[node]['ansible_ssh_host_key_ecdsa_public'] is defined %} +{# key type must be the OpenSSH name, not 'ssh-ecdsa'; nistp256 assumes default-size ECDSA host keys - TODO confirm #} +{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ecdsa-sha2-nistp256', 'key': hostvars[node]['ansible_ssh_host_key_ecdsa_public']} %} +{% if nodelist.append(host) %} +{% endif %} +{% endif %} +{% endif %} {% endfor %} {% endfor %} -{% for host in nodelist|unique %} -{{ host.ip }} ssh-rsa {{ host.rsa }} -{{ host.name }} ssh-rsa {{ host.rsa }} +{% for host in nodelist %} +{{ host.ip }} {{ host.keytype }} {{ host.key }} +{{ host.name }} {{ host.keytype }} {{ host.key }} {% endfor %} diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml index 
9763a144c7e4dc04175cdac769dcb0fe1842c30c..31120c9385a9c1137fa6cf225b4cbbfa30de8379 100644 --- a/roles/slurm-common/tasks/main.yml +++ b/roles/slurm-common/tasks/main.yml @@ -123,6 +123,14 @@ sudo: true when: slurm_gres_list is defined +- name: install slurm prolog + template: src=slurm.prolog.j2 dest={{ slurm_dir }}/bin/slurm.prolog mode=755 + sudo: true + +- name: install slurm epilog + template: src=slurm.epilog.j2 dest={{ slurm_dir }}/bin/slurm.epilog mode=755 + sudo: true + - name: install slurm.conf copy: src=files/slurm.conf dest={{ slurm_dir }}/etc/slurm.conf sudo: true diff --git a/roles/slurm-common/templates/slurm.epilog.j2 b/roles/slurm-common/templates/slurm.epilog.j2 new file mode 100644 index 0000000000000000000000000000000000000000..84dd012fae68a7fd85ac390582d8f8991c322073 --- /dev/null +++ b/roles/slurm-common/templates/slurm.epilog.j2 @@ -0,0 +1,3 @@ +#!/bin/sh +# Remove the job owner's files from /tmp; NUL-delimited hand-off so whitespace in file names cannot redirect rm to other paths +find /tmp -user "${SLURM_JOB_USER}" -print0 | xargs -0 -r rm -rf diff --git a/roles/slurm-common/templates/slurm.prolog.j2 b/roles/slurm-common/templates/slurm.prolog.j2 new file mode 100644 index 0000000000000000000000000000000000000000..039e4d0069c5c26909f86c505b9de66182e6d1f3 --- /dev/null +++ b/roles/slurm-common/templates/slurm.prolog.j2 @@ -0,0 +1,2 @@ +#!/bin/sh +exit 0