diff --git a/roles/config_repos/files/epel.repo b/roles/config_repos/files/epel.repo new file mode 100644 index 0000000000000000000000000000000000000000..053ed43c62542e860cdb16660bdab1918b90fd7d --- /dev/null +++ b/roles/config_repos/files/epel.repo @@ -0,0 +1,8 @@ +# Place this file in your /etc/yum.repos.d/ directory + +[epel] +name=Extra Packages for Enterprise Linux 7 - $basearch +baseurl=https://consistency0/epel/$releasever/$basearch/ +enabled=0 +gpgcheck=0 +sslverify=false diff --git a/roles/config_repos/tasks/main.yml b/roles/config_repos/tasks/main.yml index fd13eb52916874e2c500602c01025ecae8470c1a..f397640fc77541b7daf56e35a499bef69e561ba8 100644 --- a/roles/config_repos/tasks/main.yml +++ b/roles/config_repos/tasks/main.yml @@ -35,13 +35,14 @@ with_items: - monashhpc_base.repo - monashhpc_others.repo + - epel.repo - name: get enabled repos #shell: yum repolist | grep -v "repo id" | grep -v "Loaded plugins" | head -n -1 | cut -f 1 -d '/' | sed -s 's/\!//' shell: yum repolist all | grep enabled | cut -f 1 -d '/' | sed -s 's/\!//' register: repolist check_mode: no - args: + args: warn: False - name: disable unwanted repos diff --git a/roles/ec2-user/tasks/main.yml b/roles/ec2-user/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..0c147a5ccde699030781a99d9f341be54ce1133a --- /dev/null +++ b/roles/ec2-user/tasks/main.yml @@ -0,0 +1,7 @@ +- name: Add ec2-user to systems group locally + user: + name: ec2-user + groups: systems + append: yes + become: true + become_user: root diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml index bfa6204ecbd11d24a5fcac46a3d19f70ab66f225..bbaf99596489f7e1b62634f99a41c4386957e79e 100644 --- a/roles/gpu/tasks/main.yml +++ b/roles/gpu/tasks/main.yml @@ -13,6 +13,7 @@ - libX11-common - libX11-devel - libX11 + - libglvnd-devel - xorg-x11-server-common - xorg-x11-util-macros - xorg-x11-server-utils diff --git a/roles/ldapclient/templates/system-auth.j2 b/roles/ldapclient/templates/system-auth.j2 index 2f9036e24bd3d33b4626af796c1427a168ad3de3..7b5f3e590ee6cf34803ecb5684c446a4d4c8253f 100644 --- a/roles/ldapclient/templates/system-auth.j2 +++ b/roles/ldapclient/templates/system-auth.j2 @@ -1,19 +1,22 @@ #%PAM-1.0 # This file is auto-generated. # User changes will be destroyed the next time authconfig is run. + auth required pam_env.so +auth required pam_faildelay.so delay=2000000 auth sufficient pam_unix.so nullok try_first_pass auth requisite pam_succeed_if.so uid >= 500 quiet auth sufficient pam_sss.so use_first_pass auth required pam_deny.so account required pam_unix.so broken_shadow +account sufficient pam_localuser.so account sufficient pam_succeed_if.so uid < 500 quiet account [default=bad success=ok user_unknown=ignore] pam_sss.so account required pam_permit.so -password requisite pam_cracklib.so try_first_pass retry=3 -password sufficient pam_unix.so md5 shadow nullok try_first_pass use_authtok +password requisite pam_pwquality.so try_first_pass local_users_only retry=3 authtok_type= +password sufficient pam_unix.so sha512 shadow nullok try_first_pass use_authtok password sufficient pam_sss.so use_authtok password required pam_deny.so @@ -21,4 +24,4 @@ session optional pam_keyinit.so revoke session required pam_limits.so session [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid session required pam_unix.so -session optional pam_sss.so +session optional pam_sss.so \ No newline at end of file diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml index c6c1f4a30fc014c000a433ba4bfe4db629f370af..756c3b1aff462dcc22238fe7d05c6540cf2828cf 100644 --- a/roles/mellanox_drivers/tasks/main.yml +++ b/roles/mellanox_drivers/tasks/main.yml @@ -53,7 +53,7 @@ set_fact: install_now: true reboot_now: true - when: driver_version is failed or not desired_driver_version.stdout in driver_version.stdout + when: driver_version | failed or not desired_driver_version.stdout in driver_version.stdout - name: debug - print out value of install_now debug: var=install_now @@ -65,7 +65,7 @@ when: install_now - name: install drivers - shell: ./mlnxofedinstall -q --add-kernel-support --force + shell: ./mlnxofedinstall -q --add-kernel-support --force --skip-repo args: #more changes chdir: "/tmp/{{ MELLANOX_DRIVER_SRC }}" @@ -87,9 +87,9 @@ register: reload_service - name: enable roce_mode setting - service: name=roce_mode state=started enabled=True + service: name=roce_mode state=started enabled=yes become: true - + ignore_errors: yes # # A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL # diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml index 98f1c359baa2b76427a9cb7fead2e64473558cb5..6aa643d58ac88337df835db8fd81dd6e3b84fb17 100644 --- a/roles/mellanox_drivers/vars/mellanoxVars.yml +++ b/roles/mellanox_drivers/vars/mellanoxVars.yml @@ -1,7 +1,4 @@ --- #note. do not add '.tgz' to driver src. done in playbook - #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" - #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" - #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.4-1.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" -# MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-4.2-1.2.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" -MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-4.4-1.0.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" +#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-4.4-1.0.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" +MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-4.5-1.0.1.0-rhel7.6-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" diff --git a/roles/nat_server/templates/iptables.j2 b/roles/nat_server/templates/iptables.j2 index 07def7ca120b86a9fc573c9f7ac3bcc249889df8..c311a4463467e00c82cf77dbe0e93263b3fa6e4a 100644 --- a/roles/nat_server/templates/iptables.j2 +++ b/roles/nat_server/templates/iptables.j2 @@ -22,6 +22,7 @@ COMMIT :INPUT ACCEPT [0:0] :FORWARD ACCEPT [0:0] :OUTPUT ACCEPT [0:0] +-A INPUT -s {{ PRIVATE_NETWORK_CIDR }} -j ACCEPT -A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT -A INPUT -p icmp -j ACCEPT -A INPUT -i lo -j ACCEPT diff --git a/roles/slurm-common/defaults/main.yml b/roles/slurm-common/defaults/main.yml index 362d9cf4ae92e58c49950d6a09dcaaf9df92cba6..283c06273b4c10af672e815f0fe0062f986814fd 100644 --- a/roles/slurm-common/defaults/main.yml +++ b/roles/slurm-common/defaults/main.yml @@ -1,9 +1,8 @@ --- slurm_use_vpn: False -slurmctlddebug: {level: 9, log: '/var/log/slurm/slurmctld.log'} -slurmddebug: {level: 9, log: '/var/log/slurm/slurmd.log'} -slurmschedlog: {level: 9, log: '/var/log/slurm/slurmsched.log'} -slurmdbdlog: {level: 9, log: '/var/log/slurm/slurmdbd.log'} +slurmddebug: {level: 5, log: '/var/log/slurm/slurmd.log'} +slurmctlddebug: {level: 5, log: '/mnt/slurm-logs/slurmctld.log'} +slurmdbdlog: {level: 5, log: '/mnt/slurm-logs/slurmdbd.log'} slurmfairshare: {def: false, val: 10000} slurmdatadir: "/var/spool/slurm" slurmselecttype: "select/linear" diff --git a/roles/slurm-common/tasks/createSlurmDirectories.yml b/roles/slurm-common/tasks/createSlurmDirectories.yml index f4847d42b773d936d994da71401c3e3568dbf4c0..295aeadf3e1655c74e778d0b1263d5bc446757bb 100644 --- a/roles/slurm-common/tasks/createSlurmDirectories.yml +++ b/roles/slurm-common/tasks/createSlurmDirectories.yml @@ -1,4 +1,8 @@ --- +- name: make sure slurmctld and slurmdb log dir exists + file: dest=/mnt/slurm-logs state=directory owner=root group=root mode=755 + sudo: true + - name: make sure slurm conf dir exists file: dest={{ slurm_dir }}/etc state=directory sudo: true