diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 24b4dcbd65298b92768dc13857f9c679750b3a9f..acde4a94962ff1a4875c89556978d3a5b926a70b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -134,9 +134,9 @@ tests:
     - grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME #fail if inventory file is empty
     - ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
     - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
-
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sinfo" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "squeue" ManagementNodes
+    # Need to find a better check for sinfo
+    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name sinfo -type f" ManagementNodes
+    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name squeue -type f" ManagementNodes
     - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
     - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
     - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
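Note: the disabled "find /opt/ -name sinfo" checks would only prove the binaries exist on disk, not that the controller answers. A read-only liveness probe is "scontrol ping", which asks slurmctld to respond and should fail while the daemon is down. A minimal sketch, assuming the binaries live under the versioned slurm_dir prefix (/opt/slurm-<slurm_version>) defined in CICD/vars/slurm.yml:

    # sketch only; path assumes slurm_dir = /opt/slurm-19.05.3-2 as in CICD/vars/slurm.yml
    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-19.05.3-2/bin/scontrol ping" ManagementNodes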
diff --git a/CICD/heat/gc_HOT.yaml b/CICD/heat/gc_HOT.yaml
index 7b44445e1831e0766a21dda38da6c7ea4093ad1c..f08a4dab7a7d34a505ade570095a5e051490898e 100644
--- a/CICD/heat/gc_HOT.yaml
+++ b/CICD/heat/gc_HOT.yaml
@@ -56,16 +56,20 @@ resources:
       name: "heatnfssecgroup"
       rules: [ { protocol: tcp,
                  port_range_min: 2049,
-                 port_range_max: 2049},
+                 port_range_max: 2049,
+                 remote_mode: "remote_group_id"},
                { protocol: tcp,
                  port_range_min: 111,
-                 port_range_max: 111},
+                 port_range_max: 111,
+                 remote_mode: "remote_group_id"},
                { protocol: udp,
                  port_range_min: 2049,
-                 port_range_max: 2049},
+                 port_range_max: 2049,
+                 remote_mode: "remote_group_id"},
                { protocol: udp,
                  port_range_min: 111,
-                 port_range_max: 111}]
+                 port_range_max: 111,
+                 remote_mode: "remote_group_id"} ]
   MySQLSecGroup:
     type: "OS::Neutron::SecurityGroup"
     properties:
@@ -82,17 +86,129 @@ resources:
               port_range_min: 22,
               port_range_max: 22,
               direction: ingress,
-              remote_ip_prefix: 118.138.240.0/21
+              remote_ip_prefix: 59.191.208.0/20
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 114.30.64.0/21
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 118.138.0.0/16
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 118.139.0.0/17
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 130.194.0.0/16
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 203.0.141.0/24
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 203.6.141.0/24
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 203.23.136.0/24
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 59.191.192.0/20
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 202.158.212.32/27
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 130.194.13.96/27
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 49.127.0.0/16
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 202.58.246.0/24
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 202.94.69.0/24
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 103.35.228.0/22
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 43.246.232.0/22
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 103.35.228.0/22
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 59.191.208.0/20
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 49.127.0.0/16
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 59.191.220.0/22
+            },
+            { protocol: tcp,
+              port_range_min: 22,
+              port_range_max: 22,
+              direction: ingress,
+              remote_ip_prefix: 43.246.232.0/22
             } ]
-#  SSHInternalSecGroup:
-#    type: "OS::Neutron::SecurityGroup"
-#    properties:
-#      name: "SSHInternalSecGroup"
-#      rules: [ { protocol: tcp,
-#                 port_range_min: 22,
-#                 port_range_max: 22,
-#                 direction: ingress} ]
-  #remote_ip_prefix: { get_param: REMOTE_IP }, direction: ingress
+
   webaccess:
     type: "OS::Neutron::SecurityGroup"
     properties:
@@ -113,10 +229,10 @@ resources:
       flavor: m3.xsmall
       image: { get_param: centos_7_image_id }
       key_name: { get_param: ssh_key }
-      security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: MySQLSecGroup }, { get_resource: NFSSecGroup } ]
+      security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: MySQLSecGroup }, { get_resource: NFSSecGroup } ]
       metadata:
         ansible_host_groups: [ SQLNodes, NFSNodes ]
-        ansible_ssh_user: ec2-user
+        ansible_ssh_user: ubuntu
         project_name: { get_param: project_name }
       networks:
         - network: { get_param: NetID }
@@ -157,10 +273,10 @@ resources:
       volume_id: { get_resource: DBVolume }
       instance_uuid: { get_resource: SQLNode0 }

-  MgmtNodes:
+  MgmtNodesC:
     type: "OS::Heat::ResourceGroup"
     properties:
-      count: 2
+      count: 1
       resource_def:
         type: My::Server::MgmtNode
         properties:
@@ -173,10 +289,26 @@ resources:
           security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup }, { get_resource: MySQLSecGroup } ]
           project_name: { get_param: project_name }

-  LoginNodes:
+  MgmtNodesU:
     type: "OS::Heat::ResourceGroup"
     properties:
       count: 1
+      resource_def:
+        type: My::Server::MgmtNode
+        properties:
+          #avz: { get_param: avz }
+          image: { get_param: ubuntu_1804_image_id }
+          ansible_ssh_user: ubuntu
+          mynodename:
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'mgmtU%index%' ]]
+          ssh_key: { get_param: ssh_key }
+          security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup }, { get_resource: MySQLSecGroup } ]
+          project_name: { get_param: project_name }
+
+  LoginNodesC:
+    type: "OS::Heat::ResourceGroup"
+    properties:
+      count: 0
       resource_def:
         type: "OS::Nova::Server"
         properties:
@@ -194,6 +326,27 @@ resources:
           networks:
             - network: { get_param: NetID }

+  LoginNodesU:
+    type: "OS::Heat::ResourceGroup"
+    properties:
+      count: 0
+      resource_def:
+        type: "OS::Nova::Server"
+        properties:
+          #availability_zone: { get_param: avz }
+          flavor: m3.xsmall
+          image: { get_param: ubuntu_1804_image_id }
+          key_name: { get_param: ssh_key }
+          name:
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'loginU%index%' ]]
+          security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
+          metadata:
+            ansible_host_groups: [ LoginNodes ]
+            ansible_ssh_user: ubuntu
+            project_name: { get_param: project_name }
+          networks:
+            - network: { get_param: NetID }
+
   DesktopNodes:
     type: "OS::Heat::ResourceGroup"
     properties:
@@ -215,7 +368,28 @@ resources:
       networks:
         - network: { get_param: NetID }

-  ComputeNodes:
+  ComputeNodesU:
+    type: "OS::Heat::ResourceGroup"
+    properties:
+      count: 0
+      resource_def:
+        type: "OS::Nova::Server"
+        properties:
+          #availability_zone: { get_param: avz }
+          flavor: m3.xsmall
+          image: { get_param: ubuntu_1804_image_id }
+          key_name: { get_param: ssh_key }
+          name:
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'computec%index%' ]]
+          security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
+          metadata:
+            ansible_host_groups: [ ComputeNodes ]
+            ansible_ssh_user: ubuntu
+            project_name: { get_param: project_name }
+          networks:
+            - network: { get_param: NetID }
+
+  ComputeNodesC:
     type: "OS::Heat::ResourceGroup"
     properties:
       count: 1
@@ -227,7 +401,7 @@ resources:
           image: { get_param: centos_7_image_id }
           key_name: { get_param: ssh_key }
           name:
-            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'computec%index%' ]]
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'computec7%index%' ]]
           security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
           metadata:
             ansible_host_groups: [ ComputeNodes ]
@@ -244,19 +418,40 @@ resources:
         type: "OS::Nova::Server"
         properties:
           #availability_zone: { get_param: avz }
-          flavor: m3.xsmall
+          flavor: mon.c10r35.gpu-k2
           image: { get_param: ubuntu_1804_image_id }
           key_name: { get_param: ssh_key }
           name:
-            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'desktopu%index%' ]]
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'gpudesktopu%index%' ]]
           security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
           metadata:
-            ansible_host_groups: [ DesktopNodes ]
+            ansible_host_groups: [ DesktopNodes, GPU, ComputeNodes, K1, VisNodes ]
             ansible_ssh_user: ubuntu
             project_name: { get_param: project_name }
           networks:
             - network: { get_param: NetID }

+  CentosDesktopNodes:
+    type: "OS::Heat::ResourceGroup"
+    properties:
+      count: 1
+      resource_def:
+        type: "OS::Nova::Server"
+        properties:
+          #availability_zone: { get_param: avz }
+          flavor: mon.c10r35.gpu-k2
+          image: { get_param: centos_7_image_id }
+          key_name: { get_param: ssh_key }
+          name:
+            list_join: [ '-', [ { get_param: "OS::stack_name" }, 'gpudesktopu%index%' ]]
+          security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
+          metadata:
+            ansible_host_groups: [ DesktopNodes, GPU, ComputeNodes, K1, VisNodes ]
+            ansible_ssh_user: ec2-user
+            project_name: { get_param: project_name }
+          networks:
+            - network: { get_param: NetID }
+
   ComputeNodeRHEL:
     type: "OS::Heat::ResourceGroup"
     properties:
@@ -277,20 +472,3 @@ resources:
           project_name: { get_param: project_name }
           networks:
             - network: { get_param: NetID }
-
-#  PySSHauthz:
-#    type: "OS::Nova::Server"
-#    properties:
-#      name:
-#        list_join: [ '-', [ { get_param: "OS::stack_name" }, 'pysshautz' ]]
-#      availability_zone: { get_param: avz }
-#      flavor: m3.xsmall
-#      image: { get_param: ubuntu_1804_image_id }
-#      key_name: { get_param: ssh_key }
-#      security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: webaccess } ]
-#      metadata:
-#        ansible_host_groups: [ PySSHauthz ]
-#        ansible_ssh_user: ubuntu
-#        project_name: { get_param: project_name }
-#      networks:
-#        - network: { get_param: NetID }
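Note: in a Neutron security group rule, remote_mode: "remote_group_id" with no explicit remote_group_id makes the rule refer back to the group it belongs to, so the NFS and portmapper ports above become reachable only from instances carrying the same group rather than from anywhere. A minimal self-referencing group as a sketch (the name "nfsaccess" is illustrative, not from this template):

    # sketch: only members of this group may reach TCP 2049 on each other
    NFSAccess:
      type: "OS::Neutron::SecurityGroup"
      properties:
        name: "nfsaccess"
        rules: [ { protocol: tcp,
                   port_range_min: 2049,
                   port_range_max: 2049,
                   remote_mode: "remote_group_id" } ]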
diff --git a/CICD/plays/allnodes.yml b/CICD/plays/allnodes.yml
index d6eed3ef32a32d9b702b20534e2f916652c3c3e4..812ca4e0b6f10b46734702629bb28635695c193a 100644
--- a/CICD/plays/allnodes.yml
+++ b/CICD/plays/allnodes.yml
@@ -24,7 +24,7 @@
 #    - { role: disable_selinux, tags: [ disableselinux ] }
     - { role: etcHosts, tags: [ networking ] }
     - { role: config_repos, tags: [ repos ] }
-    - { role: upgrade }
+    - { role: upgrade, tags: [ upgrade ]}
     - { role: set_password }

diff --git a/CICD/plays/mgmtnodes.yml b/CICD/plays/mgmtnodes.yml
index 5d4241194324fe13739e074b4ee749c969935dfb..c37d10a86fef7555560e5d2586fb4f07e74ecd78 100644
--- a/CICD/plays/mgmtnodes.yml
+++ b/CICD/plays/mgmtnodes.yml
@@ -29,10 +29,7 @@
 #    - { role: ldapclient, tags: [ authentication ] }
 #    - { role: ssh-password-login }
 #    - { role: enable_sudo_group }
-#    - { role: make_filesystems, volumes: "{{ glustervolumes }}" }
-#    - { role: gluster_server, volname: "gv", brickmnt: '/gbrick', gluster_servers: "{{ groups['ManagementNodes'] }}", replicas: 2, tags: [ gluster_server ] }
-#    - { role: gluster_volcreate, volname: "gv", gluster_servers: "{{ groups['ManagementNodes'] }}", brickmnt: '/gbrick', replicas: 2 }
-#    - { role: gluster_client, volname: "gv", gluster_servers: ['mgmt0','mgmt1','sql0'], volmnt: '/glusterVolume' }
+
     - { role: nfs-client, nfsMounts: "{{ mgmtNfsMounts }}", tags: [ nfs ] }
     - { role: slurmdb-config, tags: [ slurm, slurmdb-config ] }
     - { role: slurm-common, tags: [ slurm, slurm-common ] }
diff --git a/CICD/tests/ManagementNodes/check.yml b/CICD/tests/ManagementNodes/check.yml
deleted file mode 100644
index 95e06a0a034c32c5e8ae30c2a58c40e10a738afc..0000000000000000000000000000000000000000
--- a/CICD/tests/ManagementNodes/check.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-- hosts: ManagementNodes
-  gather_facts: false
-  tasks:
-  - name: have ssh running
-    service:
-      name: sshd
-      state: started
\ No newline at end of file
diff --git a/CICD/tests/all/check.yml b/CICD/tests/all/check.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fd95357394f0c3d582043aace19cd80b3dad9dd5
--- /dev/null
+++ b/CICD/tests/all/check.yml
@@ -0,0 +1,16 @@
+---
+- hosts: ManagementNodes
+  gather_facts: false
+  tasks:
+  - name: have ssh running
+    service:
+      name: sshd
+      state: started
+
+- hosts: ComputeNodes
+  gather_facts: false
+  tasks:
+  - name: have munge service running
+    service:
+      name: munge
+      state: started
\ No newline at end of file
diff --git a/CICD/vars/slurm.yml b/CICD/vars/slurm.yml
index 65def4d949685d32b7f6b705a6390c9a6dfdab2a..0d665291de05f39f40aae2f57b1f5a2b11431481 100644
--- a/CICD/vars/slurm.yml
+++ b/CICD/vars/slurm.yml
@@ -4,7 +4,7 @@ desktopNodeList:
 clustername: "m3"
 projectname: "m3"
 slurm_version: 19.05.3-2
-munge_version: 0.5.11
+munge_version: 0.5.13
 nhc_version: 1.4.2
 munge_dir: /opt/munge-{{ munge_version }}
 slurm_dir: /opt/slurm-{{ slurm_version }}
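Note: driving the service module with state: started in these test plays will silently start sshd or munge if it is stopped, so a broken node can still pass. A read-only variant, as a sketch, gathers state with service_facts and asserts on it (the 'munge.service' key assumes a systemd host):

    - hosts: ComputeNodes
      gather_facts: false
      tasks:
      - name: collect service state without changing it
        service_facts:
      - name: munge must already be running
        assert:
          that: ansible_facts.services['munge.service'].state == 'running'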
diff --git a/CICD/vars/vars.yml b/CICD/vars/vars.yml
index 83485426b7e370a91d2fd15a5083156c483a1f4e..7def1ce714e85c7a5325fa6ccfe5ce3a9141a508 100644
--- a/CICD/vars/vars.yml
+++ b/CICD/vars/vars.yml
@@ -1,7 +1,7 @@
 ---
 sudo_group: systems
 nagios_home: "/var/lib/nagios"
-nvidia_version: "390.46"
+nvidia_version: "367.134"

 yumdisablerepo:
   - 'base'
@@ -16,6 +16,7 @@ yumenablerepo:

 gpumap:
   'K1': 'K1'
+  'K2': 'K2'
   'K80': 'K80'
   'P100-PCIE-16GB': 'P100'
   'V100-PCIE-16GB': 'V100'
diff --git a/roles/calculateSlurmConf/templates/slurm.conf.j2 b/roles/calculateSlurmConf/templates/slurm.conf.j2
index dc833e3e78c8fa191c434c0795c0f936cfbb1e7c..d460ef811ddb8f3474b26a5e3ff72bf7434a718b 100644
--- a/roles/calculateSlurmConf/templates/slurm.conf.j2
+++ b/roles/calculateSlurmConf/templates/slurm.conf.j2
@@ -119,8 +119,8 @@ JobCompType=jobcomp/none
 Prolog={{ slurmjob.prolog }}
 Epilog={{ slurmjob.epilog }}
 {% else %}
-Prolog={{ slurm_dir }}/bin/slurm.prolog
-Epilog={{ slurm_dir }}/bin/slurm.epilog
+Prolog=/opt/slurm/etc/slurm.prolog
+Epilog=/opt/slurm/etc/slurm.epilog
 {% endif %}
 #
 # ACCOUNTING
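Note: gpumap translates the device names reported by roles/slurm-common/files/scripts/nvidia-probe.py (the new 'K2' entry matches the mon.c10r35.gpu-k2 flavor introduced in the Heat template) into gres labels. The consumer of gpumap is not part of this diff; purely as an illustration, a lookup could look like the following, where probed_gpu is a hypothetical variable holding one probe result:

    # hypothetical sketch; the real consumer of gpumap is not shown in this diff
    - name: translate probed GPU type to a gres label
      set_fact:
        gres_name: "{{ gpumap[probed_gpu.type] }}"  # e.g. 'K2' -> 'K2'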
diff --git a/roles/config_repos/tasks/main.yml b/roles/config_repos/tasks/main.yml
index 684c327aa9be6fb9c90f588bf44857f94794bfe7..402ac09c2cbebd578ff29f16468c64b30385d497 100644
--- a/roles/config_repos/tasks/main.yml
+++ b/roles/config_repos/tasks/main.yml
@@ -6,8 +6,7 @@
     line: "{{ reposerverip }} {{ reposervername }}" #this is duplicated in the role calculateEtcHosts
     owner: root
     group: root
-  become: true
-
+  become: True

 #- name: remove default repos
 #  file:
@@ -44,6 +43,7 @@
 - name: get enabled repos
   #shell: yum repolist | grep -v "repo id" | grep -v "Loaded plugins" | head -n -1 | cut -f 1 -d '/' | sed -s 's/\!//'
   shell: yum repolist all | grep enabled | cut -f 1 -d '/' | sed -s 's/\!//'
+  when: ansible_os_family == 'RedHat'
   register: repolist
   check_mode: no
   changed_when: False
@@ -55,7 +55,8 @@
   with_items: "{{ repolist.stdout_lines|difference(yumenablerepo) }}"
   become: true
   become_user: root
-  ignore_errors: false
+  ignore_errors: true
+  when: ansible_os_family == 'RedHat'


 #- name: Enable epel
@@ -75,11 +76,6 @@
   become: true
   when: ansible_distribution_release == 'trusty'

-- name: add repos apt
-  shell: "add-apt-repository -y ppa:gluster/glusterfs-3.7"
-  become: true
-  when: ansible_distribution == 'Ubuntu'
-
 - name: apt-get update
   apt: update_cache=True
   become: true
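Note: should a PPA be needed again, the declarative replacement for the removed add-apt-repository shell task is the apt_repository module, which is idempotent and behaves correctly in check mode. A sketch reusing the old gluster PPA as the example:

    - name: add repos apt
      apt_repository:
        repo: "ppa:gluster/glusterfs-3.7"
        state: present
      become: true
      when: ansible_distribution == 'Ubuntu'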
diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index f76796f1881d3a0efb9b3eb3974e261e2b9dab58..96425d40b43dffc3b055f0c4b3479a6f1ec3bab1 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -25,12 +25,62 @@
     - xorg-x11-xauth
     - xorg-x11-proto-devel
     - xorg-x11-xkb-utils
+  when: ansible_os_family == 'RedHat'
+
+- name: install deps
+  apt:
+    name:
+      - 'gcc'
+      - 'perl'
+      - 'wget'
+      - 'pciutils'
+      - 'linux-headers-generic'
+      - 'xterm'
+      - 'libx11-dev'
+      - 'libx11-6'
+      - 'libglvnd-dev'
+      - 'xserver-xorg'
+      - 'vim'
+    state: present
+    update_cache: yes
+  become: true
+  become_user: root
+  when: ansible_distribution == 'Ubuntu'
+
+- name: install deps
+  yum: name={{ item }} state=installed
+  become: true
+  with_items:
+    - gcc
+    - perl
+    - wget
+    - pciutils
+    - kernel-headers
+    - kernel-devel
+    - xterm
+    - libX11-common
+    - libX11-devel
+    - libX11
+    - libglvnd-devel
+    - xorg-x11-server-common
+    - xorg-x11-util-macros
+    - xorg-x11-server-utils
+    - xorg-x11-font-utils
+    - xorg-x11-server-Xorg
+    - xorg-x11-glamor
+    - xorg-x11-xinit
+    - xorg-x11-utils
+    - xorg-x11-xauth
+    - xorg-x11-proto-devel
+    - xorg-x11-xkb-utils
+  when: ansible_os_family == 'RedHat'

 - name: install development tools
   yum: name="@Development Tools" state=installed
   become: true
   become_user: root
   ignore_errors: yes
+  when: ansible_os_family == 'RedHat'

 - name: disable nouveau
   template: src=blacklist-nouveau.conf.j2 dest=/etc/modprobe.d/blacklist-nouveau.conf
@@ -99,7 +149,6 @@
   become: true
   when: install_driver

-
 - name: stop the persistence daemon
   service: name=nvidia-persistenced state=stopped
   become: true
@@ -138,7 +187,7 @@
   when: install_driver

 - name: build nvidia driver
-  shell: /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
+  shell: /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run -q -a -n -X -s
   become: true
   when: install_driver

@@ -164,6 +213,15 @@
 #  become: true
 #  become_user: root
 #  when: template_xorgconf is defined and template_xorgcon
+- name: install dependencies for nvidia-xconf-gen
+  apt:
+    name:
+      - python-jinja2
+      - python3-jinja2
+    update_cache: yes
+    state: present
+  become: true
+  become_user: root

 - name: run nvidia-xconf-gen
   script: scripts/nvidia-xconf-gen.py
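Note: the new RedHat dependency task still uses with_items, which invokes yum once per package. Both yum and apt accept a list directly under name:, resolving everything in a single transaction; a sketch of the loop-free form (same packages, abbreviated):

    - name: install deps
      yum:
        name:
          - gcc
          - perl
          - wget
          # ... remaining packages unchanged
        state: installed
      become: true
      when: ansible_os_family == 'RedHat'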
"RedHat" + +- name: make sure mysql conf directory exists + file: dest=/etc/mysql/conf.d state=directory + become: true + register: mysqldb_confdir_create + +- name: "Starting MySQL" + service: name={{ sqlServiceName }} state=started enabled=true + become: true + +#- name: "Adding root" +# become: true +# mysql_user: name=root host="{{ item }}" password="{{ mysql_root_password }}" login_user=root login_password="{{ mysql_root_password }}" check_implicit_admin=yes +# with_items: +# - "{{ ansible_hostname }}" +# - 127.0.0.1 +# - ::1 +# - localhost + +- name: Check that the slurm_acct_db_directory exists + stat: + path: /var/lib/mysql/slurm_acct_db/ #defined in /vars/filesystems.yaml + register: slurm_acct_db_directory_result + +# this will only work if a completely fresh db gets installed because it gets shipped with a blank root pw +- name: update mysql root password for all root accounts + mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root + when: not slurm_acct_db_directory_result.stat.exists and mysqldb_confdir_create.changed + +- name: "Adding user database" + mysql_db: name={{ mysql_user_db_name }} state=present login_user=root login_password={{ mysql_root_password }} + +- name: "Giving priviliges to user" + mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present + when: mysql_user_host is defined + +- name: "Giving priviliges to user" + mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present + with_items: "{{ mysql_user_hosts_group }}" + when: mysql_user_hosts_group is defined diff --git a/roles/mysql/tasks/Ubuntu_18_mysql_server.yml b/roles/mysql/tasks/Ubuntu_18_mysql_server.yml new file mode 100644 index 0000000000000000000000000000000000000000..e573a9187a9ffc76168341b23c37e04675d7c54b --- /dev/null +++ b/roles/mysql/tasks/Ubuntu_18_mysql_server.yml @@ -0,0 +1,54 @@ +--- +- name: Make sure OS is updated since apt install might fail + apt: + update_cache: yes + become: true + +- name: "Installing MySQL for Ubuntu" + apt: name="{{ server_packages }}" update_cache=yes state=present + become: true + +- name: Comment out bind address so it doesn't bind to 127.0.0.1 + replace: + path: /etc/mysql/mariadb.conf.d/50-server.cnf + regexp: '(.*bind.*)' + replace: '#\1' + become: true + +- name: make sure mysql conf directory exists + file: dest=/etc/mysql/conf.d state=directory + become: true + register: mysqldb_confdir_create + +- name: "Starting MySQL" + service: name={{ sqlServiceName }} state=started enabled=true + become: true + +- name: Check that the slurm_acct_db_directory exists + stat: + path: /var/lib/mysql/slurm_acct_db/ #defined in /vars/filesystems.yaml + register: slurm_acct_db_directory_result + +# this will only work if a completely fresh db gets installed because it gets shipped with a blank root pw +- name: update mysql root password for all root accounts + mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root check_implicit_admin=yes + become: true + become_user: root + +- name: "Adding user database" + mysql_db: name={{ mysql_user_db_name }} state=present login_user=root login_password={{ mysql_root_password }} + become: true + become_user: root + +- name: "Giving priviliges to 
user" + mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present + when: mysql_user_host is defined + become: true + become_user: root + +- name: "Giving priviliges to user" + mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present + with_items: "{{ mysql_user_hosts_group }}" + when: mysql_user_hosts_group is defined + become: true + become_user: root \ No newline at end of file diff --git a/roles/mysql/tasks/main.yml b/roles/mysql/tasks/main.yml index fd7181ba5206b53ab92a9a0802a239a2f0b0fde2..29bd62272f9c7e68812d95caff8ff4105a31da0c 100644 --- a/roles/mysql/tasks/main.yml +++ b/roles/mysql/tasks/main.yml @@ -1,3 +1,4 @@ --- - include_vars: "{{ ansible_distribution }}_{{ ansible_distribution_major_version }}.yml" -- include: "{{ mysql_type }}.yml" +- include: "{{ ansible_distribution }}_{{ ansible_distribution_major_version }}_{{ mysql_type }}.yml" +- include: mysql_client.yml \ No newline at end of file diff --git a/roles/mysql/tasks/mysql_server.yml b/roles/mysql/tasks/mysql_server.yml index 5ad085830619f71689d367cf48f9d8bc230e0df0..33f65d3d5eecdc877103b3ba9fa656588b1e7b37 100644 --- a/roles/mysql/tasks/mysql_server.yml +++ b/roles/mysql/tasks/mysql_server.yml @@ -1,7 +1,12 @@ --- +- name: Make sure OS is updated since apt install might fail + apt: + update_cache: yes + become: true + when: ansible_os_family == "Debian" + - name: "Installing MySQL Debian" - apt: name="{{ item }}" update_cache=yes cache_valid_time=3600 state=present - with_items: "{{ server_packages }}" + apt: name="{{ server_packages }}" update_cache=yes state=present become: true when: ansible_os_family == "Debian" @@ -10,7 +15,7 @@ with_items: "{{ server_packages }}" become: true when: ansible_os_family == "RedHat" - + - name: make sure mysql conf directory exists file: dest=/etc/mysql/conf.d state=directory become: true diff --git a/roles/mysql/vars/Ubuntu_18.yml b/roles/mysql/vars/Ubuntu_18.yml new file mode 100644 index 0000000000000000000000000000000000000000..ecfd81694c1c68b36bf7d23905c7e2002465a1d6 --- /dev/null +++ b/roles/mysql/vars/Ubuntu_18.yml @@ -0,0 +1,15 @@ +server_packages: + - python + - python-dev + - libmariadb-dev + - python-pip + - libapache2-mod-wsgi + - python-mysql.connector + - mariadb-server + - python-mysqldb + +client_packages: + - python + - mariadb-client + +sqlServiceName: "mariadb" diff --git a/roles/nfs-client/tasks/main.yml b/roles/nfs-client/tasks/main.yml index 1a3ea5fd54a102c95ec8276b8e59d6187f19ac7d..23ac6d08e10c0927ac3c680bc9a2a349771d01fe 100644 --- a/roles/nfs-client/tasks/main.yml +++ b/roles/nfs-client/tasks/main.yml @@ -15,4 +15,13 @@ - nfs-utils-lib when: ansible_os_family == "RedHat" and ansible_distribution_major_version < "7" +- name: install dependencies nfs-common ubuntu + apt: + name: nfs-common + state: present + update_cache: yes + become: true + become_user: root + when: ansible_distribution == 'Ubuntu' + - include: mountFileSystem.yml diff --git a/roles/nfs-client/tasks/mountFileSystem.yml b/roles/nfs-client/tasks/mountFileSystem.yml index 80dc3cb332385fb6154fdef6ded63ca748a47689..41ecd052629ed1de9acd3d1953bde2836a7a2cbe 100644 --- a/roles/nfs-client/tasks/mountFileSystem.yml +++ b/roles/nfs-client/tasks/mountFileSystem.yml @@ -4,6 +4,6 @@ 
diff --git a/roles/nfs-client/tasks/main.yml b/roles/nfs-client/tasks/main.yml
index 1a3ea5fd54a102c95ec8276b8e59d6187f19ac7d..23ac6d08e10c0927ac3c680bc9a2a349771d01fe 100644
--- a/roles/nfs-client/tasks/main.yml
+++ b/roles/nfs-client/tasks/main.yml
@@ -15,4 +15,13 @@
     - nfs-utils-lib
   when: ansible_os_family == "RedHat" and ansible_distribution_major_version < "7"

+- name: install dependencies nfs-common ubuntu
+  apt:
+    name: nfs-common
+    state: present
+    update_cache: yes
+  become: true
+  become_user: root
+  when: ansible_distribution == 'Ubuntu'
+
 - include: mountFileSystem.yml
diff --git a/roles/nfs-client/tasks/mountFileSystem.yml b/roles/nfs-client/tasks/mountFileSystem.yml
index 80dc3cb332385fb6154fdef6ded63ca748a47689..41ecd052629ed1de9acd3d1953bde2836a7a2cbe 100644
--- a/roles/nfs-client/tasks/mountFileSystem.yml
+++ b/roles/nfs-client/tasks/mountFileSystem.yml
@@ -4,6 +4,6 @@
   mount: name={{ item.name }} src="{{ item.ipv4 }}:{{ item.src }}" fstype={{ item.fstype }} opts={{ item.opts }} state=mounted
   with_items: "{{ nfsMounts }}"
   become: true
-  ignore_errors: true
+  ignore_errors: false
   register: firstMount
   when: nfsMounts is defined
diff --git a/roles/nfs-common/tasks/aptPackages.yml b/roles/nfs-common/tasks/aptPackages.yml
index d8e07d6195e9f1012970c375cc8b6c5c236570cc..5324005b6cad3b3f896c97320cb2d129ae0f3552 100644
--- a/roles/nfs-common/tasks/aptPackages.yml
+++ b/roles/nfs-common/tasks/aptPackages.yml
@@ -6,4 +6,3 @@
     - nfs-kernel-server
   apt: "name={{ item }} state=present"
   become: true
-
diff --git a/roles/nfs-server/tasks/startServer.yml b/roles/nfs-server/tasks/startServer.yml
index 7ac79c0fa9ad39b43463dc2a3c4f6e8b2f1e0304..a2e0cbea827fc4dcc251b05f997dcb75581d4d9e 100644
--- a/roles/nfs-server/tasks/startServer.yml
+++ b/roles/nfs-server/tasks/startServer.yml
@@ -29,7 +29,15 @@
   become: true
   when: ansible_os_family == "RedHat" and ansible_distribution_major_version == "7"

+- name: "Run exportfs"
+  command: /usr/sbin/exportfs -a
+  become: true
+  when: ansible_os_family == "Debian"
+
 - name: "Start the Server"
   service: "name=nfs-kernel-server state=started enabled=true"
   become: true
   when: ansible_os_family == "Debian"
+
+
+
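Note: "exportfs -a" re-reads /etc/exports before the Debian server start. A read-only follow-up, as a sketch, surfaces what the kernel actually exports in the play output without changing state, so a typo in /etc/exports is visible immediately:

    - name: show active NFS exports
      command: /usr/sbin/exportfs -v
      register: active_exports
      changed_when: false
      become: true
      when: ansible_os_family == "Debian"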
diff --git a/roles/slurm-common/files/scripts/nvidia-probe.py b/roles/slurm-common/files/scripts/nvidia-probe.py
index 7fd743ef41b91c85842973e623e1cbfd9f3c6535..7bc00899e6f3416003aa8dea5c00519f3e78bf4c 100755
--- a/roles/slurm-common/files/scripts/nvidia-probe.py
+++ b/roles/slurm-common/files/scripts/nvidia-probe.py
@@ -1,4 +1,4 @@
-#!/bin/env python
+#!/usr/bin/env python
 # prints a list of NIDIA devices and their type in json format for
 # parsing by ansible program;
 # fields are 'name':'gpu' (fixed)
diff --git a/roles/slurm-common/tasks/installCgroup.yml b/roles/slurm-common/tasks/installCgroup.yml
index c7f4253d3dfcb0540421c27249d7aee0a4920118..b97f820a904b3eab6bea660ac92db74d9906d0eb 100644
--- a/roles/slurm-common/tasks/installCgroup.yml
+++ b/roles/slurm-common/tasks/installCgroup.yml
@@ -9,9 +9,9 @@
 - name: apt install cgroup
   apt: name={{ item }} state=installed update_cache=yes
   with_items:
-    - cgmanager
-    - cgmanager-utils
-    - libcgmanager0
+    - libcgroup1
+    - cgroupfs-mount
+    - cgroup-tools
   when: ansible_os_family == "Debian"
   become: True
   become_method: sudo
diff --git a/roles/slurm-common/tasks/installMungeFromSource.yml b/roles/slurm-common/tasks/installMungeFromSource.yml
index 656d35c9ff04a253224e44c9031e2c37c67c777e..a8f5496207dc643a1cc870c2fd6441f632e2bbd1 100644
--- a/roles/slurm-common/tasks/installMungeFromSource.yml
+++ b/roles/slurm-common/tasks/installMungeFromSource.yml
@@ -8,30 +8,35 @@
     src: "http://consistency0/src/munge-{{ munge_version }}.tar.bz2"
     copy: no
     dest: /tmp
-    creates: /tmp/munge-{{ munge_version }}/configure
+    creates: /tmp/munge-munge-{{ munge_version }}/configure
   when: not munge_binary.stat.exists

 - name: build munge
   shell: ./configure --prefix={{ munge_dir }} && make
   args:
-    chdir: /tmp/munge-{{ munge_version }}
-    creates: /tmp/munge-{{ munge_version }}/src/munge/munge
+    chdir: /tmp/munge-munge-{{ munge_version }}
+    creates: /tmp/munge-munge-{{ munge_version }}/src/munge/munge
   when: not munge_binary.stat.exists

 - name: install munge
   shell: make install
   become: true
   args:
-    chdir: /tmp/munge-{{ munge_version }}
+    chdir: /tmp/munge-munge-{{ munge_version }}
     creates: "{{ munge_dir }}/bin/munge"
   when: not munge_binary.stat.exists

-- name: set use_systemd
+- name: set use_systemd Redhat
   set_fact:
     use_systemd: True
   when: (ansible_distribution == "CentOS" or ansible_distribution == "RedHat") and ( ansible_distribution_major_version == "7")

+- name: set use_systemd Debian
+  set_fact:
+    use_systemd: True
+  when: ansible_os_family == "Debian"
+
 - name: copy init script
   template: dest=/etc/init.d/munge src=munge.initd.j2 mode=755
   become: true
diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml
index d2351af627d7d6b32aa7d720d236c3a5139d84d5..99a64ff3d1fd22fff2ba2efed0aaeaf7c0eed961 100644
--- a/roles/slurm-common/tasks/main.yml
+++ b/roles/slurm-common/tasks/main.yml
@@ -44,13 +44,14 @@
   with_items:
     - gcc
     - wget
-    - libssl-dev
+    - libssl-dev # downgrade needed for bionic see https://github.com/dun/munge/issues/54
     - libpam0g-dev
     - libbz2-dev
     - make
     - perl
     - libdbi-perl
     - lua5.2
+    - liblua5.2-dev
     - hwloc
     - libhwloc-dev
   when: ansible_os_family == "Debian"
diff --git a/roles/slurm-mysql-config/tasks/main.yml b/roles/slurm-mysql-config/tasks/main.yml
index 52f06b184ac0f5487e09b633a97b2db40e712f2a..6be48e8ad7c042b24912166da59b7c9b5b21ab2d 100644
--- a/roles/slurm-mysql-config/tasks/main.yml
+++ b/roles/slurm-mysql-config/tasks/main.yml
@@ -2,3 +2,10 @@
   template: src=slurm.cnf.j2 dest=/etc/my.cnf.d/slurm.cnf
   become: true
   become_user: root
+  when: ansible_os_family == "RedHat"
+
+- name: "Copy slurm db tuning config"
+  template: src=slurm.cnf.j2 dest=/etc/mysql/mariadb.conf.d/slurm.cnf
+  become: true
+  become_user: root
+  when: ansible_os_family == "Debian"
\ No newline at end of file
diff --git a/roles/upgrade/tasks/main.yml b/roles/upgrade/tasks/main.yml
index 85255ce6e8c1be524eb118ff7846767f1304d8f8..09d633ffbb6e18bbbfe4fb87e6579c77a2efdf8b 100644
--- a/roles/upgrade/tasks/main.yml
+++ b/roles/upgrade/tasks/main.yml
@@ -6,10 +6,10 @@
   become_user: root
   when: ansible_os_family=="Debian"

-- name: apt-get upgrade
-  apt: upgrade=safe
-  become: true
-  when: ansible_os_family=="Debian"
+#- name: apt-get upgrade
+#  apt: upgrade=safe
+#  become: true
+#  when: ansible_os_family=="Debian"

 - name: yum remove
   yum: name=ipa-client-common state=absent
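Note: with the in-role apt upgrade disabled and the role now tagged in CICD/plays/allnodes.yml, upgrades become opt-in via --tags upgrade. Should the safe upgrade ever be re-enabled, carrying the same tag on the task keeps untagged runs fast; a sketch:

    - name: apt-get upgrade
      apt:
        upgrade: safe
      become: true
      when: ansible_os_family == "Debian"
      tags: [ upgrade ]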