diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9e7c54b267f026e01a18a831b91004881d946807..a94a6344c01602b666816d956d507327fdb7cf29 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -217,4 +217,4 @@ clean: script: - source ./$NECTAR_ALLOCATION-openrc.sh - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME - when: manual \ No newline at end of file + #when: manual \ No newline at end of file diff --git a/CICD/heat/gc_HOT.yaml b/CICD/heat/gc_HOT.yaml index 507489a8fbe9aa3d552fd04eae6ede3a08b50cda..71e9081af114525e8b8d92c537dd99a7b7c71fc0 100644 --- a/CICD/heat/gc_HOT.yaml +++ b/CICD/heat/gc_HOT.yaml @@ -13,7 +13,7 @@ parameters: type: string label: Image ID description: Centos Image - default: 12da1997-5122-4be3-a2a9-2f44961c1b16 + default: c47c3acb-9657-4243-9e14-e6c676157e3b #with NetworkManager ssh_key: type: string default: gc_key @@ -104,7 +104,7 @@ resources: name: list_join: [ '-', [ { get_param: "OS::stack_name" }, 'sql0' ]] availability_zone: { get_param: avz } - flavor: t3.small + flavor: m3.small image: { get_param: centos_7_image_id } key_name: { get_param: ssh_key } security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: MySQLSecGroup }, { get_resource: NFSSecGroup } ] @@ -167,15 +167,15 @@ resources: security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup }, { get_resource: MySQLSecGroup } ] project_name: { get_param: project_name } - LoginNodes: + LoginNodesC: type: "OS::Heat::ResourceGroup" properties: - count: 0 + count: 1 resource_def: type: "OS::Nova::Server" properties: #availability_zone: { get_param: avz } - flavor: t3.small + flavor: m3.xsmall image: { get_param: centos_7_image_id } key_name: { get_param: ssh_key } name: @@ -191,7 +191,7 @@ resources: LoginNodesU: type: "OS::Heat::ResourceGroup" properties: - count: 0 + count: 1 resource_def: type: "OS::Nova::Server" properties: @@ -217,7 +217,7 @@ 
resources: type: "OS::Nova::Server" properties: #availability_zone: { get_param: avz } - flavor: t3.small + flavor: m3.xsmall image: { get_param: centos_7_image_id } key_name: { get_param: ssh_key } name: @@ -230,7 +230,7 @@ resources: networks: - network: { get_param: NetID } - ComputeNodes: + ComputeNodesU: type: "OS::Heat::ResourceGroup" properties: count: 1 @@ -238,7 +238,7 @@ resources: type: "OS::Nova::Server" properties: #availability_zone: { get_param: avz } - flavor: t3.small + flavor: m3.xsmall image: { get_param: ubuntu_1804_image_id } key_name: { get_param: ssh_key } name: @@ -280,7 +280,7 @@ resources: type: "OS::Nova::Server" properties: #availability_zone: { get_param: avz } - flavor: t3.small + flavor: m3.xsmall image: { get_param: ubuntu_1804_image_id } key_name: { get_param: ssh_key } name: diff --git a/CICD/heat/mgmtnode_HOT.yaml b/CICD/heat/mgmtnode_HOT.yaml index d4f9858acef22bf8237d4833f6ab353cb050db3c..d44cf911c1dcc0909330d6f9455500fc28b6ec88 100644 --- a/CICD/heat/mgmtnode_HOT.yaml +++ b/CICD/heat/mgmtnode_HOT.yaml @@ -25,7 +25,7 @@ resources: type: OS::Nova::Server properties: #availability_zone: { get_param: avz } - flavor: t3.xsmall + flavor: m3.xsmall image: { get_param: image } key_name: { get_param: ssh_key } security_groups: { get_param: security_groups } diff --git a/CICD/vars/vars.yml b/CICD/vars/vars.yml index d1dc95cfbdf366fc202881732465d8f55bbda36b..83485426b7e370a91d2fd15a5083156c483a1f4e 100644 --- a/CICD/vars/vars.yml +++ b/CICD/vars/vars.yml @@ -3,6 +3,17 @@ sudo_group: systems nagios_home: "/var/lib/nagios" nvidia_version: "390.46" +yumdisablerepo: + - 'base' + - 'extras' + - 'updates' +yumenablerepo: + - 'monashhpc_base' + - 'monashhpc_updates' + - 'monashhpc_extras' + - 'monashhpc_centosplus' + - 'monashhpc_otherstuff' + gpumap: 'K1': 'K1' 'K80': 'K80' diff --git a/plays/allnodes.yml b/plays/allnodes.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac098f4dd1496fd05c7ce869b09ab54144cd307b --- 
/dev/null +++ b/plays/allnodes.yml @@ -0,0 +1,47 @@ +- hosts: 'all' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + tasks: + - { name: set use shared state, set_fact: usesharedstatedir=False } + - { name: set hostgroup, set_fact: hostgroup='ComputeNodes' } + tags: [ always ] + +- hosts: 'all' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + roles: +# - { role: disable_selinux, tags: [ disableselinux ] } + - { role: upgrade } + - { role: set_password } + - { role: etcHosts, tags: [ networking ] } +# - { role: config_repos, tags: [ repos ] } + +- hosts: 'DesktopNodes,ComputeNodes,LoginNodes,ManagementNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + roles: + - { role: disable_selinux, tags: [ disableselinux ] } + - { role: ldapclient, tags: [ authentication ] } + - { role: ssh-password-login, tags: [ authentication ] } + - { role: enable_sudo_group, tags: [ authentication, sudo ] } + - { role: move_homedir } + - { role: calculateKnownHosts, tags: [ calculateKnownHosts ] } + - { role: SSHKnownHosts, tags: [ known_hosts ] } + - { role: jasons_ssh_ca, tags: [ ssh_ca ] } diff --git a/plays/computenodes.yml b/plays/computenodes.yml new file mode 100644 index 0000000000000000000000000000000000000000..208ad954f57c479461c4270b69abefe20384c468 --- /dev/null +++ b/plays/computenodes.yml @@ -0,0 +1,64 @@ + +- hosts: 'DesktopNodes,ComputeNodes,LoginNodes,VisNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + tasks: + - { name: set use shared state, set_fact: usesharedstatedir=False } + tags: [ always ] + +- hosts: 'DesktopNodes,ComputeNodes,LoginNodes' + 
vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + roles: + - { role: move_homedir, tags: [ authentication, filesystems ] } + - { role: nfs-client, nfsMounts: "{{ computeNfsMounts }}", tags: [ filesystems ] } + - { role: slurm-common, tags: [ slurm, slurm-common ] } + - { role: lmod, tags: [ other ] } + - { role: enable_modules, default_modules: "lmod", tags: [ other ] } + - { role: postfix, tags: [ mail, other ] } + +- hosts: 'VisNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + roles: + - { role: gpu, tags: [ gpu ] } + +- hosts: 'DesktopNodes,ComputeNodes,LoginNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + roles: + - { role: slurm_config, tags: [slurm, slurm_config] } + +- hosts: 'DesktopNodes,ComputeNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + roles: + - { role: slurm-start, start_slurmd: True, tags: [ slurm, slurmstart ] } + - { role: mate-de-install, tags: [ mate-de-install ] } # TODO this crashes for everything except cmca diff --git a/plays/files b/plays/files new file mode 120000 index 0000000000000000000000000000000000000000..feb122881ce2321d72ad6b867bd2a3d01eadaac3 --- /dev/null +++ b/plays/files @@ -0,0 +1 @@ +../files \ No newline at end of file diff --git a/plays/init_slurmconf.yml b/plays/init_slurmconf.yml new file mode 100644 index 0000000000000000000000000000000000000000..30667ac53b5b6c387af0bdacb609f09cc8bfa5c3 --- /dev/null +++ b/plays/init_slurmconf.yml @@ -0,0 +1,15 @@ +--- +- hosts: 'all' + tasks: + - include_vars: vars/passwords.yml + - include_vars: vars/names.yml + - include_vars: vars/ldapConfig.yml + - 
include_vars: vars/filesystems.yml + - include_vars: vars/slurm.yml + - include_vars: vars/vars.yml +- hosts: 'all' + tasks: + - { name: setup, setup: } +- hosts: 'ManagementNodes' + roles: + - { role: calculateSlurmConf } diff --git a/plays/make_files.yml b/plays/make_files.yml new file mode 100644 index 0000000000000000000000000000000000000000..b05925ce73f9be136bb46128961990b938c07910 --- /dev/null +++ b/plays/make_files.yml @@ -0,0 +1,22 @@ +--- +# just calculates an etc hosts +- hosts: 'all' + tasks: + - include_vars: vars/passwords.yml + - include_vars: vars/names.yml + - include_vars: vars/ldapConfig.yml + - include_vars: vars/filesystems.yml + - include_vars: vars/slurm.yml + - include_vars: vars/vars.yml +- hosts: 'all' + tasks: + - { name: setup, setup: } +- hosts: 'ManagementNodes' + roles: + - { role: calculateEtcHosts } + +#- hosts: 'NFSNodes' +# roles: +# - { role: calculateExports } + + diff --git a/plays/mgmtnodes.yml b/plays/mgmtnodes.yml new file mode 100644 index 0000000000000000000000000000000000000000..5d4241194324fe13739e074b4ee749c969935dfb --- /dev/null +++ b/plays/mgmtnodes.yml @@ -0,0 +1,44 @@ +# Basic stuff to make the nodes functional +# i.e. 
upgrade operating systems, etc +# + +- hosts: 'ManagementNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + tasks: + # - { name: set hostgroup, set_fact: hostgroup='ManagementNodes' } + - { name: set use shared state, set_fact: usesharedstatedir=True } + tags: [ always ] + +- hosts: 'ManagementNodes' + strategy: free + gather_facts: False + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + roles: +# - { role: ldapclient, tags: [ authentication ] } +# - { role: ssh-password-login } +# - { role: enable_sudo_group } +# - { role: make_filesystems, volumes: "{{ glustervolumes }}" } +# - { role: gluster_server, volname: "gv", brickmnt: '/gbrick', gluster_servers: "{{ groups['ManagementNodes'] }}", replicas: 2, tags: [ gluster_server ] } +# - { role: gluster_volcreate, volname: "gv", gluster_servers: "{{ groups['ManagementNodes'] }}", brickmnt: '/gbrick', replicas: 2 } +# - { role: gluster_client, volname: "gv", gluster_servers: ['mgmt0','mgmt1','sql0'], volmnt: '/glusterVolume' } + - { role: nfs-client, nfsMounts: "{{ mgmtNfsMounts }}", tags: [ nfs ] } + - { role: slurmdb-config, tags: [ slurm, slurmdb-config ] } + - { role: slurm-common, tags: [ slurm, slurm-common ] } + - { role: slurm_config, tags: [ slurm, slurm-config ] } + - { role: slurm-start, start_slurmdbd: True, start_slurmctld: True, tags: [ slurm-start ] } + - { role: telegraf, tags: [ monitoring ] } +# - { role: provision_slurm, use_active_directory: False, lockpath: "/mnt/home", tags: [ slurm ] } +# - { role: provision_homedir, use_active_directory: False, mntpt: "/mnt/home", tags: [ provisioning ] } + diff --git a/plays/nfssqlnodes.yml b/plays/nfssqlnodes.yml new file mode 100644 index 0000000000000000000000000000000000000000..30b3b1ed1d6ddab06d6b538757ef636538338082 --- /dev/null +++ b/plays/nfssqlnodes.yml @@ -0,0 
+1,84 @@ +# Role to initialize nfs and SQL Nodes +# +# + +- hosts: 'all' + tasks: + - { name: setup, setup: } + tags: [ always ] + +#we need this here to gather facts and fill required variables. +- hosts: 'ManagementNodes' + gather_facts: True + tasks: + - include_vars: vars/passwords.yml + - include_vars: vars/names.yml + - include_vars: vars/ldapConfig.yml + - include_vars: vars/filesystems.yml + - include_vars: vars/slurm.yml + - include_vars: vars/vars.yml + - { name: set hostgroup, set_fact: hostgroup='ManagementNodes' } + - { name: set use shared state, set_fact: usesharedstatedir=True } + tags: [ always ] + +- hosts: 'SQLNodes,NFSNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + pre_tasks: + - { name: set hostgroup, set_fact: hostgroup='SQLNodes', tags: [ always ] } + - { name: set use shared state, set_fact: usesharedstatedir=True, tags: [ always ] } + +- hosts: 'SQLNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + gather_facts: True + roles: + - { role: upgrade, tags: [ upgrade ] } + - { role: make_filesystems, volumes: "{{ dbvolumes }}" } + - { role: mysql, mysql_type: mysql_server, mysql_root_password: "{{ sqlrootPasswd }}", mysql_user_name: slurmdb, mysql_user_db_name: slurm_acct_db, mysql_user_hosts_group: "{{ groups['ManagementNodes'] }}", mysql_user_password: "{{ slurmdb_passwd }}", tags: [ database ] } + - { role: slurm-mysql-config, tags: [database,slurmdb] } + tags: [ sql ] + +- hosts: 'NFSNodes' + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + strategy: free + gather_facts: False + roles: + - { role: make_filesystems, volumes: "{{ nfsvolumes }}" } + tasks: + - { name: make homedir, file: { path: /nfsvol/home, state: directory }, become: 
true, become_user: root } + - { name: make usr_local, file: { path: /nfsvol/usr_local_centos7, state: directory }, become: true, become_user: root } + - { name: make projects, file: { path: /nfsvol/projects, state: directory }, become: true, become_user: root } + - { name: make projects, file: { path: /nfsvol/scratch, state: directory }, become: true, become_user: root } + tags: [ nfs ] + +- hosts: 'NFSNodes' + strategy: free + gather_facts: False + vars_files: + - vars/passwords.yml + - vars/names.yml + - vars/ldapConfig.yml + - vars/filesystems.yml + - vars/slurm.yml + - vars/vars.yml + roles: + - { role: nfs-server } + tags: [ nfs ] diff --git a/plays/roles b/plays/roles new file mode 120000 index 0000000000000000000000000000000000000000..d8c4472ca1b65cea039252e137ff3b4ab5d3a555 --- /dev/null +++ b/plays/roles @@ -0,0 +1 @@ +../roles \ No newline at end of file diff --git a/plays/vars b/plays/vars new file mode 120000 index 0000000000000000000000000000000000000000..e8d9a6429b3aaab679b98557469104f0f7cc952b --- /dev/null +++ b/plays/vars @@ -0,0 +1 @@ +../vars \ No newline at end of file diff --git a/roles/calculateEtcHosts/tasks/main.yml b/roles/calculateEtcHosts/tasks/main.yml index ff71a956bb33175ab3ebe2227ed0b13a55165746..1d1c617050cd33d1fbf3ffe17b3b5696ebc2c254 100644 --- a/roles/calculateEtcHosts/tasks/main.yml +++ b/roles/calculateEtcHosts/tasks/main.yml @@ -8,7 +8,7 @@ - name: make hosts data command: /tmp/makehosts.py /tmp/groups {{ domain }} register: hosts_data - + - name: write hosts file template: dest=/tmp/etcHosts src=etcHosts.j2 diff --git a/roles/calculateEtcHosts/templates/etcHosts.j2 b/roles/calculateEtcHosts/templates/etcHosts.j2 index 590826feef324307abcff9aa6f8fcbaccfee30e0..88e5044c73072be51b55b3a34b7d565d4c11a313 100644 --- a/roles/calculateEtcHosts/templates/etcHosts.j2 +++ b/roles/calculateEtcHosts/templates/etcHosts.j2 @@ -4,8 +4,6 @@ fe00::0 ip6-localnet ff00::0 ip6-mcastprefix ff02::1 ip6-allnodes ff02::2 ip6-allrouters 
-118.138.241.196 hpcldap0.erc.monash.edu.au -118.138.244.7 consistency0 {% for item in hosts_data.stdout_lines %} {{ item }} diff --git a/roles/config_repos/defaults/main.yml b/roles/config_repos/defaults/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..a5ac4ab98ab70895b62ab4c1140122007b29df96 --- /dev/null +++ b/roles/config_repos/defaults/main.yml @@ -0,0 +1,5 @@ +--- +# default variables for config_repos + +reposervername: consistency0 +reposerverip: 118.138.244.7 diff --git a/roles/config_repos/tasks/main.yml b/roles/config_repos/tasks/main.yml index 51ee0dc16f0296af019929aeaeb321f9fa9cf234..9dfc27b250dc04d4316cd4463ff5d857369a2f4e 100644 --- a/roles/config_repos/tasks/main.yml +++ b/roles/config_repos/tasks/main.yml @@ -1,11 +1,12 @@ --- - name: make sure out repo server is resolvable - lineinfile: - dest: /etc/hosts - line: "118.138.244.7 consistency0" + lineinfile: + dest: /etc/hosts + line: "{{ reposerverip }} {{ reposervername }}" #this is duplicated in the role calculateEtcHosts owner: root group: root + become: True when: ansible_os_family == 'RedHat' #- name: remove default repos @@ -32,7 +33,7 @@ # when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == "7" - name: add our repos - copy: src={{ item }} dest=/etc/yum.repos.d/{{ item }} + template: src={{ item }}.j2 dest=/etc/yum.repos.d/{{ item }} become: true when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == "7" with_items: @@ -52,7 +53,7 @@ - name: disable unwanted repos shell: yum-config-manager --disable "{{ item }}" - with_items: "{{repolist.stdout_lines|difference(yumenablerepo)}}" + with_items: "{{ repolist.stdout_lines|difference(yumenablerepo) }}" become: true become_user: root ignore_errors: true diff --git a/roles/config_repos/files/epel.repo b/roles/config_repos/templates/epel.repo.j2 similarity index 70% rename from roles/config_repos/files/epel.repo rename to roles/config_repos/templates/epel.repo.j2 index 
053ed43c62542e860cdb16660bdab1918b90fd7d..4b1017bf11cd106d2f54fb47bc7accb2ab19b28b 100644 --- a/roles/config_repos/files/epel.repo +++ b/roles/config_repos/templates/epel.repo.j2 @@ -2,7 +2,7 @@ [epel] name=Extra Packages for Enterprise Linux 7 - $basearch -baseurl=https://consistency0/epel/$releasever/$basearch/ +baseurl=https://{{ reposervername }}/epel/$releasever/$basearch/ enabled=0 gpgcheck=0 sslverify=false diff --git a/roles/config_repos/files/glusterfs-epel.repo b/roles/config_repos/templates/glusterfs-epel.repo.j2 similarity index 100% rename from roles/config_repos/files/glusterfs-epel.repo rename to roles/config_repos/templates/glusterfs-epel.repo.j2 diff --git a/roles/config_repos/files/monashhpc_base.repo b/roles/config_repos/templates/monashhpc_base.repo.j2 similarity index 64% rename from roles/config_repos/files/monashhpc_base.repo rename to roles/config_repos/templates/monashhpc_base.repo.j2 index dfbb0a77a5110c72150ecc7e0481368487e2abc4..0f0cd119928b6e6803e491a495ff625b9fc54cf9 100644 --- a/roles/config_repos/files/monashhpc_base.repo +++ b/roles/config_repos/templates/monashhpc_base.repo.j2 @@ -2,24 +2,24 @@ [monashhpc_base] name=MonashHPC base repository mirrored to control the update process -baseurl=https://consistency0/centos/$releasever/os/$basearch/ +baseurl=https://{{ reposervername }}/centos/$releasever/os/$basearch/ enabled=1 sslverify=false [monashhpc_updates] name=MonashHPC base repository mirrored to control the update process -baseurl=https://consistency0/centos/$releasever/updates/$basearch/ +baseurl=https://{{ reposervername }}/centos/$releasever/updates/$basearch/ enabled=1 sslverify=false [monashhpc_extras] name=MonashHPC base repository mirrored to control the update process -baseurl=https://consistency0/centos/$releasever/extras/$basearch/ +baseurl=https://{{ reposervername }}/centos/$releasever/extras/$basearch/ enabled=1 sslverify=false [monashhpc_centosplus] name=MonashHPC base repository mirrored to control the update 
process -baseurl=https://consistency0/centos/$releasever/centosplus/$basearch/ +baseurl=https://{{ reposervername }}/centos/$releasever/centosplus/$basearch/ enabled=1 sslverify=false diff --git a/roles/config_repos/files/monashhpc_others.repo b/roles/config_repos/templates/monashhpc_others.repo.j2 similarity index 70% rename from roles/config_repos/files/monashhpc_others.repo rename to roles/config_repos/templates/monashhpc_others.repo.j2 index e78702bf53f5fe0a1284c0474aac75bba615aabd..bd0b06270231095c8fa52cdffdb65f6b1f1202a7 100644 --- a/roles/config_repos/files/monashhpc_others.repo +++ b/roles/config_repos/templates/monashhpc_others.repo.j2 @@ -2,7 +2,7 @@ [monashhpc_otherstuff] name=MonashHPC base repository mirrored to control the update process -baseurl=https://consistency0/centos/hpcsystems/$releasever/$basearch/ +baseurl=https://{{ reposervername }}/centos/hpcsystems/$releasever/$basearch/ enabled=1 sslverify=false gpgcheck=0 diff --git a/roles/cron-access/tasks/main.yml b/roles/cron-access/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..09e97aea34682a7e66782f3f2e98389ffa3f892a --- /dev/null +++ b/roles/cron-access/tasks/main.yml @@ -0,0 +1,8 @@ +- name: Adding pamd access for users who can run cron jobs + lineinfile: + path: /etc/security/access.conf + state: present + insertbefore: '^-:ALL EXCEPT root systems ec2-user debian ubuntu admin :ALL' + line: '+: cron-users : cron crond :0' + become: true + become_user: root diff --git a/roles/set_timezone/README.md b/roles/set_timezone/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43cb094b917b7d2cab5ec357b4de67fa9496ba7c --- /dev/null +++ b/roles/set_timezone/README.md @@ -0,0 +1,13 @@ +This role sets the timezone on the desired server. +- installs a templated file into /etc/ntp.conf. 
The variable NTP_SERVER sets the ntp server + - NTP_SERVER defaults to ntp.monash.edu.au +- starts and enables the ntpd process +- Makes a link from /etc/localtime state=link to path defined by Variable TIMEZONE_PATH + - TIMEZONE_PATH defaults to /usr/share/zoneinfo/Australia/Melbourne + + +Example of use +- { role: set_timezone } #sets to Melbourne time +- { role: set_timezone, TIMEZONE_PATH: "/usr/share/zoneinfo/Australia/Perth" } #sets to Perth time +- { role: set_timezone, TIMEZONE_PATH: "/usr/share/zoneinfo/Australia/Perth", NTP_SERVER: "time.google.com" } #sets to Perth time and using google ntp server + diff --git a/roles/set_timezone/tasks/main.yml b/roles/set_timezone/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..177969103af146ee970584e774bf2d4731209e77 --- /dev/null +++ b/roles/set_timezone/tasks/main.yml @@ -0,0 +1,20 @@ +--- +- name: install ntp.conf + template: src=ntp.conf.j2 dest=/etc/ntp.conf mode=644 owner=root group=root + become: true + become_user: root + +- name: restart ntpd + service: name=ntpd state=restarted + become: true + become_user: root + +- name: ensure ntpd is enabled and started + service: name=ntpd state=started enabled=yes + become: true + become_user: root + +- name: set local timezone + file: path=/etc/localtime state=link src={{ TIMEZONE_PATH }} + become: true + become_user: root diff --git a/roles/set_timezone/templates/ntp.conf.j2 b/roles/set_timezone/templates/ntp.conf.j2 new file mode 100644 index 0000000000000000000000000000000000000000..2717f9824e405f016299c2c292aeafd6dfe1cccb --- /dev/null +++ b/roles/set_timezone/templates/ntp.conf.j2 @@ -0,0 +1,55 @@ +# For more information about this file, see the man pages +# ntp.conf(5), ntp_acc(5), ntp_auth(5), ntp_clock(5), ntp_misc(5), ntp_mon(5). + +driftfile /var/lib/ntp/drift + +# Permit time synchronization with our time source, but do not +# permit the source to query or modify the service on this system. 
+restrict default nomodify notrap nopeer noquery + +# Permit all access over the loopback interface. This could +# be tightened as well, but to do so would affect some of +# the administrative functions. +restrict 127.0.0.1 +restrict ::1 + +# Hosts on local network are less restricted. +#restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap + +# Use public servers from the pool.ntp.org project. +# Please consider joining the pool (http://www.pool.ntp.org/join.html). +server {{ NTP_SERVER }} + +#broadcast 192.168.1.255 autokey # broadcast server +#broadcastclient # broadcast client +#broadcast 224.0.1.1 autokey # multicast server +#multicastclient 224.0.1.1 # multicast client +#manycastserver 239.255.254.254 # manycast server +#manycastclient 239.255.254.254 autokey # manycast client + +# Enable public key cryptography. +#crypto + +includefile /etc/ntp/crypto/pw + +# Key file containing the keys and key identifiers used when operating +# with symmetric key cryptography. +keys /etc/ntp/keys + +# Specify the key identifiers which are trusted. +#trustedkey 4 8 42 + +# Specify the key identifier to use with the ntpdc utility. +#requestkey 8 + +# Specify the key identifier to use with the ntpq utility. +#controlkey 8 + +# Enable writing of statistics records. +#statistics clockstats cryptostats loopstats peerstats + +# Disable the monitoring facility to prevent amplification attacks using ntpdc +# monlist command when default restrict does not include the noquery flag. See +# CVE-2013-5211 for more details. +# Note: Monitoring will not be disabled with the limited restriction flag. 
+disable monitor diff --git a/roles/set_timezone/vars/main.yml b/roles/set_timezone/vars/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..65f16b25f13e746fe3185f2f9789cf09367b79ae --- /dev/null +++ b/roles/set_timezone/vars/main.yml @@ -0,0 +1,3 @@ +--- +TIMEZONE_PATH: "/usr/share/zoneinfo/Australia/Melbourne" +NTP_SERVER: "ntp.monash.edu.au" diff --git a/roles/telegraf/files/telegraf_slurmstats.py b/roles/telegraf/files/telegraf_slurmstats.py new file mode 100644 index 0000000000000000000000000000000000000000..962a2080abbeb1a5532045bce4beb5a51f0033b7 --- /dev/null +++ b/roles/telegraf/files/telegraf_slurmstats.py @@ -0,0 +1,31 @@ +#!/usr/bin/python + +class SlurmStats: + def __init__(self): + self._values = {'backfill':1} + + + def values(self): + values = ",".join(["{}={}".format(key,value) for key,value in self._values.items()]) + return values + + +def print_stats(stats): + print("slurmstats {}".format(stats.values())) + +def get_stats(): + import subprocess + p = subprocess.Popen(['sdiag'],stdout = subprocess.PIPE,stderr=subprocess.PIPE) + (stdout,stderr) = p.communicate() + for l in stdout.decode().splitlines(): + if "Total backfilled jobs (since last stats cycle start):" in l: + v = l.split(':')[1] + stats = SlurmStats() + stats._values = {'backfill':int(v)} + return stats + + + +slurmstats = get_stats() +print_stats(slurmstats) + diff --git a/roles/telegraf/tasks/main.yml b/roles/telegraf/tasks/main.yml index f6382e35dcd3345c58d50dc935e904fc58ac990e..13701898cd1ae4c091aa148bc8928d091834b0b6 100644 --- a/roles/telegraf/tasks/main.yml +++ b/roles/telegraf/tasks/main.yml @@ -40,6 +40,15 @@ become: true become_user: root +- name: copy slurmstats plugin + copy: + mode: 'u=rwx,g=rx,o=rx' + src: telegraf_slurmstats.py + dest: '/opt/telegraf/bin/telegraf_slurmstats.py' + become: true + become_user: root + + - name: Install Telegraf config template: diff --git a/roles/telegraf/templates/telegraf.conf.j2 
b/roles/telegraf/templates/telegraf.conf.j2 index 3c27c4b4db028d14d70221f85ce64288f21678a4..52c2fae9c6f193000249ccd1bdcc64574b72d103 100644 --- a/roles/telegraf/templates/telegraf.conf.j2 +++ b/roles/telegraf/templates/telegraf.conf.j2 @@ -67,6 +67,17 @@ # user_agent = "telegraf" # Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) # udp_payload = 512 + [outputs.influxdb.tagdrop] + influxdb_database = ["*"] + +[[outputs.influxdb]] + urls = ["{{ influxdb_server }}"] # required + database = "slurm" # required + precision = "s" + username = "{{ influxdb_user }}" + password = "{{ influxdb_password }}" + [outputs.influxdb.tagpass] + influxdb_database = ["slurm"] ############################################################################### @@ -123,6 +134,19 @@ timeout="4s" interval="300s" +# Both Slurm ManagementNodes will log sdiag stats, but no Compute or Login nodes will +{% if 'ManagementNodes' in group_names %} +[[inputs.exec]] + commands = [ + "/opt/telegraf/bin/telegraf_slurmstats.py" + ] + data_format = "influx" + timeout="4s" + interval="60s" + [inputs.exec.tags] + influxdb_database="slurm" +{% endif %} + ############################################################################### # SERVICE INPUTS # diff --git a/scripts/make_inventory.py b/scripts/make_inventory.py index 9e7997bcf72dc43b633fa3ae53d979bdea2acd06..48bd21d85e1a7314d0982d062227c33ac2b87783 100755 --- a/scripts/make_inventory.py +++ b/scripts/make_inventory.py @@ -34,9 +34,11 @@ def gatherInfo(md_key,md_value,authDict,project_id,inventory): if groupName not in inventory: inventory[groupName] = [] inventory[groupName].append(hostname) # Add other metadata + if not hostname in inventory['_meta']['hostvars']: + inventory['_meta']['hostvars'][hostname] = {} for md in server.metadata.items(): if md[0] not in (md_key,'ansible_host_groups'): - inventory['_meta']['hostvars'][hostname] = { md[0]:md[1] } + inventory['_meta']['hostvars'][hostname].update({ md[0]:md[1] }) if 
novaVolumes: volDict = {} for volume in novaVolumes: @@ -51,6 +53,8 @@ def gatherInfo(md_key,md_value,authDict,project_id,inventory): for nn in server.networks.keys(): if 'internal' in nn: network_name = nn + else: + inventory['_meta']['hostvars'][hostname]['public_host'] = server.networks[nn][0] if network_name == None: network_name = list(server.networks.keys())[0]