Ansible role updates: make hostname/domainname setting, the home-directory
copy and the systemd reloads conditional, fix the NVIDIA driver
install/uninstall facts, and stop hard-coding the GPU model and CPU range.

Review fixes applied on top of the original patch:
  * etcHosts: `when` uses bare expressions instead of quoted "{{ }}" templating.
  * gpu: the X-server PID is extracted with awk; `cut -f 1 -d " "` yields an
    empty field whenever ps pads the PID with leading spaces.
  * nvidia-probe.py: the regular expression is written as a raw string.
  * slurm-start: one daemon-reload task replaces three identically named ones.
  * hpcsystems: accept_hostkey uses the canonical lowercase boolean.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy; check the context lines against the target tree
before applying. The index lines still carry the blob ids of the original
patch.

diff --git a/roles/allow_stale_nfs/tasks/main.yml b/roles/allow_stale_nfs/tasks/main.yml
index 391c34dcb35fda959d8babfa848eb8cf6b820566..7541b935c2239e9ea257d02f63e718c364bfe82d 100644
--- a/roles/allow_stale_nfs/tasks/main.yml
+++ b/roles/allow_stale_nfs/tasks/main.yml
@@ -8,6 +8,15 @@
   become: true
   become_user: root
 
+- name: remove old line
+  lineinfile:
+  args:
+    dest: "/etc/profile"
+    regexp: "^PATH=/usr/local/bin:/bin:/usr/bin$"
+    state: absent
+  become: true
+  become_user: root
+
 - name: remove /usr/local/ from the PATH in /etc/profile
   lineinfile:
   args:
diff --git a/roles/etcHosts/tasks/main.yml b/roles/etcHosts/tasks/main.yml
index bc86805d85725fa88bd71bd0136e91b153417553..dadad0ef4e7688fa691fd87696879f84ca6f5d39 100644
--- a/roles/etcHosts/tasks/main.yml
+++ b/roles/etcHosts/tasks/main.yml
@@ -2,18 +2,34 @@
   copy: src=files/etcHosts dest=/etc/hosts owner=root mode=644
   sudo: true
 
+- name: get hostname by sysctl
+  shell: sysctl kernel.hostname | cut -f 3 -d " "
+  register: sysctl_hostname
+  check_mode: no
+  become: true
+  become_user: root
+
 - name: set hostname by sysctl
   shell: sysctl kernel.hostname="{{ inventory_hostname }}"
   sudo: true
+  when: sysctl_hostname.stdout != inventory_hostname
+
+- name: get domainname by sysctl
+  shell: sysctl kernel.domainname | cut -f 3 -d " "
+  register: sysctl_domainname
+  check_mode: no
+  become: true
+  become_user: root
 
 - name: set domainname by sysctl
   shell: sysctl kernel.domainname="{{ domain }}"
   sudo: true
+  when: sysctl_domainname.stdout != domain
 
 - name: set /etc/sysconfig/network on CentOS 6
   lineinfile: dest=/etc/sysconfig/network line='HOSTNAME={{ inventory_hostname }}' regexp='^HOSTNAME'
   sudo: true
-  when: ansible_distribution == "CentOS"
+  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "6"
 
 - name: set /etc/sysctl.conf on Debian 8
   lineinfile: dest=/etc/sysctl.conf line='kernel.domainname = {{ domain }}' regexp='^#kernel.domainname'
diff --git a/roles/gluster_volcreate/tasks/main.yml b/roles/gluster_volcreate/tasks/main.yml
index cabe3170ff91f2ac5d0f2ec6743ff97e93f2b609..dbc266b7dc47a9c420181f099d801b17d9959d06 100644
--- a/roles/gluster_volcreate/tasks/main.yml
+++ b/roles/gluster_volcreate/tasks/main.yml
@@ -11,15 +11,6 @@
   become: true
   become_user: root
 
-- name: peer status
-  shell: "gluster peer status"
-  become: true
-  become_user: root
-  register: peer_status
-
-- name: debug peer status
-  debug: var=peer_status
-
 - name: create volume
   gluster_volume:
     name: "{{ volname }}"
diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index c13e7db22a699453bf41d0bc45a6663bf51787ae..f4b2d0585e8a750ebc4e9b1473dfea3f985a97fb 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -78,17 +78,17 @@
 
 - name: set uninstall default
   set_fact:
-    install_driver: false
+    uninstall_driver: false
 
 - name: set install
   set_fact:
     install_driver: true
-  when: not nvidia_driver.stat.exists or not installed_driver_version == nvidia_version
+  when: not nvidia_driver.stat.exists or not installed_driver_version.stdout == nvidia_version
 
 - name: set uninstall
   set_fact:
     uninstall_driver: true
-  when: nvidia_driver.stat.exists and not installed_driver_version == nvidia_version
+  when: nvidia_driver.stat.exists and not installed_driver_version.stdout == nvidia_version
 
 - name: stop the persistence daemon
   service: name=nvidia-persistenced state=stopped
@@ -96,6 +96,13 @@
   become_user: root
   when: uninstall_driver
 
+# awk (not cut) picks out the PID: ps right-aligns it, so it may have leading spaces.
+- name: kill any X processes
+  shell: ps ax | grep "X :0" | grep -v grep | awk '{print $1}' | xargs -I{} kill -9 {}
+  become: true
+  become_user: root
+  when: uninstall_driver
+
 - name: get nvidia driver
   get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
   become: true
diff --git a/roles/hpcsystems/tasks/main.yml b/roles/hpcsystems/tasks/main.yml
index 4500f696e1cb277e3fa00c14f96eaffe8fae627e..15c6947a8bdbde2bda6970d086af07b1e6dec047 100644
--- a/roles/hpcsystems/tasks/main.yml
+++ b/roles/hpcsystems/tasks/main.yml
@@ -5,6 +5,7 @@
     - openssl-devel
     - openldap-devel
     - python-pip
+    - git
   become: true
   become_user: root
   when: ansible_os_family == "RedHat"
@@ -38,6 +39,7 @@
   git:
     repo: git@gitlab.erc.monash.edu.au:hpc-team/hpcsystem.git
     dest: /usr/local/hpcsystem
+    accept_hostkey: true
 
 - name: install hpcsystem_config
   git:
diff --git a/roles/ldapclient/tasks/configLdapClient.yml b/roles/ldapclient/tasks/configLdapClient.yml
index 3a85550ea67a81696eb526e575665524c29da6f3..cdf3fa9b31e41f85e8549bfb08583477eb9f5cb6 100644
--- a/roles/ldapclient/tasks/configLdapClient.yml
+++ b/roles/ldapclient/tasks/configLdapClient.yml
@@ -25,12 +25,12 @@
   when: ldapCaCertContents is defined
 
 - name: "Copy system auth"
-  template: src=system-auth.j2 dest=/etc/pam.d/system-auth
+  template: src=system-auth.j2 dest=/etc/pam.d/system-auth-ac
   become: true
   become_user: root
 
 - name: "Copy password auth"
-  template: src=password-auth.j2 dest=/etc/pam.d/password-auth
+  template: src=password-auth.j2 dest=/etc/pam.d/password-auth-ac
   become: true
   become_user: root
 
diff --git a/roles/move_homedir/tasks/main.yml b/roles/move_homedir/tasks/main.yml
index e0f4863464e6aff0ea2b3c6a6898d1e0b3da4336..61d6b03a2b82e920011c3c03a773509d3b77bfce 100644
--- a/roles/move_homedir/tasks/main.yml
+++ b/roles/move_homedir/tasks/main.yml
@@ -2,11 +2,16 @@
   file: path=/local_home owner=root group=root state=directory
   sudo: true
 
+- name: stat the local_home path
+  stat: path=/local_home/{{ ansible_user }}
+  register: local_home_path
+
 - name: copy the {{ ansible_user }} home
   shell: cp -ar /home/{{ ansible_user }} /local_home
   ignore_errors: true
   sudo: true
   register: home_copied
+  when: not local_home_path.stat.exists
 
 
 - name: edit passwd file
diff --git a/roles/slurm-common/files/scripts/nvidia-probe.py b/roles/slurm-common/files/scripts/nvidia-probe.py
index 4b3e93e1934b9c6b648381c99b565a52240d4660..ba23982e7388c98f89f4f16f09eef38620bc1563 100755
--- a/roles/slurm-common/files/scripts/nvidia-probe.py
+++ b/roles/slurm-common/files/scripts/nvidia-probe.py
@@ -35,7 +35,7 @@ try:
         if not line :
             break
         #print "Line is ",line
-        pe=re.compile('GPU\s*(\d*).*Tesla\s*(\S*)')
+        pe=re.compile(r'GPU\s+(\d*):\s+\S+\s+(\S*)')
         m=pe.search(line)
         if not m:
             #print "No match found"
diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml
index 2e4146507057db8d0a285ed3c00558ba4ae45fd7..d99cb2ff52283205eb6ccd627505c573cf9180de 100644
--- a/roles/slurm-common/tasks/main.yml
+++ b/roles/slurm-common/tasks/main.yml
@@ -90,6 +90,11 @@
   register: probeOutput
   check_mode: no
 
+- name: get cpu count
+  shell: 'lscpu | grep "On-line CPU" | cut -f 2 -d ":" | sed "s/\ *//g"'
+  register: cpucount
+  check_mode: no
+
 - name: "set nvidiaprobe slurm_gres_list"
   set_fact: "slurm_gres_list={{ probeOutput.stdout }}"
 
diff --git a/roles/slurm-common/templates/gres.conf.j2 b/roles/slurm-common/templates/gres.conf.j2
index 24001d0dc4c874f63a23e55bee68fcd47ca4c2eb..9d13ec66f8aaf2e1dedc8d813ebd6b5ba19356a1 100644
--- a/roles/slurm-common/templates/gres.conf.j2
+++ b/roles/slurm-common/templates/gres.conf.j2
@@ -1,5 +1,5 @@
 #slurm gres file for {{ ansible_hostname }}
 #No Of Devices={{ slurm_gres_list | length }}
 {% for gr in slurm_gres_list %}
-Name={{ gr.name }} Type={{ gr.type }} File={{ gr.file }} CPUs=0-23
+Name={{ gr.name }} Type={{ gr.type }} File={{ gr.file }} CPUs={{ cpucount.stdout }}
 {% endfor %}
diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml
index 0aac90c6779918e51f95649dde4d2789c43cec92..2b2d82beea7afd96bdcbd2328bfd11c67c20f58e 100644
--- a/roles/slurm-start/tasks/main.yml
+++ b/roles/slurm-start/tasks/main.yml
@@ -19,6 +19,7 @@
   template: dest=/etc/systemd/system/slurmdbd.service src=slurmdbd.service.j2 mode=644
   sudo: true
   when: use_systemd is defined and start_slurmdbd is defined
+  register: slurmdbd_service_installed
 
 - name: copy slurm init script
   template: dest=/etc/init.d/slurm src=slurm.initd.j2 mode=755
@@ -29,16 +30,20 @@
   template: dest=/etc/systemd/system/slurmd.service src=slurmd.service.j2 mode=644
   sudo: true
   when: use_systemd is defined and start_slurmd is defined
+  register: slurmd_service_installed
 
 - name: slurmctld.service
   template: dest=/etc/systemd/system/slurmctld.service src=slurmctld.service.j2 mode=644
   sudo: true
   when: use_systemd is defined and start_slurmctld is defined
+  register: slurmctld_service_installed
 
+# A template task skipped by its `when` still registers a result that is not
+# "changed", so a single reload can test all three unit-file results.
 - name: reload systemd
   shell: systemctl daemon-reload
   sudo: true
-  when: use_systemd is defined
+  when: use_systemd is defined and (slurmd_service_installed | changed or slurmctld_service_installed | changed or slurmdbd_service_installed | changed)
 
 - name: start munge
   service: name=munge state=started enabled=yes