diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index cdef9347e47802cef6d65d06c5698ddc198f906a..b35c8890d9dddcf4c555a50cf8ceff45f0add157 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -35,18 +35,37 @@
   become: true
   become_user: root
 
+- name: template unit for for persistenced
+  template: src=nvidia-persistenced.service dest=/etc/systemd/system/nvidia-persistenced.service
+  become: true
+  become_user: root
+
+- name: create the nvidia-persistenced user
+  user: name=nvidia-persistenced state=present system=yes shell=/bin/false
+  become: true
+  become_user: root
+
+- name: enable persistenced on boot
+  service: name=nvidia-persistenced state=started enabled=yes
+  become: true
+  become_user: root
+
+
+- name: remove nouveau
+  modprobe: name=nouveau state=absent
+  become: true
+  become_user: root
+
 - name: check nvidia driver
-  shell: ls /usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}
-  register: drivers_installed
+  stat: path="/usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}"
+  register: opencl
   ignore_errors: true
 
 - name: get nvidia driver
-  shell: wget http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
-  args:
-    chdir: /tmp
-    creates: /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
-  sudo: true
-  when: drivers_installed | failed
+  get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
+  become: true
+  become_user: root
+  when: not opencl.stat.exists
 
 #- name: Copy boot file
 #  template: src=grub.conf.j2 dest=/boot/grub/grub.conf
@@ -60,54 +79,12 @@
   template: src=xserver.j2 dest=/etc/pam.d/xserver
   sudo: true
 
-- name: look for nouveau
-  shell: /usr/sbin/lsmod
-  register: lsmod_output
-
-- name: set nouveau string
-  set_fact: nouveau='nouveau'
-
-
-- name: restart_host
-  shell: sleep 5 ; shutdown -r now "Reboot triggered by Ansible"
-  async: 2
-  poll: 1
-  sudo: true
-  ignore_errors: true
-  when: nouveau in lsmod_output.stdout
-
-- name: waiting for server to come back
-  local_action: wait_for host={{ ansible_host }} state=started port=22 delay=10 search_regex=OpenSSH
-  sudo: false
-  when: nouveau in lsmod_output.stdout
-
-
 
 - name: build nvidia driver
   shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
   sudo: true
-  when: drivers_installed | failed
-
-- name: restart_host
-  shell: sleep 5 ; shutdown -r now "Reboot triggered by Ansible"
-  async: 2
-  poll: 1
-  sudo: true
-  ignore_errors: true
-  when: drivers_installed | failed
-
-- name: waiting for server to come back
-  local_action: wait_for host={{ ansible_host }} state=started port=22 delay=10 search_regex=OpenSSH
-  sudo: false
-  when: drivers_installed|failed
-
-#- name: set persistence mode
-#  lineinfile:
-#  args:
-#    dest: /etc/rc.d/rc.local
-#    line: "nvidia-smi --persistence-mode=1"
-#    state: present
-#  sudo: true
-#- name: Load module
-#  shell: modprobe nvidia
-#  sudo: true
+  when: not opencl.stat.exists
+- name: set the GOM
+  shell: nvidia-smi --gom=0
+  become: true
+  become_user: root