From 3ffddb97504f5f1256a50af5546607bf5fc59e3c Mon Sep 17 00:00:00 2001 From: "Trung Nguyen (Monash University)" <trung.nguyen1@monash.edu> Date: Fri, 19 Oct 2018 12:57:09 +1100 Subject: [PATCH] Add improvement to roles Former-commit-id: 0237a52f61f3b7f3aac3db8eceb5a1a88ce4c07d --- roles/gpu/tasks/main.yml.bkup | 206 ---------------------------------- 1 file changed, 206 deletions(-) delete mode 100644 roles/gpu/tasks/main.yml.bkup diff --git a/roles/gpu/tasks/main.yml.bkup b/roles/gpu/tasks/main.yml.bkup deleted file mode 100644 index 0cdf5047..00000000 --- a/roles/gpu/tasks/main.yml.bkup +++ /dev/null @@ -1,206 +0,0 @@ ---- -- name: install deps - yum: name={{ item }} state=installed - sudo: true - with_items: - - gcc - - perl - - wget - - pciutils - - kernel-headers - - kernel-devel - - xterm - - libX11-common - - libX11-devel - - libX11 - - xorg-x11-server-common - - xorg-x11-util-macros - - xorg-x11-server-utils - - xorg-x11-font-utils - - xorg-x11-server-Xorg - - xorg-x11-glamor - - xorg-x11-xinit - - xorg-x11-utils - - xorg-x11-xauth - - xorg-x11-proto-devel - - xorg-x11-xkb-utils - -- name: install development tools - yum: name="@Development Tools" state=installed - become: true - become_user: root - -- name: disable nouveau - template: src=blacklist-nouveau.conf.j2 dest=/etc/modprobe.d/blacklist-nouveau.conf - become: true - become_user: root - -- name: template unit for for persistenced - template: src=nvidia-persistenced.service dest=/etc/systemd/system/nvidia-persistenced.service - become: true - become_user: root - -- name: create the nvidia-persistenced user - user: name=nvidia-persistenced state=present system=yes shell=/bin/false - become: true - become_user: root - - -- name: remove nouveau - modprobe: name=nouveau state=absent - become: true - become_user: root - -- name: get kernel version - shell: uname -r - register: kernel_version - check_mode: no - - -- name: check nvidia driver - stat: path="/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko" - register: nvidia_driver - ignore_errors: true - -- name: set default driver version - set_fact: - installed_driver_version: '0.0' - -- name: check nvidia driver version - shell: 'nvidia-smi | grep -Po "Driver Version: \K\S+"' - register: installed_driver_version - when: nvidia_driver.stat.exists - check_mode: no - -- name: debug - installed nvidia driver version - debug: - msg: "{{ installed_driver_version }} " - -- name: set install default - set_fact: - install_driver: false - -- name: set uninstall default - set_fact: - uninstall_driver: false - -- name: set install - set_fact: - install_driver: true - when: not nvidia_driver.stat.exists or not installed_driver_version.stdout_lines[0] == nvidia_version - -- name: set uninstall - set_fact: - uninstall_driver: true - when: nvidia_driver.stat.exists and not installed_driver_version.stdout_lines[0] == nvidia_version - -- name: Unload nvidia driver - shell: rmmod nvidia_uvm nvidia_drm nvidia_modeset nvidia || true - sudo: true - when: install_driver - -- name: stop the persistence daemon - service: name=nvidia-persistenced state=stopped - become: true - become_user: root - when: uninstall_driver - -- name: kill any X processes - shell: ps ax | grep "X :0" | grep -v grep | cut -f 1 -d " " | xargs -I{} kill -9 {} - become: true - become_user: root - when: uninstall_driver - -- name: get old nvidia driver - get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run - become: true - become_user: root - when: uninstall_driver - ignore_errors: true - -- name: uninstall old nvidia driver - shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run; /tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run --uninstall --silent - become: true - become_user: root - when: uninstall_driver - -- name: clean up old driver installation file - file: - state: absent - path: /tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run - become: true - become_user: root - ignore_errors: true - -- name: get nvidia driver - get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run - become: true - become_user: root - when: install_driver - -#- name: Copy boot file -# template: src=grub.conf.j2 dest=/boot/grub/grub.conf -# sudo: true -# -#- name: Copy X config file -# template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf -# sudo: true - -- name: Copy xserver file - template: src=xserver.j2 dest=/etc/pam.d/xserver - become: true - become_user: root - -- name: build nvidia driver - shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent - become: true - become_user: root - when: install_driver - -- name: set the GOM - shell: nvidia-smi --gom=0 - become: true - become_user: root - -- name: enable persistenced on boot - service: name=nvidia-persistenced state=started enabled=yes - become: true - become_user: root - -- name: Configure xorg.conf with nvidia-xconfig so xorg.conf matches gpu number - shell: /usr/bin/nvidia-xconfig -a --use-display-device=none --preserve-busid - become: true - become_user: root - args: - creates: /etc/X11/xorg.conf - -#- name: Template xorg.conf for nodes with one GPU -# template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf -# become: true -# become_user: root -# when: template_xorgconf is defined and template_xorgcon - -- name: run nvidia-xconf-gen - script: scripts/nvidia-xconf-gen.py - register: nvidiacards - check_mode: no - -- name: set env for nvidia_card_lists - set_fact: - nvidiacardslist: "{{ nvidiacards.stdout | from_json }}" - -- name: generate nvidia-xorg-conf - sudo: true - template: - src: xorg.conf.j2 - dest: "{{ item['filename'] }}" - with_items: "{{ nvidiacardslist }}" - -- name: clean up nvidia driver installation file - file: - state: absent - path: /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run - become: true - become_user: root - ignore_errors: true - -- GitLab