---
# Install the compiler, kernel headers and X11/Xorg packages listed below on
# RedHat-family hosts through the distribution-agnostic `package` module.
- name: install deps
  package:
    name:
      - gcc
      - perl
      - wget
      - pciutils
      - kernel-headers
      - kernel-devel
      - xterm
      - libX11-common
      - libX11-devel
      - libX11
      - libglvnd-devel
      - xorg-x11-server-common
      - xorg-x11-util-macros
      - xorg-x11-server-utils
      - xorg-x11-font-utils
      - xorg-x11-server-Xorg
      - xorg-x11-glamor
      - xorg-x11-xinit
      - xorg-x11-utils
      - xorg-x11-xauth
      - xorg-x11-proto-devel
      - xorg-x11-xkb-utils
      - python-jinja2
    state: present
  become: true
  when: ansible_os_family == 'RedHat'

# Ubuntu counterpart of the dependency install: refreshes the apt cache and
# makes sure every package in the list is present.
- name: install deps
  become: true
  become_user: root
  apt:
    update_cache: true
    state: present
    name:
      - gcc
      - perl
      - wget
      - pciutils
      - linux-headers-generic
      - xterm
      - libx11-dev
      - libx11-6
      - libglvnd-dev
      - xserver-xorg
      - vim
      # NOTE(review): both the Python 2 and Python 3 jinja2 packages are
      # requested; confirm both exist on every targeted Ubuntu release.
      - python-jinja2
      - python3-jinja2
  when: ansible_distribution == 'Ubuntu'

# Install the "Development Tools" yum package group on RedHat-family hosts.
- name: install development tools
  yum:
    # The leading "@" selects a package group; quoted because "@" is a
    # reserved YAML indicator.
    name: "@Development Tools"
    state: present
  become: true
  become_user: root
  when: ansible_os_family == 'RedHat'

# Render the nouveau blacklist template into /etc/modprobe.d.
- name: disable nouveau
  template:
    src: blacklist-nouveau.conf.j2
    dest: /etc/modprobe.d/blacklist-nouveau.conf
  become: true
  become_user: root

# Install the systemd unit file for nvidia-persistenced from the role's
# template of the same name.
- name: template unit for for persistenced
  become: true
  become_user: root
  template:
    src: nvidia-persistenced.service
    dest: /etc/systemd/system/nvidia-persistenced.service

# Ensure a system account named nvidia-persistenced exists, with /bin/false
# as its login shell.
- name: create the nvidia-persistenced user
  become: true
  become_user: root
  user:
    name: nvidia-persistenced
    system: true
    shell: /bin/false
    state: present


# Make sure the nouveau kernel module is not loaded.
# NOTE(review): no `become` is set on this task, unlike most of its
# neighbours; presumably privilege escalation comes from the play. Confirm.
- name: remove nouveau
  modprobe:
    name: nouveau
    state: absent
# Capture the running kernel release (`uname -r`) in `kernel_version`; the
# following task uses its stdout to build the path of the nvidia module.
- name: get kernel version
  # No shell features are needed, so use `command` rather than `shell`.
  command: uname -r
  register: kernel_version
  # Read-only query: never report a change.
  changed_when: false
  # Run in check mode as well so `kernel_version.stdout` is always defined
  # for the tasks that template it.
  check_mode: false
# Record in `nvidia_driver` whether an nvidia.ko module file exists for the
# running kernel; later tasks branch on `nvidia_driver.stat.exists`.
- name: check nvidia driver
  stat:
    path: "/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko"
  register: nvidia_driver
  ignore_errors: true

# Seed `installed_driver_version` with a placeholder before the real version
# is probed.
- name: set default driver version
  set_fact:
    installed_driver_version: '0.0'

# Extract the "Driver Version:" value from the nvidia-smi output and register
# it as `installed_driver_version`. Runs only when the module file was found.
- name: check nvidia driver version
  # Single quotes keep the backslashes in the grep pattern literal.
  shell: 'nvidia-smi | grep -Po "Driver Version: \K\S+"'
  register: installed_driver_version
  # Read-only query: never report a change.
  changed_when: false
  when: nvidia_driver.stat.exists
  # Execute even in check mode so the comparison tasks below have a value.
  check_mode: false

# Default: do not install the driver unless a later task flips this flag.
- name: set install default
  set_fact:
    install_driver: false

# Default: do not uninstall the driver unless a later task flips this flag.
# NOTE(review): the task body was missing; the value below mirrors
# "set install default" and the later `when: uninstall_driver` checks.
# Confirm against the upstream role.
- name: set uninstall default
  set_fact:
    uninstall_driver: false

# Request an install when no nvidia module file was found, or when the
# installed driver version differs from `nvidia_version`.
- name: set install
  set_fact:
    install_driver: true
  when: not nvidia_driver.stat.exists or not installed_driver_version.stdout == nvidia_version

# Request an uninstall when a module file exists but its version differs from
# `nvidia_version`.
- name: set uninstall
  set_fact:
    uninstall_driver: true
  when: nvidia_driver.stat.exists and not installed_driver_version.stdout == nvidia_version

# Best-effort removal of the nvidia kernel modules; the trailing `|| true`
# makes the command succeed even when rmmod fails.
# NOTE(review): this task has neither `when: uninstall_driver` nor `become`,
# unlike the tasks after it. Confirm it is meant to run unconditionally.
- name: Unload nvidia driver
  shell: >-
    rmmod nvidia_uvm nvidia_drm nvidia_modeset nvidia || true
# Stop the nvidia-persistenced service when the driver is being uninstalled.
- name: stop the persistence daemon
  service:
    name: nvidia-persistenced
    state: stopped
  become: true
  become_user: root
  when: uninstall_driver

# Kill (SIGKILL) every process whose `ps ax` line contains "X :0" when the
# driver is being uninstalled.
- name: kill any X processes
  # `ps ax` right-aligns the PID column, so splitting on a single space with
  # `cut -f 1 -d " "` returns an empty first field for most PIDs and nothing
  # gets killed. awk splits on runs of whitespace and returns the PID.
  shell: >-
    ps ax | grep "X :0" | grep -v grep | awk '{print $1}' | xargs -I{} kill -9 {}
  become: true
  become_user: root
  when: uninstall_driver

# Download the NVIDIA .run installer for `nvidia_version` into /tmp when an
# install has been requested.
# NOTE(review): the download uses plain http and no checksum; confirm the
# source host is trusted.
- name: get nvidia driver
  become: true
  become_user: root
  when: install_driver
  get_url:
    url: "http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"
    dest: "/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"

#- name: Copy boot file
#  template: src=grub.conf.j2 dest=/boot/grub/grub.conf
#
#- name: Copy X config file
#  template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf

# Render the xserver template to /etc/pam.d/xserver.
# NOTE(review): no `become` here although the destination is under /etc;
# presumably privilege escalation is inherited from the play. Confirm.
- name: Copy xserver file
  template:
    dest: /etc/pam.d/xserver
    src: xserver.j2
# Make the downloaded installer executable before it is run.
- name: chmod nvidia driver builder
  file:
    path: "/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"
    # Quoted so YAML does not reinterpret the octal literal as an integer.
    mode: "0755"
  become: true
  when: install_driver

# Run the NVIDIA installer when an install has been requested.
# NOTE(review): the meaning of the `-q -a -n -X -s` flags is not documented
# here; check the installer's --advanced-options help before changing them.
- name: build nvidia driver
  shell: /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run -q -a -n -X -s
  when: install_driver

# Invoke `nvidia-smi --gom=0` and keep its result in `nvidiagomcall`. The task
# counts as changed unless stdout contains "cannot be changed".
- name: set the GOM
  become: true
  become_user: root
  shell: nvidia-smi --gom=0
  register: nvidiagomcall
  # only tested on a k80
  changed_when: '"cannot be changed" not in nvidiagomcall.stdout'

# Start nvidia-persistenced now and enable it so it also starts at boot.
- name: enable persistenced on boot
  become: true
  become_user: root
  service:
    name: nvidia-persistenced
    enabled: true
    state: started

# Generate /etc/X11/xorg.conf with nvidia-xconfig. The `creates` guard skips
# the command when that file already exists.
- name: Configure xorg.conf with nvidia-xconfig so xorg.conf matches gpu number
  shell: /usr/bin/nvidia-xconfig -a --use-display-device=none --preserve-busid
  args:
    creates: /etc/X11/xorg.conf
  become: true
  become_user: root
#- name: Template xorg.conf for nodes with one GPU
#  template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf
#  become: true
#  become_user: root
#  when: template_xorgconf is defined and template_xorgcon


# Run the role's nvidia-xconf-gen.py script on the target and register its
# result as `nvidiacards`; a later task parses its stdout as JSON.
- name: run nvidia-xconf-gen
  script: scripts/nvidia-xconf-gen.py
  register: nvidiacards
  # Execute even in check mode so `nvidiacards.stdout` is available.
  check_mode: false

# Parse the JSON printed by the nvidia-xconf-gen script into the
# `nvidiacardslist` fact.
- name: set env for nvidia_card_lists
  set_fact:
    nvidiacardslist: "{{ nvidiacards.stdout | from_json }}"

# Render xorg.conf.j2 once per entry of `nvidiacardslist`, writing each result
# to the path given by that entry's 'filename' key.
- name: generate nvidia-xorg-conf
  with_items: "{{ nvidiacardslist }}"
  template:
    dest: "{{ item['filename'] }}"
    src: xorg.conf.j2