Skip to content
Snippets Groups Projects
Commit ed581017 authored by Trung Nguyen's avatar Trung Nguyen
Browse files

Add improvement to roles

Former-commit-id: 3f8a4500
parent a1366171
No related branches found
No related tags found
No related merge requests found
......@@ -43,11 +43,7 @@
check_mode: no
- name: disable unwanted repos
shell: yum-config-manager --disable {{ item }}
# yum_repository:
# name: "{{ item }}"
# enabled: False
# state: 'absent'
shell: yum-config-manager --disable "{{ item }}"
with_items: "{{repolist.stdout_lines|difference(yumenablerepo)}}"
become: true
become_user: root
......
---
# Install the compiler, kernel headers and X11 packages ahead of the driver
# build performed later in this file.
# Uses `become` (as the rest of this file does) instead of the deprecated
# `sudo` keyword, and the canonical `present` state instead of `installed`.
- name: install deps
  yum:
    name: "{{ item }}"
    state: present
  become: true
  become_user: root
  with_items:
    - gcc
    - perl
    - wget
    - pciutils
    - kernel-headers
    - kernel-devel
    - xterm
    - libX11-common
    - libX11-devel
    - libX11
    - xorg-x11-server-common
    - xorg-x11-util-macros
    - xorg-x11-server-utils
    - xorg-x11-font-utils
    - xorg-x11-server-Xorg
    - xorg-x11-glamor
    - xorg-x11-xinit
    - xorg-x11-utils
    - xorg-x11-xauth
    - xorg-x11-proto-devel
    - xorg-x11-xkb-utils
# The leading "@" selects a yum package group rather than a single package.
- name: install development tools
  yum:
    name: "@Development Tools"
    state: present
  become: true
  become_user: root
# Drop a modprobe blacklist file for the nouveau module.
- name: disable nouveau
  template:
    src: blacklist-nouveau.conf.j2
    dest: /etc/modprobe.d/blacklist-nouveau.conf
  become: true
  become_user: root
# Install the systemd unit that the service tasks further down start/stop.
- name: template unit for persistenced
  template:
    src: nvidia-persistenced.service
    dest: /etc/systemd/system/nvidia-persistenced.service
  become: true
  become_user: root
# System account with a non-login shell.
- name: create the nvidia-persistenced user
  user:
    name: nvidia-persistenced
    state: present
    system: true
    shell: /bin/false
  become: true
  become_user: root
# Unload nouveau from the running kernel if it is currently loaded.
- name: remove nouveau
  modprobe:
    name: nouveau
    state: absent
  become: true
  become_user: root
# Probe the running kernel release; it is used to build the module path below.
# Runs even in check mode and never reports "changed" because it only reads.
- name: get kernel version
  shell: uname -r
  register: kernel_version
  check_mode: false
  changed_when: false
# Look for an already-built nvidia kernel module for the running kernel.
- name: check nvidia driver
  stat:
    path: "/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko"
  register: nvidia_driver
  ignore_errors: true
# NOTE(review): the next task registers the same variable name. Ansible still
# registers a result for a skipped task, so this '0.0' default is presumably
# replaced by a "skipped" result instead of being kept - confirm before relying
# on it.
- name: set default driver version
  set_fact:
    installed_driver_version: '0.0'
# Extract the version string that follows "Driver Version: " in nvidia-smi
# output. Only attempted when the module file exists. Read-only, so it is
# marked as never changed.
- name: check nvidia driver version
  shell: 'nvidia-smi | grep -Po "Driver Version: \K\S+"'
  register: installed_driver_version
  when: nvidia_driver.stat.exists
  check_mode: false
  changed_when: false
- name: debug - installed nvidia driver version
  debug:
    msg: "{{ installed_driver_version }} "
# Work out which driver actions are needed. Both flags start out false and are
# flipped by the conditional tasks below.
- name: set install default
  set_fact:
    install_driver: false
- name: set uninstall default
  set_fact:
    uninstall_driver: false
# Install when no module file was found, or when the reported version differs
# from nvidia_version.
- name: set install
  when: >-
    not nvidia_driver.stat.exists
    or installed_driver_version.stdout_lines[0] != nvidia_version
  set_fact:
    install_driver: true
# Uninstall only when a module file exists and its version differs from
# nvidia_version.
- name: set uninstall
  when: >-
    nvidia_driver.stat.exists
    and installed_driver_version.stdout_lines[0] != nvidia_version
  set_fact:
    uninstall_driver: true
# Best-effort unload of the nvidia kernel modules; `|| true` keeps the task
# green when the modules are not loaded. Uses `become` like the rest of this
# file instead of the deprecated `sudo` keyword.
- name: Unload nvidia driver
  shell: rmmod nvidia_uvm nvidia_drm nvidia_modeset nvidia || true
  become: true
  become_user: root
  when: install_driver
- name: stop the persistence daemon
  service:
    name: nvidia-persistenced
    state: stopped
  become: true
  become_user: root
  when: uninstall_driver
# `ps ax` right-aligns the PID column, so short PIDs are preceded by spaces and
# `cut -f 1 -d " "` yields an empty field. awk splits on runs of whitespace and
# always returns the PID.
- name: kill any X processes
  shell: >-
    ps ax | grep "X :0" | grep -v grep | awk '{print $1}'
    | xargs -I{} kill -9 {}
  become: true
  become_user: root
  when: uninstall_driver
# Fetch the installer that matches the currently installed driver version so
# that its own --uninstall mode can be used. A failed download is tolerated.
- name: get old nvidia driver
  get_url:
    url: "http://consistency0/src/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run"
    dest: "/tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run"
  become: true
  become_user: root
  when: uninstall_driver
  ignore_errors: true
- name: uninstall old nvidia driver
  shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run; /tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run --uninstall --silent
  become: true
  become_user: root
  when: uninstall_driver
# Guarded with uninstall_driver like the two tasks above: the path is built
# from installed_driver_version.stdout_lines, which only exists when the
# version probe actually ran.
- name: clean up old driver installation file
  file:
    state: absent
    path: "/tmp/NVIDIA-Linux-x86_64-{{ installed_driver_version.stdout_lines[0] }}.run"
  become: true
  become_user: root
  when: uninstall_driver
  ignore_errors: true
# Download the installer for the wanted nvidia_version into /tmp.
- name: get nvidia driver
  when: install_driver
  become: true
  become_user: root
  get_url:
    url: "http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"
    dest: "/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"
#- name: Copy boot file
# template: src=grub.conf.j2 dest=/boot/grub/grub.conf
# sudo: true
#
#- name: Copy X config file
# template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf
# sudo: true
# Render the PAM configuration for the X server.
- name: Copy xserver file
  become: true
  become_user: root
  template:
    src: xserver.j2
    dest: /etc/pam.d/xserver
# Make the downloaded installer executable and run it non-interactively.
- name: build nvidia driver
  when: install_driver
  become: true
  become_user: root
  shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
# Runs unconditionally on every play.
- name: set the GOM
  become: true
  become_user: root
  shell: nvidia-smi --gom=0
# Start the persistence daemon now and enable it for subsequent boots.
- name: enable persistenced on boot
  become: true
  become_user: root
  service:
    name: nvidia-persistenced
    state: started
    enabled: true
# `creates` makes this a no-op once /etc/X11/xorg.conf exists.
- name: Configure xorg.conf with nvidia-xconfig so xorg.conf matches gpu number
  become: true
  become_user: root
  shell: /usr/bin/nvidia-xconfig -a --use-display-device=none --preserve-busid
  args:
    creates: /etc/X11/xorg.conf
#- name: Template xorg.conf for nodes with one GPU
# template: src=xorg.conf.j2 dest=/etc/X11/xorg.conf
# become: true
# become_user: root
# when: template_xorgconf is defined and template_xorgcon
# Run the helper script and capture its output; it is executed even in check
# mode because later tasks need its result.
- name: run nvidia-xconf-gen
  script: scripts/nvidia-xconf-gen.py
  register: nvidiacards
  check_mode: false
# The script's stdout is parsed as JSON into a fact.
- name: set env for nvidia_card_lists
  set_fact:
    nvidiacardslist: "{{ nvidiacards.stdout | from_json }}"
# Render one xorg config per list entry; each entry supplies the destination
# path in its 'filename' key. Uses `become` like the rest of this file instead
# of the deprecated `sudo` keyword.
- name: generate nvidia-xorg-conf
  template:
    src: xorg.conf.j2
    dest: "{{ item['filename'] }}"
  become: true
  become_user: root
  with_items: "{{ nvidiacardslist }}"
# Remove the downloaded installer from /tmp; failures here are not fatal.
- name: clean up nvidia driver installation file
  become: true
  become_user: root
  ignore_errors: true
  file:
    path: "/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run"
    state: absent
......@@ -9,29 +9,33 @@
when: ansible_os_family == "RedHat"
# Probe for an existing driver installation. A task may carry only one action,
# so the stale `command: ibv_devinfo` line is dropped in favour of the
# absolute-path `shell` form. A failure is tolerated and left in
# drivers_installed for later tasks to inspect. Marked as never changed
# because it is only a probe.
- name: test for existing installation of drivers
  shell: '/bin/ibv_devinfo'
  become: true
  become_user: root
  register: drivers_installed
  ignore_errors: true
  check_mode: false
  changed_when: false
# Show the raw result of the driver probe.
- name: debug - print out installed driver
  debug:
    var: drivers_installed
# Start from "nothing to do"; later tasks flip these flags when needed.
- name: default dont install
  set_fact:
    reboot_now: false
    install_now: false
# Read the first space-separated field of the first line of `ofed_info -l`.
# The pipeline needs a shell, so the stale `command:` variant (which cannot run
# pipes, and conflicts as a second action on the task) is removed.
- name: get driver version
  shell: '/bin/ofed_info -l | head -n 1 | cut -f 1 -d " "'
  register: driver_version
  ignore_errors: true
  check_mode: false
  changed_when: false
# Keep the first three dash-separated fields of MELLANOX_DRIVER_SRC.
# Read-only, and executed even in check mode.
- name: get desired driver version
  register: desired_driver_version
  changed_when: false
  check_mode: false
  shell: 'echo {{ MELLANOX_DRIVER_SRC }} | cut -f 1,2,3 -d "-"'
- name: set install due to drivers not installed
set_fact:
......@@ -51,6 +55,9 @@
reboot_now: true
when: driver_version | failed or not desired_driver_version.stdout in driver_version.stdout
# Show the install decision reached by the preceding tasks.
- name: debug - print out value of install_now
  debug:
    var: install_now
- name: copy driver source
unarchive: copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
become: true
......
......@@ -23,6 +23,12 @@
become_user: root
when: ansible_os_family=="RedHat" and yumdisablerepo is defined
# RedHat-family hosts only: clean up yum transaction state with
# --cleanup-only before the upgrade task that follows.
- name: Clear yum pending transaction
  when: ansible_os_family == 'RedHat'
  become: true
  become_user: root
  command: yum-complete-transaction --cleanup-only
- name: yum upgrade
yum: name=* state=latest
become: true
......@@ -46,11 +52,13 @@
register: rpm_q_output
when: ansible_os_family=="RedHat"
check_mode: no
changed_when: False
# Capture the running kernel release. Read-only, and executed even in check
# mode.
- name: get kernel version
  register: uname_r_output
  changed_when: false
  check_mode: false
  shell: uname -r
- name: default dont reboot
set_fact:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment