Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
HPCasCode
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
hpc-team
HPCasCode
Commits
dfb9559a
Commit
dfb9559a
authored
8 years ago
by
Chris Hines
Browse files
Options
Downloads
Patches
Plain Diff
add better conditionals for installing mellanox and nvidia drivers
parent
8ba93b0e
No related branches found
No related tags found
1 merge request
!88
add better conditionals for installing mellanox and nvidia drivers
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
roles/gpu/tasks/main.yml
+9
-4
9 additions, 4 deletions
roles/gpu/tasks/main.yml
roles/mellanox_drivers/tasks/main.yml
+29
-3
29 additions, 3 deletions
roles/mellanox_drivers/tasks/main.yml
roles/mellanox_drivers/vars/mellanoxVars.yml
+1
-1
1 addition, 1 deletion
roles/mellanox_drivers/vars/mellanoxVars.yml
with
39 additions
and
8 deletions
roles/gpu/tasks/main.yml
+
9
−
4
View file @
dfb9559a
...
@@ -51,16 +51,21 @@
...
@@ -51,16 +51,21 @@
become
:
true
become
:
true
become_user
:
root
become_user
:
root
-
name
:
get kernel version
shell
:
uname -r
register
:
kernel_version
-
name
:
check nvidia driver
-
name
:
check nvidia driver
stat
:
path="/
usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}
"
stat
:
path="/
lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko
"
register
:
opencl
register
:
nvidia_driver
ignore_errors
:
true
ignore_errors
:
true
-
name
:
get nvidia driver
-
name
:
get nvidia driver
get_url
:
url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
get_url
:
url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
become
:
true
become
:
true
become_user
:
root
become_user
:
root
when
:
not
opencl
.stat.exists
when
:
not
nvidia_driver
.stat.exists
#- name: Copy boot file
#- name: Copy boot file
# template: src=grub.conf.j2 dest=/boot/grub/grub.conf
# template: src=grub.conf.j2 dest=/boot/grub/grub.conf
...
@@ -77,7 +82,7 @@
...
@@ -77,7 +82,7 @@
-
name
:
build nvidia driver
-
name
:
build nvidia driver
shell
:
chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
shell
:
chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
sudo
:
true
sudo
:
true
when
:
not
opencl
.stat.exists
when
:
not
nvidia_driver
.stat.exists
-
name
:
set the GOM
-
name
:
set the GOM
shell
:
nvidia-smi --gom=0
shell
:
nvidia-smi --gom=0
...
...
This diff is collapsed.
Click to expand it.
roles/mellanox_drivers/tasks/main.yml
+
29
−
3
View file @
dfb9559a
...
@@ -14,12 +14,38 @@
...
@@ -14,12 +14,38 @@
ignore_errors
:
true
ignore_errors
:
true
-
name
:
default dont install
-
name
:
default dont install
set_fact
:
install_now=false reboot_now=False
set_fact
:
install_now
:
false
reboot_now
:
false
-
name
:
set install
-
name
:
get driver version
set_fact
:
install_now=True reboot_now=True
command
:
'
ofed_info
|
head
-n
1
|
cut
-f
1
-d
"
"'
register
:
driver_version
ignore_errors
:
true
-
name
:
get desired driver version
shell
:
'
echo
{{
MELLANOX_DRIVER_SRC
}}
|
cut
-f
1,2,3
-d
"-"'
register
:
desired_driver_version
-
name
:
set install due to drivers not installed
set_fact
:
install_now
:
true
reboot_now
:
true
when
:
drivers_installed | failed
when
:
drivers_installed | failed
-
name
:
debug
debug
:
var=driver_version
-
name
:
debug
debug
:
var=desired_driver_version
-
name
:
set install due to version mismatch
set_fact
:
install_now
:
true
reboot_now
:
true
when
:
driver_version | failed or not desired_driver_version.stdout in driver_version.stdout
-
name
:
copy driver source
-
name
:
copy driver source
unarchive
:
copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
unarchive
:
copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
sudo
:
true
sudo
:
true
...
...
This diff is collapsed.
Click to expand it.
roles/mellanox_drivers/vars/mellanoxVars.yml
+
1
−
1
View file @
dfb9559a
...
@@ -2,4 +2,4 @@
...
@@ -2,4 +2,4 @@
#note. do not add '.tgz' to driver src. done in playbook
#note. do not add '.tgz' to driver src. done in playbook
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
MELLANOX_DRIVER_SRC
:
"
{%
if
ansible_os_family
==
'RedHat'
%}MLNX_OFED_LINUX-3.3-1.0.
0
.0-rhel7.2-x86_64{%
elif
ansible_os_family
==
'Debian'
%}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{%
endif
%}"
MELLANOX_DRIVER_SRC
:
"
{%
if
ansible_os_family
==
'RedHat'
%}MLNX_OFED_LINUX-3.3-1.0.
4
.0-rhel7.2-x86_64{%
elif
ansible_os_family
==
'Debian'
%}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{%
endif
%}"
This diff is collapsed.
Click to expand it.
Chris Hines
@chines
mentioned in commit
c029cfbb
·
4 years ago
mentioned in commit
c029cfbb
mentioned in commit c029cfbb35a3a581ca634ce14b14f48faaeff085
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment