Skip to content
Snippets Groups Projects
Commit efc71fd9 authored by Jay Van Schyndel
Browse files

Merge branch 'mig_persistent' into 'master'

applying MIG via systemd to make it reboot persistent

See merge request !554
parents ee47030c 75087d08
No related branches found
No related tags found
1 merge request: !554 applying MIG via systemd to make it reboot persistent
# Probe the PCI bus for an NVIDIA "3D controller" and record the answer as the
# string "True" or "False" in nvidia_gpu_found.stdout.
- name: Check if an nvidia GPU is installed
  shell: 'if lspci | grep -q "3D controller: NVIDIA"; then echo True; else echo False; fi;'
  # Read-only probe: run it in --check mode as well so the registered result
  # is always populated.
  check_mode: false
  # The probe never modifies the host, so it must not report "changed".
  changed_when: false
  when: ansible_os_family == "Debian"
  register: nvidia_gpu_found
...@@ -9,6 +10,7 @@ ...@@ -9,6 +10,7 @@
# Ask nvidia-smi for GPU 0's current MIG mode and record "True" (MIG capable)
# or "False" (mode reported as [N/A]) in mig_GPU.stdout.
# NOTE(review): if nvidia-smi prints nothing (e.g. it fails), the else branch
# still echoes True - confirm that is acceptable.
- name: Check if a MIG supported GPU exists. Unsupport GPUs return [N/A], supported return 'Enabled' or 'Disabled'
  # grep -F matches the literal text "[N/A]". Without -F the pattern is a
  # bracket expression matching any single "N", "/" or "A", so a hex PCI bus id
  # containing "A" would wrongly be classified as unsupported.
  shell: >-
    if nvidia-smi -i 0 --query-gpu=pci.bus_id,mig.mode.current --format=csv,noheader
    | grep -qF '[N/A]'; then echo False; else echo True; fi;
  # Read-only probe: run it in --check mode as well so the registered result
  # is always populated.
  check_mode: false
  # The probe never modifies the host, so it must not report "changed".
  changed_when: false
  register: mig_GPU
  when: ansible_os_family == "Debian" and nvidia_gpu_found.stdout == "True"
...@@ -28,22 +30,26 @@ ...@@ -28,22 +30,26 @@
- name: install mig_config.yml - name: install mig_config.yml
copy: copy:
src: files/mig_config.yml src: files/mig_config.yml
dest: "{{ nvidia_mig_parted_src_dir }}/mig_config.yml" dest: "/etc/nvidia-mig-manager/config.yaml"
become: true
- name: assert mig config - name: replace mig setting to apply in systemd service config
command: "./nvidia-mig-parted assert -f {{ mig_config }} -c {{ mig_setting }}" replace:
args: path: "/etc/systemd/system/nvidia-mig-manager.service.d/override.conf"
chdir: "{{ nvidia_mig_parted_src_dir }}" regexp: 'Environment="MIG_PARTED_SELECTED_CONFIG=all-disabled"'
replace: 'Environment="MIG_PARTED_SELECTED_CONFIG={{ mig_setting }}"'
become: true become: true
check_mode: no
changed_when: false
ignore_errors: true
register: mig_state_assert
- name: apply mig config - name: reload systemd
command: "./nvidia-mig-parted apply -f {{ mig_config }} -c {{ mig_setting }}" ansible.builtin.systemd:
args: daemon_reload: yes
chdir: "{{ nvidia_mig_parted_src_dir }}" become: true
- name: start mig service
service:
name: nvidia-mig-manager
state: restarted
enabled: yes
become: true become: true
- name: generate mig config - name: generate mig config
......
# Probe the PCI bus for an NVIDIA "3D controller" and record the answer as the
# string "True" or "False" in nvidia_gpu_found.stdout.
- name: Check if an nvidia GPU is installed
  shell: 'if lspci | grep -q "3D controller: NVIDIA"; then echo True; else echo False; fi;'
  when: ansible_os_family == "Debian"
  # Read-only probe: run it in --check mode as well so the registered result
  # is always populated.
  check_mode: false
  # The probe never modifies the host, so it must not report "changed".
  changed_when: false
  register: nvidia_gpu_found
- name: Output nvidia_gpu_found - name: Output nvidia_gpu_found
...@@ -84,6 +85,20 @@ ...@@ -84,6 +85,20 @@
chdir: "{{ nvidia_mig_parted_src_dir }}" chdir: "{{ nvidia_mig_parted_src_dir }}"
creates: "{{ nvidia_mig_parted_src_dir }}/nvidia-mig-parted" creates: "{{ nvidia_mig_parted_src_dir }}/nvidia-mig-parted"
# Run install.sh from the deployments/systemd directory of the nvidia-mig-parted
# source tree, as root. The `creates` guard skips the script once
# /etc/profile.d/nvidia-mig-parted.sh exists, so it only runs on first install.
# NOTE(review): presumably install.sh is what drops that profile.d file - confirm.
- name: install systemd service
  become: true
  command:
    cmd: ./install.sh
    chdir: "{{ nvidia_mig_parted_src_dir }}/deployments/systemd/"
    creates: "/etc/profile.d/nvidia-mig-parted.sh"
# Expose the nvidia-mig-parted binary from the source directory on the system
# path by creating a symlink at /usr/bin/nvidia-mig-parted (as root).
# Original author's note: figuring out the correct way to install would be better :(
# NOTE(review): the task name says "usr_sbin" but the link is created in
# /usr/bin - confirm which location is intended.
- name: symlink nividia_mig_parted executeable into usr_sbin
  become: true
  file:
    state: link
    src: "{{ nvidia_mig_parted_src_dir }}/nvidia-mig-parted"
    dest: "/usr/bin/nvidia-mig-parted"
- name: Create mig_slurm_discovery dir if it does not exist - name: Create mig_slurm_discovery dir if it does not exist
file: file:
path: "{{ nvidia_mig_slurm_discovery_src_dir }}" path: "{{ nvidia_mig_slurm_discovery_src_dir }}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment