diff --git a/roles/nvidia_mig_configure/tasks/main.yml b/roles/nvidia_mig_configure/tasks/main.yml
index 87bca3b74750ea32807a7b6d3c7c07d45ad64902..2ec82741afed4c893a199594522d6112ba3568ec 100644
--- a/roles/nvidia_mig_configure/tasks/main.yml
+++ b/roles/nvidia_mig_configure/tasks/main.yml
@@ -1,5 +1,6 @@
 - name: Check if an nvidia GPU is installed
   shell: 'if lspci | grep -q "3D controller: NVIDIA"; then echo True; else echo False; fi;'
+  check_mode: false  # read-only probe; run it even in check mode so nvidia_gpu_found is populated
   when: ansible_os_family == "Debian"
   register: nvidia_gpu_found
 
@@ -9,6 +10,7 @@
 
 - name: Check if a MIG supported GPU exists. Unsupport GPUs return [N/A], supported return 'Enabled' or 'Disabled'
-  shell: "if nvidia-smi -i 0 --query-gpu=pci.bus_id,mig.mode.current --format=csv,noheader | grep -q '[N/A]'; then echo False; else echo True; fi;"
+  shell: "if nvidia-smi -i 0 --query-gpu=pci.bus_id,mig.mode.current --format=csv,noheader | grep -qF '[N/A]'; then echo False; else echo True; fi;"
+  check_mode: false  # read-only probe; run it even in check mode so mig_GPU is populated
   register: mig_GPU
   when: ansible_os_family == "Debian" and nvidia_gpu_found.stdout == "True"
 
@@ -28,22 +30,26 @@
     - name: install mig_config.yml
       copy:
         src: files/mig_config.yml
-        dest: "{{ nvidia_mig_parted_src_dir }}/mig_config.yml"
+        dest: "/etc/nvidia-mig-manager/config.yaml"
+      become: true
 
-    - name: assert mig config
-      command: "./nvidia-mig-parted assert -f {{ mig_config }} -c {{ mig_setting }}"
-      args:
-        chdir: "{{ nvidia_mig_parted_src_dir }}"
+    - name: replace mig setting to apply in systemd service config
+      replace:
+        path: "/etc/systemd/system/nvidia-mig-manager.service.d/override.conf"
+        regexp: 'Environment="MIG_PARTED_SELECTED_CONFIG=[^"]*"'  # match any current value, not only all-disabled, so later changes to mig_setting still apply
+        replace: 'Environment="MIG_PARTED_SELECTED_CONFIG={{ mig_setting }}"'
       become: true
-      check_mode: no
-      changed_when: false
-      ignore_errors: true
-      register: mig_state_assert
 
-    - name: apply mig config
-      command: "./nvidia-mig-parted apply -f {{ mig_config }} -c {{ mig_setting }}"
-      args:
-        chdir: "{{ nvidia_mig_parted_src_dir }}"
+    - name: reload systemd
+      ansible.builtin.systemd:
+        daemon_reload: true  # make systemd re-read unit files after the override.conf edit above
+      become: true
+
+    - name: start mig service
+      service:
+        name: nvidia-mig-manager
+        state: restarted  # NOTE: restarts on every run, even when nothing above changed
+        enabled: true
       become: true
 
     - name: generate mig config
diff --git a/roles/nvidia_mig_tools/tasks/main.yml b/roles/nvidia_mig_tools/tasks/main.yml
index 6537826ea2dac9082bfaff09cb48a747c6d93f63..39bf3e063ab6eb6e99ee5f2baa734355e6bd208a 100644
--- a/roles/nvidia_mig_tools/tasks/main.yml
+++ b/roles/nvidia_mig_tools/tasks/main.yml
@@ -1,6 +1,7 @@
 - name: Check if an nvidia GPU is installed
   shell: 'if lspci | grep -q "3D controller: NVIDIA"; then echo True; else echo False; fi;'
   when: ansible_os_family == "Debian"
+  check_mode: false  # read-only probe; run it even in check mode so nvidia_gpu_found is populated
   register: nvidia_gpu_found
 
 - name: Output nvidia_gpu_found
@@ -84,6 +85,20 @@
         chdir: "{{ nvidia_mig_parted_src_dir }}"
         creates: "{{ nvidia_mig_parted_src_dir }}/nvidia-mig-parted"
 
+    - name: install systemd service
+      command: ./install.sh
+      args:
+        chdir: "{{ nvidia_mig_parted_src_dir }}/deployments/systemd/"
+        creates: "/etc/profile.d/nvidia-mig-parted.sh"  # task is skipped once this file exists (presumably written by install.sh - confirm)
+      become: true
+
+    - name: symlink nvidia-mig-parted executable into /usr/bin  # TODO: replace with a proper install step
+      file:
+        src: "{{ nvidia_mig_parted_src_dir }}/nvidia-mig-parted"
+        dest: "/usr/bin/nvidia-mig-parted"
+        state: link
+      become: true
+
     - name: Create mig_slurm_discovery dir if it does not exist
       file:
         path: "{{ nvidia_mig_slurm_discovery_src_dir }}"