From 5e020e88ec7a75cfb0f5811186f77327c144edbd Mon Sep 17 00:00:00 2001 From: handreas <andreas.hamacher@monash.edu> Date: Wed, 15 Apr 2020 02:22:10 +0000 Subject: [PATCH] k2 not k1 and ubuntu fixes Former-commit-id: 81dc6d671571dc30421b5597a8ef24bb9bf9e67d --- CICD/heat/gc_HOT.yaml | 6 ++--- CICD/heat/gc_secgroups.hot | 9 ++++--- CICD/plays/computenodes.yml | 2 +- .../files/scripts/nvidia-xconf-gen.py | 24 +++++++++---------- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/CICD/heat/gc_HOT.yaml b/CICD/heat/gc_HOT.yaml index a57d7d61..4502852f 100644 --- a/CICD/heat/gc_HOT.yaml +++ b/CICD/heat/gc_HOT.yaml @@ -69,12 +69,12 @@ resources: list_join: [ '-', [ { get_param: "OS::stack_name" }, 'sql0' ]] availability_zone: { get_param: avz } flavor: m3.xsmall - image: { get_param: centos_7_image_id } + image: { get_param: ubuntu_1804_image_id } key_name: { get_param: ssh_key } security_groups: [ { get_param: SSHMonashSecGroupID }, { get_param: SlurmSecGroupID }, { get_param: MySQLSecGroupID }, { get_param: NFSSecGroupID } ] metadata: ansible_host_groups: [ SQLNodes, NFSNodes ] - ansible_ssh_user: ec2-user + ansible_ssh_user: ubuntu project_name: { get_param: project_name } networks: - network: { get_param: NetID } @@ -267,7 +267,7 @@ resources: list_join: [ '-', [ { get_param: "OS::stack_name" }, 'gpudesktopu%index%' ]] security_groups: [ default, { get_param: SSHMonashSecGroupID }, { get_param: SlurmSecGroupID }, { get_param: NFSSecGroupID } ] metadata: - ansible_host_groups: [ DesktopNodes, GPU, ComputeNodes, K1Nodes, VisNodes ] + ansible_host_groups: [ DesktopNodes, GPU, ComputeNodes, VisNodes ] ansible_ssh_user: ubuntu project_name: { get_param: project_name } networks: diff --git a/CICD/heat/gc_secgroups.hot b/CICD/heat/gc_secgroups.hot index ad6e7790..fe7d4b35 100644 --- a/CICD/heat/gc_secgroups.hot +++ b/CICD/heat/gc_secgroups.hot @@ -10,13 +10,16 @@ resources: name: "heatslurmsecgroup" rules: [ { protocol: tcp, port_range_min: 12000, - port_range_max: 12999}, + port_range_max: 12999, + remote_mode: "remote_group_id"}, { protocol: tcp, port_range_min: 6817, - port_range_max: 6819}, + port_range_max: 6819, + remote_mode: "remote_group_id"}, { protocol: tcp, port_range_min: 1019, - port_range_max: 1019}] + port_range_max: 1019, + remote_mode: "remote_group_id"}] NFSSecGroup: type: "OS::Neutron::SecurityGroup" properties: diff --git a/CICD/plays/computenodes.yml b/CICD/plays/computenodes.yml index aec7acba..84baba7c 100644 --- a/CICD/plays/computenodes.yml +++ b/CICD/plays/computenodes.yml @@ -96,7 +96,7 @@ - hosts: 'VisNodes' roles: - - { role: systemd-nvidia-uvm, tags: [ uvm ] } + - { role: systemd-nvidia-uvm, tags: [ uvm,SiteSpecific ] } - hosts: 'VisNodes' roles: diff --git a/roles/deploy-xorg/files/scripts/nvidia-xconf-gen.py b/roles/deploy-xorg/files/scripts/nvidia-xconf-gen.py index f2644697..337414bd 100755 --- a/roles/deploy-xorg/files/scripts/nvidia-xconf-gen.py +++ b/roles/deploy-xorg/files/scripts/nvidia-xconf-gen.py @@ -11,14 +11,18 @@ from subprocess import call import re import json -def grab_card_ids(): - # This method runs nvidia-smi to grab the card ids, then returns a list - - if not os.path.isfile("/bin/nvidia-smi"): +def getNvidia_smi_path(): + if os.path.isfile("/bin/nvidia-smi"): + return "/bin/nvidia-smi" + elif os.path.isfile("/usr/bin/nvidia-smi"): + return "/usr/bin/nvidia-smi" + else: print("nvidia-smi binary not found!") - exit(1) + exit(1) - cmd = ["/bin/nvidia-smi", "--query-gpu=pci.bus_id","--format=csv,noheader"] +def grab_card_ids(): + # This method runs nvidia-smi to grab the card ids, then returns a list + cmd = [getNvidia_smi_path(), "--query-gpu=pci.bus_id","--format=csv,noheader"] p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) cards = [] @@ -27,15 +31,11 @@ def grab_card_ids(): line = line.rstrip().split(":")[2] pcibus_num = int(re.sub('[.:]', '', line).rstrip("0"),16) card = "PCI:0:{}:0".format(str(pcibus_num)) - cards.append(card) + cards.append(card) return cards def grab_card_boardname(): - if not os.path.isfile("/bin/nvidia-smi"): - print("nvidia-smi binary not found!") - exit(1) - - cmd = ["/bin/nvidia-smi", "--query-gpu=name","--format=csv,noheader"] + cmd = [getNvidia_smi_path(), "--query-gpu=name","--format=csv,noheader"] cards = [] p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) for line in p.stdout.readlines(): -- GitLab