From 6d609d378d37a7afbf306139b403913c93cc3f2e Mon Sep 17 00:00:00 2001
From: handreas <andreas.hamacher@monash.edu>
Date: Thu, 13 Feb 2020 22:21:52 +0000
Subject: [PATCH] avoid errors with sacctmgr

Former-commit-id: 89125730155ca9dfcc4f0b10d10dffeec87f579d
---
 CICD/heat/server_rebuild.sh      | 62 ++++++++++++++++++++++++++++++++
 roles/slurm-start/tasks/main.yml |  3 +-
 2 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100755 CICD/heat/server_rebuild.sh

diff --git a/CICD/heat/server_rebuild.sh b/CICD/heat/server_rebuild.sh
new file mode 100755
index 00000000..2e9aa620
--- /dev/null
+++ b/CICD/heat/server_rebuild.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Rebuild OpenStack servers whose names start with the CICD-prefixed STACKNAME.
+# Usage: STACKNAME=<stack> server_rebuild.sh all | <ansible_host_group> [...]
+REBOOT_TIME=90  # seconds to sleep after the rebuilds have returned
+NODETYPES="$*"  # requested host groups joined into one space-separated string
+
+# STACKNAME has to be provided by the calling environment
+if [[ -z "${STACKNAME:-}" ]]; then
+    echo "please define STACKNAME variable" >&2
+    exit 1
+fi
+
+# stack names carry a CICD prefix; add it unless the caller already supplied it
+if [[ "$STACKNAME" == "CICD"* ]]; then
+  echo "CICD found in stackname. doing nothing"
+else
+  STACKNAME="CICD${STACKNAME}"
+fi
+
+# Check that `openstack stack list` contains $STACKNAME as a whole word.
+# Prints the match plus "stack found"/"stack not found"; returns 1 if absent.
+# The name is quoted and matched with -F so it is taken literally.
+check_stack_exists() {
+    if ! openstack stack list | grep -wF -- "$STACKNAME"; then
+        echo "stack not found"; return 1
+    fi
+    echo "stack found"
+}
+
+# nothing to rebuild when the stack does not exist; this is not treated as an error
+if ! check_stack_exists; then
+    exit 0
+fi
+
+# JSON dict mapping each server whose name starts with STACKNAME to its list of ansible_host_groups.
+# python raises AttributeError (re.search returns None) if such a server has no ansible_host_groups property
+host_group_mapping=$(openstack server list --long -f json | python3 -c "import json,sys,re;ivt=json.load(sys.stdin);json.dump({i['Name']: re.search('ansible_host_groups\=\'\[(.+)\]\'', i['Properties']).groups()[0].replace('\"', '').split() for i in ivt if re.match('$STACKNAME',i['Name'])}, fp=sys.stdout)")
+
+# every ansible_host_groups entry of those servers, space separated
+available_groups=$(printf '%s' "$host_group_mapping" | python3 -c "import json,sys;mapping=json.load(sys.stdin);output=[];[output.extend(v) for v in mapping.values()];print(output)" | tr -d "[',]")
+
+# "all" as first parameter selects every group; otherwise only the groups given on the command line are used
+[[ "$1" == "all" ]] && NODETYPES=$available_groups
+echo "going to update group $NODETYPES"
+# keep servers in at least one requested group; filter on NODETYPES, since every server matches available_groups
+server_list=$(printf '%s' "$host_group_mapping" | python3 -c "import json,sys;mapping=json.load(sys.stdin);wanted=set('$NODETYPES'.split());print([k for k in mapping.keys() if wanted.intersection(mapping[k])])" | tr -d "[,]'" | xargs -n1 | sort -u)
+# Rebuild the server named by $1 and wait (--wait) for the rebuild to finish.
+rebuild_func() {
+    echo "rebuilding server $1"; openstack server rebuild --wait "$1"
+}
+
+# launch one background rebuild per server; $server_list is split on whitespace on purpose
+for server in $server_list; do
+    rebuild_func "$server" &
+done
+# block until every background rebuild has returned
+wait
+# give the rebuilt servers extra time to finish rebooting
+echo "waiting for reboot"
+sleep "$REBOOT_TIME"
+echo "All done"
+
diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml
index 5bd124c0..df512513 100644
--- a/roles/slurm-start/tasks/main.yml
+++ b/roles/slurm-start/tasks/main.yml
@@ -64,9 +64,8 @@
 
 
 - name: "create cluster in slurm db"
-  shell:  "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
+  shell:  "{{ slurm_dir }}/bin/sacctmgr show assoc format=Cluster -Pn | grep -q {{ clustername }} || {{ slurm_dir }}/bin/sacctmgr -i create cluster {{ clustername }}"
   become: true
-  ignore_errors: true
 
 - name: start slurmctl
   service: name=slurmctld state=restarted enabled=no
-- 
GitLab