From 6d609d378d37a7afbf306139b403913c93cc3f2e Mon Sep 17 00:00:00 2001
From: handreas <andreas.hamacher@monash.edu>
Date: Thu, 13 Feb 2020 22:21:52 +0000
Subject: [PATCH] avoid errors with sacctmgr

Former-commit-id: 89125730155ca9dfcc4f0b10d10dffeec87f579d
---
 CICD/heat/server_rebuild.sh      | 78 ++++++++++++++++++++++++++++++++
 roles/slurm-start/tasks/main.yml |  4 +-
 2 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100755 CICD/heat/server_rebuild.sh

diff --git a/CICD/heat/server_rebuild.sh b/CICD/heat/server_rebuild.sh
new file mode 100755
index 00000000..2e9aa620
--- /dev/null
+++ b/CICD/heat/server_rebuild.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# Rebuild the OpenStack servers of a CICD heat stack, selected by their
+# ansible host groups.
+#
+# Usage: STACKNAME=<name> server_rebuild.sh all | <group> [<group> ...]
+# Env:   STACKNAME - stack name; "CICD" is prepended when it is missing
+# Needs: the openstack CLI and python3
+
+REBOOT_TIME=90 # seconds to sleep after the rebuilds
+
+# requested host groups as one space separated string
+NODETYPES="$*"
+
+# make sure you define variable STACKNAME in current environment
+if [[ -z "$STACKNAME" ]]; then
+  echo "please define STACKNAME variable"
+  exit 1
+fi
+
+# prepend CICD to stack name
+if [[ "$STACKNAME" == "CICD"* ]]; then
+  echo "CICD found in stackname. doing nothing"
+else
+  STACKNAME="CICD${STACKNAME}"
+fi
+
+# Succeeds, printing the matching line, when `openstack stack list` contains
+# STACKNAME as a whole word; returns 1 otherwise.
+function check_stack_exists {
+  if openstack stack list | grep -w -- "$STACKNAME"; then
+    echo "stack found"
+  else
+    echo "stack not found"
+    return 1
+  fi
+}
+
+# a missing stack ends the script successfully (presumably: nothing to rebuild)
+if ! check_stack_exists; then
+  exit 0
+fi
+
+# Build a dictionary in json format which maps server name to
+# ansible_host_groups. python3 raises an AttributeError if a server whose name
+# is prefixed by STACKNAME doesn't have the ansible_host_groups property.
+host_group_mapping=$(openstack server list --long -f json | python3 -c "import json,sys,re;ivt=json.load(sys.stdin);json.dump({i['Name']: re.search('ansible_host_groups\=\'\[(.+)\]\'', i['Properties']).groups()[0].replace('\"', '').split() for i in ivt if re.match('$STACKNAME',i['Name'])}, fp=sys.stdout)")
+
+# all available ansible_host_groups
+available_groups=$(echo "$host_group_mapping" | python3 -c "import json,sys;mapping=json.load(sys.stdin);output=[];[output.extend(v) for v in mapping.values()];print(output)" | tr -d "[',]")
+
+# if the first input parameter is all then rebuild all groups
+if [[ "$1" == "all" ]]; then
+  NODETYPES=$available_groups
+fi
+echo "going to update group $NODETYPES"
+
+# Select the servers that are in at least one of the requested groups
+# (NODETYPES), not every server of the stack.
+server_list=$(echo "$host_group_mapping" | python3 -c "import json,sys;mapping=json.load(sys.stdin);wanted=set('$NODETYPES'.split());print([k for k in mapping.keys() if len(set(mapping[k]).intersection(wanted)) > 0])" | tr -d "[,]'" | xargs -n1 | sort -u)
+
+# Rebuild one server ($1); --wait keeps the call running until it is done.
+rebuild_func(){
+  echo "rebuilding server $1"
+  openstack server rebuild --wait "$1"
+}
+
+# start one rebuild per selected server
+for server in $server_list
+do
+  rebuild_func "$server" & # run parallel in background
+done
+wait # wait for all servers to be rebuilt
+# add an extra time for reboot
+echo "waiting for reboot"
+sleep "$REBOOT_TIME"
+echo "All done"
+
diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml
index 5bd124c0..df512513 100644
--- a/roles/slurm-start/tasks/main.yml
+++ b/roles/slurm-start/tasks/main.yml
@@ -64,9 +64,9 @@
 
 
 - name: "create cluster in slurm db"
-  shell: "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
+  # only create the cluster when sacctmgr does not list it yet
+  shell: "{{ slurm_dir }}/bin/sacctmgr show assoc format=Cluster -Pn | grep -qxF {{ clustername }} || {{ slurm_dir }}/bin/sacctmgr -i create cluster {{ clustername }}"
   become: true
-  ignore_errors: true
 
 - name: start slurmctl
   service: name=slurmctld state=restarted enabled=no
-- 
GitLab