From f36c74b163b9a0a9328d37afba00914536a1e183 Mon Sep 17 00:00:00 2001
From: Chris Hines <chris.hines@monash.edu>
Date: Wed, 13 Oct 2021 14:41:52 +1100
Subject: [PATCH] Simplify CI/CD: rename stages, use an ephemeral SSH key, drop heat jobs

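Consolidate the pipeline: rename build_cluster_cicd to build_infra and
ansible_create_cluster_stage to configure_cluster, generate an ephemeral
ed25519 key (tmp_key) in build_infra instead of relying on the shared
GC_KEY secret, hand inventory.yml and tmp_key to later jobs as
artifacts, rebuild the tests job around that inventory, and delete the
retired extended, manual_cluster_spawn, heat_test and clean jobs along
with the commented-out scaffolding around them.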
---
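Reviewer note (ignored by git am): a minimal sketch of running the new
tests stage by hand, assuming the build_infra artifacts
(CICD/inventory.yml, CICD/tmp_key) are present in the checkout and that
run_tests.sh still takes the key path as its third argument, as it did
in the previous revision:

    # install the same tooling the CI image gets
    apt -y install openssh-client python3-pip && pip3 install ansible
    cd CICD
    # trust the proxy host named in ssh.cfg, then load the generated key
    mkdir -p ~/.ssh
    ssh-keyscan -H `cat ssh.cfg | grep Proxy | cut -f 2 -d "@"` >> ~/.ssh/known_hosts
    eval `ssh-agent` && ssh-add ./tmp_key
    export ANSIBLE_HOST_KEY_CHECKING=False
    export ANSIBLE_CONFIG=`pwd`/ansible.cfg
    # verify connectivity before running the test suite
    ansible -i inventory.yml -m ping all
    bash -e ./tests/run_tests.sh all "inventory.yml" "./tmp_key"
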
 .gitlab-ci.yml | 189 +++++++++++--------------------------------------
 1 file changed, 40 insertions(+), 149 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 24420a8b..7c00089e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -7,20 +7,12 @@ variables:
 
 stages:
   - lint
-  - extended
-  #- heat_test
-  - heat
   - openstack_create
-  - ansible_create_cluster_stage
-  - push_button_spawn_cluster
+  - configure_cluster
   - tests
   - integration_test #https://docs.gitlab.com/ee/ci/triggers/
   - openstack_destroy
 
-  #- clean
-  #- testlustre
-  #- clean_testlustre
-
 
 trigger_pipeline_in_Clusterbuild:
   stage: integration_test
@@ -67,7 +59,7 @@ ansiblelint:
     - python3 ansiblelint/run_lint.py --targets ../maintenance.yml
 
 
-build_cluster_cicd:
+build_infra:
   stage: openstack_create
   image: ubuntu
   allow_failure: false
@@ -75,8 +67,6 @@ build_cluster_cicd:
   - heat
   before_script:
     - cd $DEFAULT_PATH
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
     - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
   script:
     - whoami
@@ -84,8 +74,8 @@ build_cluster_cicd:
     - apt -y upgrade
     - apt -y install openssh-client python3-pip
     - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
-    - echo "create stage"
     - source ./$NECTAR_ALLOCATION-openrc.sh
+    - echo "Generating a random SSH key for our cluster"
     - ssh-keygen -f tmp_key -N "" -t ed25519
     - export SSH_CA=`cat tmp_key.pub`
     - cd infra
@@ -93,30 +83,22 @@ build_cluster_cicd:
     - ansible-playbook os_create.yml
     - python3 ./make_inventory.py $CI_PROJECT_NAME-ci-$CI_COMMIT_REF_NAME > ../inventory.yml
     - cd ..
-      #- ssh-keygen -R `cat ssh.cfg  | grep Proxy | cut -f 2 -d "@"`
-      #- ssh `cat ssh.cfg  | grep Proxy | cut -f 2 -d "="` -o StrictHostKeyChecking=accept-new exit 0
-
-      #- bash -x ./heat/heatcicdwrapper.sh create_or_update $STACKNAME
-    #- bash -x ./heat/server_rebuild.sh all
-  after_script:
-    - sleep 30 # artifically wait a bit to give the nodes time to boot
   artifacts:
     paths:
       - CICD/inventory.yml
       - CICD/tmp_key
       - CICD/tmp_key.pub
       - CICD/infra/os_vars.yml
-#  only:
-#    changes: #https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
-#    - "heat/*HOT*.yaml"
-#    - schedules
-#    - ./.gitlab-ci.yml
 
-ansible_create_cluster_stage:
-  stage: ansible_create_cluster_stage
+configure_cluster:
+  stage: configure_cluster
   image: ubuntu
   tags:
   - ansible
+  artifacts:
+    paths:
+      - CICD/files
+      - CICD/vars
   script:
     - whoami
     - apt update
@@ -124,147 +106,56 @@ ansible_create_cluster_stage:
     - apt -y install openssh-client python3-pip
     - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
     - cd CICD
-    - pwd
     - python3 ./make_files.py ./inventory.yml ./infra/os_vars.yml ./vars/versions.yml
-    #- ssh-keygen -R `cat ssh.cfg  | grep Proxy | cut -f 2 -d "@"`
     - mkdir -p ~/.ssh
     - ssh-keyscan -H `cat ssh.cfg  | grep Proxy | cut -f 2 -d "@"` >> ~/.ssh/known_hosts
     - eval `ssh-agent`
     - ssh-add ./tmp_key
-    - ls
-    - ssh-add -L
-    - cat ansible.cfg
-    - cat ssh.cfg
     - ssh -vvv `cat ssh.cfg  | grep Proxy | cut -f 2 -d "="` exit 0
     - export ANSIBLE_HOST_KEY_CHECKING=False
     - export ANSIBLE_CONFIG=`pwd`/ansible.cfg
     - ansible -i inventory.yml -m ping 'all'
     - ansible-playbook -i inventory.yml upgrade_and_reboot.yml || true
     - ansible-playbook -i inventory.yml master_playbook.yml
-    - sleep 15
-    - echo uglyuglyfix
-    - ansible -i inventory.yml -b -a "systemctl restart slurmdbd" ManagementNodes
-    - ansible -i inventory.yml -b -a "systemctl restart slurmctld" ManagementNodes
-      #    - cd plays
-      #    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../../gc_key.pem --skip-tags monitoring computenodes.yml | tee nochange.log
-      #    - echo [ `grep changed= ./nochange.log -c` = `grep changed=0 ./nochange.log -c` ] > bashtest.sh   # a crude way to make sure all changed lines are equal to changed=0
-      #    - bash ./bashtest.sh
-      #    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../../gc_key.pem --skip-tags monitoring --check computenodes.yml
+    # I don't think this is necessary any more
+    #- ansible -i inventory.yml -b -a "systemctl restart slurmdbd" ManagementNodes
+    #- ansible -i inventory.yml -b -a "systemctl restart slurmctld" ManagementNodes
 
 tests:
   stage: tests
   tags:
   - ansible
   before_script:
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - echo "tests stage"
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - openstack stack list
+    - whoami
+    - apt update
+    - apt -y upgrade
+    - apt -y install openssh-client python3-pip
+    - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
     - cd CICD
+    - mkdir -p ~/.ssh
+    - ssh-keyscan -H `cat ssh.cfg  | grep Proxy | cut -f 2 -d "@"` >> ~/.ssh/known_hosts
+    - eval `ssh-agent`
+    - ssh-add ./tmp_key
+    - export ANSIBLE_HOST_KEY_CHECKING=False
     - export ANSIBLE_CONFIG=`pwd`/ansible.cfg
-    - python3 ../scripts/make_inventory.py static $STACKNAME | tee ./files/inventory.$STACKNAME && chmod 755 ./files/inventory.$STACKNAME
-    - grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME   #fail if inventory file is empty
-    - ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
-    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
-    - echo -e '[defaults]\r\nallow_world_readable_tmpfiles = True' > ansible.cfg
-    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem ./tests/mockSlurmData.yml
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/sinfo" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/squeue" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/scontrol ping" LoginNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet ntpd" CentosNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet ntp" UbuntuNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
-
-    - bash -e ./tests/run_tests.sh all "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh ComputeNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh LoginNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh ManagementNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh NFSNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh SQLNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+  script:
+    - ansible-playbook -i inventory.yml ./tests/mockSlurmData.yml
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/sinfo" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/squeue" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/scontrol ping" LoginNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet ntpd" CentosNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet ntp" UbuntuNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet mariadb" SQLNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet slurmctld" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet slurmdbd" ManagementNodes
+
+    - bash -e ./tests/run_tests.sh all "inventory.yml" "./tmp_key" # the generated tmp_key replaces the removed gc_key.pem
+    - bash -e ./tests/run_tests.sh ComputeNodes "inventory.yml" "./tmp_key"
+    - bash -e ./tests/run_tests.sh LoginNodes "inventory.yml" "./tmp_key"
+    - bash -e ./tests/run_tests.sh ManagementNodes "inventory.yml" "./tmp_key"
+    - bash -e ./tests/run_tests.sh NFSNodes "inventory.yml" "./tmp_key"
+    - bash -e ./tests/run_tests.sh SQLNodes "inventory.yml" "./tmp_key"
     #  Note to self: deactivated because it is broken. please fix it again - bash -e ./tests/run_tests.sh slurm "files/inventory.$STACKNAME" "../gc_key.pem"
-    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a 'sudo su - user1 -c whoami' LoginNodes,ComputeNodes  # to test ldap
+    - ansible -i inventory.yml -a 'sudo su - user1 -c whoami' LoginNodes,ComputeNodes  # to test ldap
     #- sshpass -p 'redhat' ssh -o StrictHostKeyChecking=no user1@server.example.com
 
-extended:
-  stage: extended
-  tags:
-  - heat
-  - ansible
-  before_script:
-    - echo "cleanup stack"
-    - sleep 30
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME ${CI_PROJECT_NAME}
-  only:
-    variables:
-      - $EXTENDED != null
-
-
-manual_cluster_spawn:
-  stage: push_button_spawn_cluster
-  tags:
-  - heat
-  - ansible
-  before_script:
-    - echo "press button spawn cluster."
-    - echo "for this to work you have to provide a variable called manual stackname"
-    - echo I still need to handle os password
-    - echo $MANUAL_STACKNAME
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - bash -x ./CICD/heat/heatcicdwrapper.sh create $MANUAL_STACKNAME ${CI_PROJECT_NAME}
-    - openstack stack list
-    - export STACKNAME=$MANUAL_STACKNAME
-    - sleep 25
-    - bash -x CICD/ansible_create_cluster_script.sh
-  when: manual
-  only:
-    refs:
-      - "cicd"
-
-# heat_test:
-#   stage: heat_test
-#   allow_failure: false
-#   tags:
-#   - heat
-#   before_script:
-#     - echo "$GC_KEY" > gc_key.pem
-#     - chmod 400 gc_key.pem
-#     - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-#     - source ./$NECTAR_ALLOCATION-openrc.sh
-#     - export HEAT_TEST_STACKNAME=_TESTING_HEAT
-#     - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
-#     - sleep 60
-#   script:
-#     - echo "heat_test stage"
-#     - source ./$NECTAR_ALLOCATION-openrc.sh
-#     - bash -x ./CICD/heat/heatcicdwrapper.sh create $HEAT_TEST_STACKNAME
-#     - openstack stack list
-#     - bash -x ./CICD/heat/heatcicdwrapper.sh update $HEAT_TEST_STACKNAME
-#     - openstack stack list
-#     - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
-#     - openstack stack list
-#   after_script:
-#     - sleep 20 # artifically wait a bit to make sure it is really dead
-
-#clean:
-#  stage: clean
-#  tags:
-#  - heat
-#  before_script:
-#    - echo "cleanup stack"
-#    - sleep 30
-#    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-#  script:
-#    - source ./$NECTAR_ALLOCATION-openrc.sh
-#    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME
-- 
GitLab