variables:
  GIT_SUBMODULE_STRATEGY: recursive
  NECTAR_ALLOCATION: HPCCICD
  ANSIBLE_HOST_KEY_CHECKING: "False"

stages:
  - lint
  - extended
  #- heat_test
  - heat
  - ansible_create_cluster_stage
  - push_button_spawn_cluster
  - tests
  - clean
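# Stage order: lint -> extended (runs only when $EXTENDED is set) -> heat ->
# ansible_create_cluster_stage -> push_button_spawn_cluster (manual) -> tests -> clean.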

yamllint:
  stage: lint
  allow_failure: true
  tags:
    - yamllint
  script:
    - echo "stage yamllint"
    - cd CICD
    - yamllint -c ./.yamllintheat.yaml ./heat

ansiblelint:
  allow_failure: true
  stage: lint
  tags:
    - ansiblelint
  script:
    - echo "stage ansiblelint"
    - cd CICD
    - python3 ansiblelint/run_lint.py --targets master_playbook.yml

build_cluster_cicd:
  stage: heat
  allow_failure: false
  tags:
    - heat
  before_script:
    - cd $DEFAULT_PATH
    - echo "$GC_KEY" > gc_key.pem
    - chmod 400 gc_key.pem
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - echo "heat stage"
    - source ./$NECTAR_ALLOCATION-openrc.sh
    - openstack stack list
    - bash -x ./heat/heatcicdwrapper.sh create_or_update $STACKNAME
    - bash -x ./heat/server_rebuild.sh all
  after_script:
    - sleep 30 # artificially wait a bit to give the nodes time to boot
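    # A possible (untested) alternative to the fixed sleep: poll Heat until the stack
    # reports CREATE_COMPLETE. Note this only confirms the stack finished, not that
    # sshd on the nodes is reachable yet.
    #- source ./$NECTAR_ALLOCATION-openrc.sh
    #- until [ "$(openstack stack show $STACKNAME -f value -c stack_status)" = "CREATE_COMPLETE" ]; do sleep 10; done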
#  only:
#    changes: # https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
#      - "heat/*HOT*.yaml"
#      - schedules
#      - ./.gitlab-ci.yml

ansible_create_cluster_stage:
  stage: ansible_create_cluster_stage
  tags:
    - ansible
  before_script:
    - cd $DEFAULT_PATH/..
    - echo "$GC_KEY" > gc_key.pem
    - chmod 400 gc_key.pem
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - echo "ansible_create_cluster_stage"
    - bash -x ./CICD/ansible_create_cluster_script.sh
    - cd CICD
    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem --skip-tags monitoring master_playbook.yml
    - sleep 15
    - echo uglyuglyfix # workaround: restart slurmdbd, then run the playbook a second time
    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -b -a "systemctl restart slurmdbd" ManagementNodes
    - sleep 60
    - echo do it again
    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem --skip-tags monitoring master_playbook.yml
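    # A possible (untested) alternative to the fixed sleeps above: wait until slurmdbd
    # is actually listening before the second playbook run. 6819 is slurmdbd's default
    # port; this invocation is a sketch, not part of the original pipeline.
    #- ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -m wait_for -a "port=6819 timeout=120" ManagementNodes
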
tests:
  stage: tests
  tags:
    - ansible
  before_script:
    - echo "$GC_KEY" > gc_key.pem
    - chmod 400 gc_key.pem
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - echo "tests stage"
    - source ./$NECTAR_ALLOCATION-openrc.sh
    - openstack stack list
    - cd CICD
    - python3 ../scripts/make_inventory.py static $STACKNAME | tee ./files/inventory.$STACKNAME && chmod 755 ./files/inventory.$STACKNAME
    - grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME # fail if the inventory is empty or holds only the error line ("resouces" kept verbatim to match the script's output)
    - ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
    # Need to find a better check for sinfo
    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name sinfo -type f" ManagementNodes
    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name squeue -type f" ManagementNodes
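    # A possibly better (untested) check: run sinfo itself and rely on its exit code,
    # assuming it is on the management nodes' PATH.
    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sinfo --noheader" ManagementNodes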
    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
    - bash -e ./tests/run_tests.sh all "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh ComputeNodes "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh LoginNodes "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh ManagementNodes "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh NFSNodes "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh SQLNodes "files/inventory.$STACKNAME" "../gc_key.pem"
    - bash -e ./tests/run_tests.sh slurm "files/inventory.$STACKNAME" "../gc_key.pem"
    - cd plays
    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem --skip-tags monitoring computenodes.yml | tee nochange.log
    - '[ `grep changed= ./nochange.log -c` = `grep changed=0 ./nochange.log -c` ]' # crude idempotence check: every "changed=" recap line must report changed=0
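    # A stricter (untested) variant of the idempotence check: fail if any recap line
    # reports a nonzero change count, instead of comparing two grep counts.
    #- '! grep -E "changed=[1-9]" ./nochange.log'
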
extended:
  stage: extended
  tags:
    - heat
    - ansible
  before_script:
    - echo "cleanup stack"
    - sleep 30
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - source ./$NECTAR_ALLOCATION-openrc.sh
    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME ${CI_PROJECT_NAME}
  only:
    variables:
      - $EXTENDED != null

manual_cluster_spawn:
  stage: push_button_spawn_cluster
  tags:
    - heat
    - ansible
  before_script:
    - echo "press button spawn cluster."
    - echo "for this to work you have to provide a pipeline variable called MANUAL_STACKNAME"
    - echo "TODO - handle the OS password"
    - echo $MANUAL_STACKNAME
    - echo "$GC_KEY" > gc_key.pem
    - chmod 400 gc_key.pem
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - source ./$NECTAR_ALLOCATION-openrc.sh
    - bash -x ./CICD/heat/heatcicdwrapper.sh create $MANUAL_STACKNAME ${CI_PROJECT_NAME}
    - openstack stack list
    - export STACKNAME=$MANUAL_STACKNAME
    - sleep 25
    - bash -x CICD/ansible_create_cluster_script.sh
  when: manual
  only:
    refs:
      - "cicd"

clean:
  stage: clean
  tags:
    - heat
  before_script:
    - echo "cleanup stack"
    - sleep 30
    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
  script:
    - source ./$NECTAR_ALLOCATION-openrc.sh
    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME

# heat_test:
#   stage: heat_test
#   allow_failure: false
#   tags:
#     - heat
#   before_script:
#     - echo "$GC_KEY" > gc_key.pem
#     - chmod 400 gc_key.pem
#     - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
#     - source ./$NECTAR_ALLOCATION-openrc.sh
#     - export HEAT_TEST_STACKNAME=_TESTING_HEAT
#     - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
#     - sleep 60
#   script:
#     - echo "heat_test stage"
#     - source ./$NECTAR_ALLOCATION-openrc.sh
#     - bash -x ./CICD/heat/heatcicdwrapper.sh create $HEAT_TEST_STACKNAME
#     - openstack stack list
#     - bash -x ./CICD/heat/heatcicdwrapper.sh update $HEAT_TEST_STACKNAME
#     - openstack stack list
#     - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
#     - openstack stack list
#   after_script:
#     - sleep 20 # artificially wait a bit to make sure it is really dead