diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 24420a8b0ca9a6ba42e250509625b61b9d34d901..7c00089ebe4a657ca2881c13807c0a8bd21c6b25 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -7,20 +7,12 @@ variables:
 
 stages:
   - lint
-  - extended
-  #- heat_test
-  - heat
   - openstack_create
-  - ansible_create_cluster_stage
-  - push_button_spawn_cluster
+  - configure_cluster
   - tests
   - integration_test #https://docs.gitlab.com/ee/ci/triggers/
   - openstack_destroy
-  #- clean
-  #- testlustre
-  #- clean_testlustre
-
 
 
 trigger_pipeline_in_Clusterbuild:
   stage: integration_test
@@ -67,7 +59,7 @@ ansiblelint:
     - python3 ansiblelint/run_lint.py --targets ../maintenance.yml
 
 
-build_cluster_cicd:
+build_infra:
   stage: openstack_create
   image: ubuntu
   allow_failure: false
@@ -75,8 +67,6 @@ build_cluster_cicd:
   - heat
   before_script:
     - cd $DEFAULT_PATH
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
     - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
   script:
     - whoami
@@ -84,8 +74,8 @@
     - apt -y upgrade
     - apt -y install openssh-client python3-pip
     - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
-    - echo "create stage"
     - source ./$NECTAR_ALLOCATION-openrc.sh
+    - echo "Generating a random SSH key for our cluster"
     - ssh-keygen -f tmp_key -N "" -t ed25519
     - export SSH_CA=`cat tmp_key.pub`
     - cd infra
@@ -93,30 +83,22 @@
     - ansible-playbook os_create.yml
     - python3 ./make_inventory.py $CI_PROJECT_NAME-ci-$CI_COMMIT_REF_NAME > ../inventory.yml
     - cd ..
-    #- ssh-keygen -R `cat ssh.cfg | grep Proxy | cut -f 2 -d "@"`
-    #- ssh `cat ssh.cfg | grep Proxy | cut -f 2 -d "="` -o StrictHostKeyChecking=accept-new exit 0
-
-    #- bash -x ./heat/heatcicdwrapper.sh create_or_update $STACKNAME
-    #- bash -x ./heat/server_rebuild.sh all
-  after_script:
-    - sleep 30 # artifically wait a bit to give the nodes time to boot
   artifacts:
     paths:
       - CICD/inventory.yml
       - CICD/tmp_key
       - CICD/tmp_key.pub
       - CICD/infra/os_vars.yml
-# only:
-# changes: #https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
-# - "heat/*HOT*.yaml"
-# - schedules
-# - ./.gitlab-ci.yml
 
-ansible_create_cluster_stage:
-  stage: ansible_create_cluster_stage
+configure_cluster:
+  stage: configure_cluster
   image: ubuntu
   tags:
   - ansible
+  artifacts:
+    paths:
+      - CICD/files
+      - CICD/vars
   script:
     - whoami
     - apt update
@@ -124,147 +106,56 @@ ansible_create_cluster_stage:
     - apt -y install openssh-client python3-pip
     - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
     - cd CICD
-    - pwd
     - python3 ./make_files.py ./inventory.yml ./infra/os_vars.yml ./vars/versions.yml
-    #- ssh-keygen -R `cat ssh.cfg | grep Proxy | cut -f 2 -d "@"`
    - mkdir -p ~/.ssh
     - ssh-keyscan -H `cat ssh.cfg | grep Proxy | cut -f 2 -d "@"` >> ~/.ssh/known_hosts
     - eval `ssh-agent`
     - ssh-add ./tmp_key
-    - ls
-    - ssh-add -L
-    - cat ansible.cfg
-    - cat ssh.cfg
     - ssh -vvv `cat ssh.cfg | grep Proxy | cut -f 2 -d "="` exit 0
     - export ANSIBLE_HOST_KEY_CHECKING=False
     - export ANSIBLE_CONFIG=`pwd`/ansible.cfg
     - ansible -i inventory.yml -m ping 'all'
     - ansible-playbook -i inventory.yml upgrade_and_reboot.yml || true
     - ansible-playbook -i inventory.yml master_playbook.yml
-    - sleep 15
-    - echo uglyuglyfix
-    - ansible -i inventory.yml -b -a "systemctl restart slurmdbd" ManagementNodes
-    - ansible -i inventory.yml -b -a "systemctl restart slurmctld" ManagementNodes
-    # - cd plays
-    # - ansible-playbook -i files/inventory.$STACKNAME --key-file ../../gc_key.pem --skip-tags monitoring computenodes.yml | tee nochange.log
-    # - echo [ `grep changed= ./nochange.log -c` = `grep changed=0 ./nochange.log -c` ] > bashtest.sh # a crude way to make sure all changed lines are equal to changed=0
-    # - bash ./bashtest.sh
-    # - ansible-playbook -i files/inventory.$STACKNAME --key-file ../../gc_key.pem --skip-tags monitoring --check computenodes.yml
+    # I don't think this is necessary any more
+    #- ansible -i inventory.yml -b -a "systemctl restart slurmdbd" ManagementNodes
+    #- ansible -i inventory.yml -b -a "systemctl restart slurmctld" ManagementNodes
 
 tests:
   stage: tests
   tags:
   - ansible
   before_script:
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - echo "tests stage"
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - openstack stack list
+    - whoami
+    - apt update
+    - apt -y upgrade
+    - apt -y install openssh-client python3-pip
+    - pip3 install joblib python-cinderclient python-keystoneclient python-novaclient python-openstackclient PyYAML ansible
     - cd CICD
+    - mkdir -p ~/.ssh
+    - ssh-keyscan -H `cat ssh.cfg | grep Proxy | cut -f 2 -d "@"` >> ~/.ssh/known_hosts
+    - eval `ssh-agent`
+    - ssh-add ./tmp_key
+    - export ANSIBLE_HOST_KEY_CHECKING=False
     - export ANSIBLE_CONFIG=`pwd`/ansible.cfg
-    - python3 ../scripts/make_inventory.py static $STACKNAME | tee ./files/inventory.$STACKNAME && chmod 755 ./files/inventory.$STACKNAME
-    - grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME #fail if inventory file is empty
-    - ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
-    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
-    - echo -e '[defaults]\r\nallow_world_readable_tmpfiles = True' > ansible.cfg
-    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem ./tests/mockSlurmData.yml
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/sinfo" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/squeue" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "/opt/slurm-latest/bin/scontrol ping" LoginNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet ntpd" CentosNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet ntp" UbuntuNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
-    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
-
-    - bash -e ./tests/run_tests.sh all "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh ComputeNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh LoginNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh ManagementNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh NFSNodes "files/inventory.$STACKNAME" "../gc_key.pem"
-    - bash -e ./tests/run_tests.sh SQLNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+  script:
+    - ansible-playbook -i inventory.yml ./tests/mockSlurmData.yml
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/sinfo" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/squeue" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "/opt/slurm-latest/bin/scontrol ping" LoginNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet ntpd" CentosNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet ntp" UbuntuNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet mariadb" SQLNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet slurmctld" ManagementNodes
+    - ansible -B 30 -i inventory.yml -a "systemctl is-active --quiet slurmdbd" ManagementNodes
+
+    - bash -e ./tests/run_tests.sh all "inventory.yml" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh ComputeNodes "inventory.yml" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh LoginNodes "inventory.yml" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh ManagementNodes "inventory.yml" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh NFSNodes "inventory.yml" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh SQLNodes "inventory.yml" "../gc_key.pem"
     # Note to self: deactivated because it is broken. please fix it again
     - bash -e ./tests/run_tests.sh slurm "files/inventory.$STACKNAME" "../gc_key.pem"
-    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a 'sudo su - user1 -c whoami' LoginNodes,ComputeNodes # to test ldap
+    - ansible -i inventory.yml -a 'sudo su - user1 -c whoami' LoginNodes,ComputeNodes # to test ldap
     #- sshpass -p 'redhat' ssh -o StrictHostKeyChecking=no user1@server.example.com
-extended:
-  stage: extended
-  tags:
-  - heat
-  - ansible
-  before_script:
-    - echo "cleanup stack"
-    - sleep 30
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME ${CI_PROJECT_NAME}
-  only:
-    variables:
-      - $EXTENDED != null
-
-
-manual_cluster_spawn:
-  stage: push_button_spawn_cluster
-  tags:
-  - heat
-  - ansible
-  before_script:
-    - echo "press button spawn cluster."
-    - echo "for this to work you have to provide a variable called manual stackname"
-    - echo I still need to handle os password
-    - echo $MANUAL_STACKNAME
-    - echo "$GC_KEY" > gc_key.pem
-    - chmod 400 gc_key.pem
-    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-  script:
-    - source ./$NECTAR_ALLOCATION-openrc.sh
-    - bash -x ./CICD/heat/heatcicdwrapper.sh create $MANUAL_STACKNAME ${CI_PROJECT_NAME}
-    - openstack stack list
-    - export STACKNAME=$MANUAL_STACKNAME
-    - sleep 25
-    - bash -x CICD/ansible_create_cluster_script.sh
-  when: manual
-  only:
-    refs:
-      - "cicd"
-
-# heat_test:
-# stage: heat_test
-# allow_failure: false
-# tags:
-# - heat
-# before_script:
-# - echo "$GC_KEY" > gc_key.pem
-# - chmod 400 gc_key.pem
-# - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-# - source ./$NECTAR_ALLOCATION-openrc.sh
-# - export HEAT_TEST_STACKNAME=_TESTING_HEAT
-# - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
-# - sleep 60
-# script:
-# - echo "heat_test stage"
-# - source ./$NECTAR_ALLOCATION-openrc.sh
-# - bash -x ./CICD/heat/heatcicdwrapper.sh create $HEAT_TEST_STACKNAME
-# - openstack stack list
-# - bash -x ./CICD/heat/heatcicdwrapper.sh update $HEAT_TEST_STACKNAME
-# - openstack stack list
-# - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
-# - openstack stack list
-# after_script:
-# - sleep 20 # artifically wait a bit to make sure it is really dead
-#clean:
-# stage: clean
-# tags:
-# - heat
-# before_script:
-# - echo "cleanup stack"
-# - sleep 30
-# - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-# script:
-# - source ./$NECTAR_ALLOCATION-openrc.sh
-# - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME
-