diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1f974383314e5a0c373650570161e1a47ea3d88d..622235df4b8789b35f4329c676a6c3d5d80c9e3e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -13,27 +13,9 @@ stages:
   - ansible_create_cluster_stage
   - push_button_spawn_cluster
   - tests
-  - integration_test #https://docs.gitlab.com/ee/ci/triggers/
-  - clean
+
-trigger_pipeline_in_Clusterbuild:
-  stage: integration_test
-  tags:
-  - ansible
-  script:
-    - echo ${CI_JOB_TOKEN}
-    - curl --request POST --form token=${CI_JOB_TOKEN} --form "variables[TRIGGER_CI_COMMIT_SHA]=${CI_COMMIT_SHA}" --form ref=master https://gitlab.erc.monash.edu.au/api/v4/projects/193/trigger/pipeline # ID is from clusterbuild
-
-
-trigger_pipeline_in_monarch:
-  stage: integration_test
-  tags:
-  - ansible
-  script:
-    - echo ${CI_JOB_TOKEN}
-    - curl --request POST --form token=${CI_JOB_TOKEN} --form "variables[TRIGGER_CI_COMMIT_SHA]=${CI_COMMIT_SHA}" --form ref=master https://gitlab.erc.monash.edu.au/api/v4/projects/385/trigger/pipeline # ID is from monarch
-
 yamllint:
   stage: lint
@@ -45,21 +27,6 @@ yamllint:
     - cd CICD
     - yamllint -c ./.yamllintheat.yaml ./heat
 
-# delete_stack_manual:
-#   stage: delete_stack_manual
-#   tags:
-#   - heat
-#   before_script:
-#     - echo "$GC_KEY" > gc_key.pem
-#     - chmod 400 gc_key.pem
-#     - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
-#   script:
-#     - echo "heat stage"
-#     - source ./$NECTAR_ALLOCATION-openrc.sh
-#     - openstack stack list
-#     - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME
-#   when: manual
-
 ansiblelint:
   allow_failure: true
   stage: lint
@@ -148,6 +115,43 @@ tests:
     - bash -e ./tests/run_tests.sh NFSNodes "files/inventory.$STACKNAME" "../gc_key.pem"
     - bash -e ./tests/run_tests.sh SQLNodes "files/inventory.$STACKNAME" "../gc_key.pem"
     - bash -e ./tests/run_tests.sh slurm "files/inventory.$STACKNAME" "../gc_key.pem"
+    - cd plays
+    - ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem --skip-tags monitoring computenodes.yml | tee nochange.log
+    - [ `grep changed= ./nochange.log -c` = `grep changed=0 ./nochange.log -c` ] # a crude way to make sure all changed lines are equal to changed=0
+
+extended:
+  stage: extended
+  tags:
+  - heat
+  - ansible
+  before_script:
+    - echo "cleanup stack"
+    - sleep 30
+    - echo "$HPCCICD_openrc" > ./$NECTAR_ALLOCATION-openrc.sh
+  script:
+    - source ./$NECTAR_ALLOCATION-openrc.sh
+    - bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists $STACKNAME ${CI_PROJECT_NAME}
+  only:
+    variables:
+      - $EXTENDED != null
+
+    - grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME #fail if inventory file is empty
+    - ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
+    - ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
+    # Need to find a better check for sinfo
+    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name sinfo -type f" ManagementNodes
+    #- ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "find /opt/ -name squeue -type f" ManagementNodes
+    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
+    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
+    - ansible -B 30 -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
+
+    - bash -e ./tests/run_tests.sh all "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh ComputeNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh LoginNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh ManagementNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh NFSNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh SQLNodes "files/inventory.$STACKNAME" "../gc_key.pem"
+    - bash -e ./tests/run_tests.sh slurm "files/inventory.$STACKNAME" "../gc_key.pem"
 
 extended:
   stage: extended
diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml
index df0ff262a08d5c63e85f3c0efb4e19082b4be8c2..14eee2dac9ffd693b2988efb861b2327e60f7c20 100644
--- a/roles/slurm-start/tasks/main.yml
+++ b/roles/slurm-start/tasks/main.yml
@@ -10,7 +10,7 @@
     slurmd_enabled: True
   when: slurmd_enabled is not defined
 
-- name: install slurmdbd init
+- name: install slurmdbd initt
   template: src=slurmdbd.initd.j2 dest=/etc/init.d/slurmdbd mode=755
   become: true
   when: use_systemd is not defined and start_slurmdbd is defined
@@ -56,29 +56,35 @@
   become: true
   when: use_systemd is defined and start_slurmdbd is defined and slurmdbd_service_installed.changed
 
+- name: make sure munge is started
+  service: name=munge state=started enabled=yes
+  become: true
+  when: use_systemd is defined and start_slurmdbd is defined
+
 - name: start munge
   service: name=munge state=restarted enabled=yes
   become: true
+  when: use_systemd is defined and ( slurmdbd_service_installed.changed or slurmctld_service_installed.changed or slurmd_service_installed.changed)
 
-- name: start slurmdbd
-  service: name=slurmdbd state=restarted enabled=no
+- name: start slurmdbd
+  service: name=slurmdbd state=restarted enabled={{ start_slurmdbd }}
   become: true
-  when: start_slurmdbd is defined
+  when: start_slurmdbd is defined and slurmdbd_service_installed.changed
 
-- name: "create cluster in slurm db"
-  shell: "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
-  become: true
-  ignore_errors: true
+#- name: "create cluster in slurm db"
+#  shell: "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
+#  become: true
+#  ignore_errors: true
 
 - name: start slurmctl
-  service: name=slurmctld state=restarted enabled=no
+  service: name=slurmctld state=restarted enabled={{ start_slurmctld }}
   become: true
-  when: use_systemd is defined and start_slurmctld is defined
+  when: use_systemd is defined and start_slurmctld is defined and slurmctld_service_installed.changed
 
 - name: start slurmd
   service: name=slurmd state=restarted enabled={{ slurmd_enabled }}
   become: true
-  when: use_systemd is defined and start_slurmd is defined
+  when: use_systemd is defined and start_slurmd is defined and slurmd_service_installed.changed
 
 - name: start slurm
   service: name=slurm state=restarted enabled={{ slurmd_enabled }}