From cd14b158e49d3f30d580502bc24ee3ef5b94a50f Mon Sep 17 00:00:00 2001 From: handreas <andreas.hamacher@monash.edu> Date: Tue, 28 Jan 2020 05:45:00 +0000 Subject: [PATCH] moving files from repository cluster_cicd to this one. DIE submodule DIE Former-commit-id: 2e6036f58049ba86e1c43d55049e00b21ee9011b --- .gitignore | 2 + .gitlab-ci.yml | 163 ++++++++++++++ CICD/ansible_create_cluster_script.sh | 22 ++ CICD/files/.gitignore | 1 + CICD/files/etcExports | 4 + CICD/files/etcHosts | 14 ++ CICD/heat/gc_HOT.yaml | 269 +++++++++++++++++++++++ CICD/heat/heatcicdwrapper.sh | 108 +++++++++ CICD/heat/mgmtnode_HOT.yaml | 38 ++++ CICD/heat/resource_registry.yaml | 2 + CICD/master_playbook.yml | 7 + {plays => CICD/plays}/allnodes.yml | 0 {plays => CICD/plays}/computenodes.yml | 2 +- {plays => CICD/plays}/files | 0 {plays => CICD/plays}/init_slurmconf.yml | 0 {plays => CICD/plays}/make_files.yml | 0 {plays => CICD/plays}/mgmtnodes.yml | 0 {plays => CICD/plays}/nfssqlnodes.yml | 0 CICD/plays/roles | 1 + {plays => CICD/plays}/vars | 0 CICD/vars/filesystems.yml | 21 ++ CICD/vars/ldapConfig.yml | 50 +++++ CICD/vars/names.yml | 3 + CICD/vars/passwords.yml | 7 + CICD/vars/slurm.yml | 45 ++++ CICD/vars/vars.yml | 10 + plays/roles | 1 - 27 files changed, 768 insertions(+), 2 deletions(-) create mode 100644 .gitlab-ci.yml create mode 100755 CICD/ansible_create_cluster_script.sh create mode 100644 CICD/files/.gitignore create mode 100644 CICD/files/etcExports create mode 100644 CICD/files/etcHosts create mode 100644 CICD/heat/gc_HOT.yaml create mode 100644 CICD/heat/heatcicdwrapper.sh create mode 100644 CICD/heat/mgmtnode_HOT.yaml create mode 100644 CICD/heat/resource_registry.yaml create mode 100644 CICD/master_playbook.yml rename {plays => CICD/plays}/allnodes.yml (100%) rename {plays => CICD/plays}/computenodes.yml (93%) rename {plays => CICD/plays}/files (100%) rename {plays => CICD/plays}/init_slurmconf.yml (100%) rename {plays => CICD/plays}/make_files.yml (100%) rename {plays => CICD/plays}/mgmtnodes.yml (100%) rename {plays => CICD/plays}/nfssqlnodes.yml (100%) create mode 120000 CICD/plays/roles rename {plays => CICD/plays}/vars (100%) create mode 100644 CICD/vars/filesystems.yml create mode 100644 CICD/vars/ldapConfig.yml create mode 100644 CICD/vars/names.yml create mode 100644 CICD/vars/passwords.yml create mode 100644 CICD/vars/slurm.yml create mode 100644 CICD/vars/vars.yml delete mode 120000 plays/roles diff --git a/.gitignore b/.gitignore index ab476012..153b6a97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.retry +*-openrc.sh +gc_key.pem diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..3b15d116 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,163 @@ +variables: + GIT_SUBMODULE_STRATEGY: recursive + STACKNAME: _reporef$CI_COMMIT_REF_NAME + NECTAR_ALLOCATION: HPCCICD + +stages: +# - integration_test_downstream # working but unwanted here +# - trigger_pipeline_in_B # working but unwanted here + - yamllint +# - heat + - ansible_create_cluster_stage + - push_button_spawn_cluster +# - e2e + - tests +# - clean # only on master + +#trigger_pipeline_in_B: +# stage: integration_test_downstream +# tags: +# - ansible +# script: +# - "curl --request POST --form token=${CI_JOB_TOKEN} --form ref=master https://gitlab.erc.monash.edu.au/api/v4/projects/1085/trigger/pipeline" # ID is from pysshauthz + +heat_test: + stage: heat + allow_failure: false + tags: + - heat + before_script: + - source ./keys/$NECTAR_ALLOCATION-openrc.sh + - export HEAT_TEST_STACKNAME=_TESTING_HEAT 
+    - bash -x ./heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
+    - sleep 60
+  script:
+    - echo "heat_test stage"
+    - source ./keys/$NECTAR_ALLOCATION-openrc.sh
+    - bash -x ./heat/heatcicdwrapper.sh create $HEAT_TEST_STACKNAME
+    - openstack stack list
+    - bash -x ./heat/heatcicdwrapper.sh update $HEAT_TEST_STACKNAME
+    - openstack stack list
+    - bash -x ./heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
+    - openstack stack list
+  after_script:
+    - sleep 20  # artificially wait a bit to make sure it is really dead
+  only:
+    changes:  # https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
+      - "heat/heatcicdwrapper.sh"
+      - "heat/*.{yml,yaml}"
+      - schedules
+    #- CICD/.gitlab-ci.yml
+
+yamllint:
+  stage: yamllint
+  allow_failure: false
+  tags:
+    - yamllint
+  script:
+    - echo "stage yamllint"
+    - ansible-lint -x ANSIBLE0002 master_playbook.yml
+    - yamllint -c ./CICD/.yamllintheat.yaml ./CICD/heat
+
+build_cluster_cicd:
+  stage: heat
+  allow_failure: false
+  tags:
+    - heat
+  script:
+    - echo "heat stage"
+    - source ./$NECTAR_ALLOCATION-openrc.sh
+    - openstack stack list
+    - bash -x ./CICD/heat/heatcicdwrapper.sh create_or_update $STACKNAME
+  after_script:
+    - sleep 20  # artificially wait a bit to give the nodes time to boot
+  only:
+    changes:  # https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
+      - "heat/*HOT*.yaml"
+      - schedules
+      - ./.gitlab-ci.yml
+
+ansible_create_cluster_stage:
+  stage: ansible_create_cluster_stage
+  tags:
+    - ansible
+  script:
+    - echo "ansible_create_cluster_stage"
+    - bash -x ./CICD/ansible_create_cluster_script.sh
+  #after_script:
+  #- rm ./files/cicd_inventory
+  #only:
+  #  changes:  # https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
+  #    - "master_playbook.yml"
+  #    - "vars/*.{yml,yaml}"
+  #    - schedules
+  #    - CICD/.gitlab-ci.yml
+
+tests:
+  stage: tests
+  tags:
+    - ansible
+  script:
+    - echo "tests stage"
+    - export ANSIBLE_HOST_KEY_CHECKING=False
+    - source ./keys/$NECTAR_ALLOCATION-openrc.sh
+    - python3 dependencies/ansible_cluster_in_a_box/scripts/make_inventory.py static CICD$STACKNAME | tee ./files/cicd_inventory
+    - grep -qv "I could not find any resouces tagged with project_name:" ./files/cicd_inventory  # fail if the inventory file is empty
+    - chmod 755 ./files/cicd_inventory
+    - chmod 400 ./keys/gc_key.pem
+    - ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "sinfo" ManagementNodes
+    - ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "squeue" ManagementNodes
+    - ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
+    - ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
+    - ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
+
+    - bash -e ./tests/run_tests.sh all "files/cicd_inventory" "keys/gc_key.pem"
+    - bash -e ./tests/run_tests.sh ComputeNodes "files/cicd_inventory" "keys/gc_key.pem"
+    - bash -e ./tests/run_tests.sh LoginNodes "files/cicd_inventory" "keys/gc_key.pem"
+    - bash -e ./tests/run_tests.sh ManagementNodes "files/cicd_inventory" "keys/gc_key.pem"
+    - bash -e ./tests/run_tests.sh NFSNodes "files/cicd_inventory" "keys/gc_key.pem"
+    - bash -e ./tests/run_tests.sh SQLNodes "files/cicd_inventory" "keys/gc_key.pem"
+
+  # licensing https://gitlab.erc.monash.edu.au/hpc-team/license_server/tree/master/roles/avizo_license_monitor
+
+manual_cluster_spawn:
+  stage: push_button_spawn_cluster
+  tags:
+    - heat
+    - ansible
+  before_script:
+    - echo "push-button spawn cluster."
+    - echo "for this to work you have to provide a variable called MANUAL_STACKNAME"
+    - echo "I still need to handle the OS password"
+    - echo $MANUAL_STACKNAME
+  script:
+    - source ./keys/$NECTAR_ALLOCATION-openrc.sh
+    - bash -x ./heat/heatcicdwrapper.sh create $MANUAL_STACKNAME
+    - git submodule update --init --remote --merge
+    - openstack stack list
+    - export STACKNAME=$MANUAL_STACKNAME
+    - sleep 25
+    - bash -x CICD/ansible_create_cluster_script.sh
+  environment:
+    name: openstack
+    url: https://dashboard.rc.nectar.org.au/project/instances/
+  when: manual
+  only:
+    refs:
+      - "devel"
+
+clean:
+  stage: clean
+  tags:
+    - heat
+  before_script:
+    - echo "cleanup master"
+    - sleep 30
+  script:
+    - source ./keys/$NECTAR_ALLOCATION-openrc.sh
+    - bash -x ./heat/heatcicdwrapper.sh delete_if_exists _reporefmaster  # hardcoding the master stackname on purpose here
+  only:
+    refs:
+      - "master"
+    changes:
+      - schedules
diff --git a/CICD/ansible_create_cluster_script.sh b/CICD/ansible_create_cluster_script.sh
new file mode 100755
index 00000000..bf753fd7
--- /dev/null
+++ b/CICD/ansible_create_cluster_script.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -e
+export ANSIBLE_HOST_KEY_CHECKING=False
+
+source ./$NECTAR_ALLOCATION-openrc.sh
+
+cd CICD
+
+echo "[ansible_create_cluster_script] Prefixing the Stackname with CICD because heatcicdwrapper does the same"
+python3 ../scripts/make_inventory.py static CICD$STACKNAME | tee ./files/inventory.$STACKNAME && chmod 755 ./files/inventory.$STACKNAME
+grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME  # fail if the inventory file is empty
+chmod 400 ../gc_key.pem
+ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
+ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
+
+#cd roles
+  #- "egrep -lRZ 'sudo: true' . | xargs -0 -l sed -i -e 's/sudo: true/become: true/g' "
+#cd ..
+ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem master_playbook.yml +sleep 15 +echo uglyuglyfix +ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -b -a "systemctl restart slurmdbd" ManagementNodes \ No newline at end of file diff --git a/CICD/files/.gitignore b/CICD/files/.gitignore new file mode 100644 index 00000000..27b0bac4 --- /dev/null +++ b/CICD/files/.gitignore @@ -0,0 +1 @@ +inventory.* diff --git a/CICD/files/etcExports b/CICD/files/etcExports new file mode 100644 index 00000000..0867fd1b --- /dev/null +++ b/CICD/files/etcExports @@ -0,0 +1,4 @@ +/nfsvol/home *(fsid=1,rw,no_root_squash) +/slurmstate *(fsid=2,rw,no_root_squash) +/nfsvol/projects *(fsid=4,rw,no_root_squash) +/nfsvol/scratch *(fsid=5,rw,no_root_squash) diff --git a/CICD/files/etcHosts b/CICD/files/etcHosts new file mode 100644 index 00000000..19863a99 --- /dev/null +++ b/CICD/files/etcHosts @@ -0,0 +1,14 @@ +127.0.0.1 localhost +::1 ip6-localhost ip6-loopback +fe00::0 ip6-localnet +ff00::0 ip6-mcastprefix +ff02::1 ip6-allnodes +ff02::2 ip6-allrouters +118.138.241.196 hpcldap0.erc.monash.edu.au +118.138.244.7 consistency0 + +118.138.235.28 CICDCICD-mgmt0.massive.org.au CICDCICD-mgmt0 +118.138.233.249 CICDCICD-login0.massive.org.au CICDCICD-login0 +118.138.234.145 CICDCICD-computec0.massive.org.au CICDCICD-computec0 +118.138.234.104 CICDCICD-sql0.massive.org.au CICDCICD-sql0 +118.138.235.60 CICDCICD-mgmt1.massive.org.au CICDCICD-mgmt1 diff --git a/CICD/heat/gc_HOT.yaml b/CICD/heat/gc_HOT.yaml new file mode 100644 index 00000000..618403ce --- /dev/null +++ b/CICD/heat/gc_HOT.yaml @@ -0,0 +1,269 @@ +--- +heat_template_version: 2013-05-23 +description: "A simple template to boot a cluster of desktops (LoginNode, ManagementNodes and Desktop Nodes)" +# avz parameters disabled. they are working but I want just more options than monash-02. 
I would like to have a parameter that says "I don't care" + +parameters: + ubuntu_1804_image_id: + type: string + label: Image ID + description: Ubuntu Image + default: 99d9449a-084f-4901-8bd8-c04aebd589ca + centos_7_image_id: + type: string + label: Image ID + description: Centos Image + default: 12da1997-5122-4be3-a2a9-2f44961c1b16 + ssh_key: + type: string + default: gc_key + avz: + type: string + default: monash-02 + project_name: + type: string + NetID: + type: string + default: Classic Provider + Flavour: + type: string + default: t3.xsmall + + +resources: + + SlurmSecGroup: + type: "OS::Neutron::SecurityGroup" + properties: + name: "heatslurmsecgroup" + rules: [ { protocol: tcp, + port_range_min: 12000, + port_range_max: 12999}, + { protocol: tcp, + port_range_min: 6817, + port_range_max: 6819}, + { protocol: tcp, + port_range_min: 1019, + port_range_max: 1019}] + NFSSecGroup: + type: "OS::Neutron::SecurityGroup" + properties: + name: "heatnfssecgroup" + rules: [ { protocol: tcp, + port_range_min: 2049, + port_range_max: 2049}, + { protocol: tcp, + port_range_min: 111, + port_range_max: 111}, + { protocol: udp, + port_range_min: 2049, + port_range_max: 2049}, + { protocol: udp, + port_range_min: 111, + port_range_max: 111}] + MySQLSecGroup: + type: "OS::Neutron::SecurityGroup" + properties: + name: "heatmysqlsecgroup" + rules: [ { protocol: tcp, + port_range_min: 3306, + port_range_max: 3306} ] + SSHMonashSecGroup: + type: "OS::Neutron::SecurityGroup" + properties: + name: "SSHMonashSecGroup" + rules: [ { protocol: tcp, + port_range_min: 22, + port_range_max: 22, + direction: ingress, + remote_ip_prefix: 118.138.240.0/21 + } ] +# SSHInternalSecGroup: +# type: "OS::Neutron::SecurityGroup" +# properties: +# name: "SSHInternalSecGroup" +# rules: [ { protocol: tcp, +# port_range_min: 22, +# port_range_max: 22, +# direction: ingress} ] + #remote_ip_prefix: { get_param: REMOTE_IP }, direction: ingress + webaccess: + type: "OS::Neutron::SecurityGroup" + properties: + name: "webaccess" + rules: [ { protocol: tcp, + port_range_min: 80, + port_range_max: 80}, + { protocol: tcp, + port_range_min: 443, + port_range_max: 443} ] + + SQLNode0: + type: "OS::Nova::Server" + properties: + name: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'sql0' ]] + availability_zone: { get_param: avz } + flavor: t3.small + image: { get_param: centos_7_image_id } + key_name: { get_param: ssh_key } + security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: MySQLSecGroup }, { get_resource: NFSSecGroup } ] + metadata: + ansible_host_groups: [ SQLNodes, NFSNodes ] + ansible_ssh_user: ec2-user + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } + + NFSVolume: + type: OS::Cinder::Volume + properties: + availability_zone: { get_param: avz } + size: 1 + name: nfsvol + NFSVolumeAttachment: + type: "OS::Cinder::VolumeAttachment" + properties: + volume_id: { get_resource: NFSVolume } + instance_uuid: { get_resource: SQLNode0 } + + SLURMSTATEVolume: + type: OS::Cinder::Volume + properties: + availability_zone: { get_param: avz } + size: 1 + name: slurmstate + SLURMSTATEVolumeAttachment: + type: "OS::Cinder::VolumeAttachment" + properties: + volume_id: { get_resource: SLURMSTATEVolume } + instance_uuid: { get_resource: SQLNode0 } + + DBVolume: + type: OS::Cinder::Volume + properties: + availability_zone: { get_param: avz } + size: 512 + name: dbvol + DBVolumeAttachment: + type: "OS::Cinder::VolumeAttachment" + properties: + volume_id: 
{ get_resource: DBVolume } + instance_uuid: { get_resource: SQLNode0 } + + MgmtNodes: + type: "OS::Heat::ResourceGroup" + properties: + count: 2 + resource_def: + type: My::Server::MgmtNode + properties: + #avz: { get_param: avz } + image: { get_param: centos_7_image_id } + ansible_ssh_user: ec2-user + mynodename: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'mgmt%index%' ]] + ssh_key: { get_param: ssh_key } + security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup }, { get_resource: MySQLSecGroup } ] + project_name: { get_param: project_name } + + LoginNodes: + type: "OS::Heat::ResourceGroup" + properties: + count: 1 + resource_def: + type: "OS::Nova::Server" + properties: + #availability_zone: { get_param: avz } + flavor: t3.small + image: { get_param: centos_7_image_id } + key_name: { get_param: ssh_key } + name: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'login%index%' ]] + security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ] + metadata: + ansible_host_groups: [ LoginNodes ] + ansible_ssh_user: ec2-user + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } + + DesktopNodes: + type: "OS::Heat::ResourceGroup" + properties: + count: 0 + resource_def: + type: "OS::Nova::Server" + properties: + #availability_zone: { get_param: avz } + flavor: t3.small + image: { get_param: centos_7_image_id } + key_name: { get_param: ssh_key } + name: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'desktopc%index%' ]] + security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ] + metadata: + ansible_host_groups: [ DesktopNodes, VisNodes, ComputeNodes ] + ansible_ssh_user: ec2-user + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } + + ComputeNodes: + type: "OS::Heat::ResourceGroup" + properties: + count: 1 + resource_def: + type: "OS::Nova::Server" + properties: + #availability_zone: { get_param: avz } + flavor: t3.small + image: { get_param: centos_7_image_id } + key_name: { get_param: ssh_key } + name: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'computec%index%' ]] + security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ] + metadata: + ansible_host_groups: [ ComputeNodes ] + ansible_ssh_user: ec2-user + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } + + UbuntuDesktopNodes: + type: "OS::Heat::ResourceGroup" + properties: + count: 0 + resource_def: + type: "OS::Nova::Server" + properties: + #availability_zone: { get_param: avz } + flavor: t3.small + image: { get_param: ubuntu_1804_image_id } + key_name: { get_param: ssh_key } + name: + list_join: [ '-', [ { get_param: "OS::stack_name" }, 'desktopu%index%' ]] + security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ] + metadata: + ansible_host_groups: [ DesktopNodes ] + ansible_ssh_user: ubuntu + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } + +# PySSHauthz: +# type: "OS::Nova::Server" +# properties: +# name: +# list_join: [ '-', [ { get_param: "OS::stack_name" }, 'pysshautz' ]] +# availability_zone: { get_param: avz } +# flavor: t3.xsmall +# image: { get_param: ubuntu_1804_image_id } +# key_name: { 
get_param: ssh_key }
+#    security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: webaccess } ]
+#    metadata:
+#      ansible_host_groups: [ PySSHauthz ]
+#      ansible_ssh_user: ubuntu
+#      project_name: { get_param: project_name }
+#    networks:
+#      - network: { get_param: NetID }
diff --git a/CICD/heat/heatcicdwrapper.sh b/CICD/heat/heatcicdwrapper.sh
new file mode 100644
index 00000000..97ad8fc6
--- /dev/null
+++ b/CICD/heat/heatcicdwrapper.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+# This script does not check available resources on Nectar!
+
+
+function usage {
+    echo $"Usage: $0 {create|update|show|create_or_update|delete_if_exists} STACKNAME"
+    exit 1
+}
+
+if [ "$#" -ne 2 ]; then
+    echo "Illegal number of parameters, expecting 2"
+    usage
+fi
+
+STACKNAME=$2
+STACKNAME="CICD"$STACKNAME
+
+echo "[heatcicdwrapper] Prefixing Stackname with CICD. This is a safety feature because this script can also delete stacks"
+
+function check_stack_exists {
+    if openstack stack list | grep -w $STACKNAME;
+    then
+        echo "stack found";
+    else
+        echo "stack not found";
+        return 1
+    fi
+}
+
+
+function func_delete_if_exists {
+    if ! check_stack_exists
+    then
+        exit 0
+    fi
+    openstack stack delete -y --wait $STACKNAME
+    ret=$?
+    if [ $ret -ne "0" ]
+    then
+        sleep 15
+        openstack stack delete -y --wait $STACKNAME
+        ret=$?
+    fi
+    exit $ret
+}
+
+function create_stack {
+
+    if check_stack_exists
+    then
+        echo "I will NOT create an existing stack; maybe use update"
+        exit -44
+    fi
+    openstack stack create --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
+    createreturn=$?
+    if [ $createreturn -ne "0" ]
+    then
+        openstack stack delete -y --wait $STACKNAME
+        echo "creation failed, trying to delete"
+        exit -47
+    fi
+    exit $createreturn
+}
+
+
+case "$1" in
+    create)
+        create_stack
+        ;;
+
+    update)
+        if ! check_stack_exists
+        then
+            echo "I cannot update a stack which does not exist"
+            exit -45
+        fi
+        openstack stack update --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
+        ret=$?
+        exit $ret
+        ;;
+    create_or_update)
+        if check_stack_exists
+        then
+            openstack stack update --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
+            ret=$?
+            exit $ret
+        fi
+        create_stack
+
+        ;;
+    delete_if_exists)
+        func_delete_if_exists
+
+        ;;
+
+
+    show)
+        check_stack_exists
+        echo $?
+ OUTPUT=$(openstack stack show $STACKNAME| grep -w stack_status) + echo $OUTPUT + ;; + + *) + usage + +esac diff --git a/CICD/heat/mgmtnode_HOT.yaml b/CICD/heat/mgmtnode_HOT.yaml new file mode 100644 index 00000000..d4f9858a --- /dev/null +++ b/CICD/heat/mgmtnode_HOT.yaml @@ -0,0 +1,38 @@ +heat_template_version: 2013-05-23 +parameters: + mynodename: + type: string + ssh_key: + type: string + image: + type: string + #avz: + # type: string + project_name: + type: string + ansible_ssh_user: + type: string + security_groups: + type: json + NetID: + type: string + #default: 915a3d96-693d-4c9d-a2ef-04996ab085d3 + default: Classic Provider + +resources: + + instance: + type: OS::Nova::Server + properties: + #availability_zone: { get_param: avz } + flavor: t3.xsmall + image: { get_param: image } + key_name: { get_param: ssh_key } + security_groups: { get_param: security_groups } + name: { get_param: mynodename } + metadata: + ansible_host_groups: [ ManagementNodes ] + ansible_ssh_user: { get_param: ansible_ssh_user } + project_name: { get_param: project_name } + networks: + - network: { get_param: NetID } diff --git a/CICD/heat/resource_registry.yaml b/CICD/heat/resource_registry.yaml new file mode 100644 index 00000000..0638b887 --- /dev/null +++ b/CICD/heat/resource_registry.yaml @@ -0,0 +1,2 @@ +resource_registry: + My::Server::MgmtNode: mgmtnode_HOT.yaml diff --git a/CICD/master_playbook.yml b/CICD/master_playbook.yml new file mode 100644 index 00000000..04dc7e74 --- /dev/null +++ b/CICD/master_playbook.yml @@ -0,0 +1,7 @@ +--- +- import_playbook: plays/make_files.yml +- import_playbook: plays/allnodes.yml +- import_playbook: plays/init_slurmconf.yml # this requires management nodes +- import_playbook: plays/nfssqlnodes.yml +- import_playbook: plays/mgmtnodes.yml +- import_playbook: plays/computenodes.yml diff --git a/plays/allnodes.yml b/CICD/plays/allnodes.yml similarity index 100% rename from plays/allnodes.yml rename to CICD/plays/allnodes.yml diff --git a/plays/computenodes.yml b/CICD/plays/computenodes.yml similarity index 93% rename from plays/computenodes.yml rename to CICD/plays/computenodes.yml index 208ad954..a43a5a92 100644 --- a/plays/computenodes.yml +++ b/CICD/plays/computenodes.yml @@ -61,4 +61,4 @@ strategy: free roles: - { role: slurm-start, start_slurmd: True, tags: [ slurm, slurmstart ] } - - { role: mate-de-install, tags: [ mate-de-install ] } # TODO this crashes for everything except cmca + #- { role: mate-de-install, tags: [ mate-de-install ] } # TODO this crashes for everything except cmca \ No newline at end of file diff --git a/plays/files b/CICD/plays/files similarity index 100% rename from plays/files rename to CICD/plays/files diff --git a/plays/init_slurmconf.yml b/CICD/plays/init_slurmconf.yml similarity index 100% rename from plays/init_slurmconf.yml rename to CICD/plays/init_slurmconf.yml diff --git a/plays/make_files.yml b/CICD/plays/make_files.yml similarity index 100% rename from plays/make_files.yml rename to CICD/plays/make_files.yml diff --git a/plays/mgmtnodes.yml b/CICD/plays/mgmtnodes.yml similarity index 100% rename from plays/mgmtnodes.yml rename to CICD/plays/mgmtnodes.yml diff --git a/plays/nfssqlnodes.yml b/CICD/plays/nfssqlnodes.yml similarity index 100% rename from plays/nfssqlnodes.yml rename to CICD/plays/nfssqlnodes.yml diff --git a/CICD/plays/roles b/CICD/plays/roles new file mode 120000 index 00000000..b741aa3d --- /dev/null +++ b/CICD/plays/roles @@ -0,0 +1 @@ +../../roles \ No newline at end of file diff --git a/plays/vars 
b/CICD/plays/vars similarity index 100% rename from plays/vars rename to CICD/plays/vars diff --git a/CICD/vars/filesystems.yml b/CICD/vars/filesystems.yml new file mode 100644 index 00000000..62d91742 --- /dev/null +++ b/CICD/vars/filesystems.yml @@ -0,0 +1,21 @@ +--- +computeNfsMounts: + - { name: '/home', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/home", 'opts': 'defaults,nofail', 'fstype':'nfs4' } + - { name: '/usr/local', ipv4: "118.138.235.37", src: "/usr_local", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + - { name: '/projects', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/projects", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + - { name: '/scratch', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/scratch", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } +mgmtNfsMounts: + - { name: '/mnt/home', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/home", 'opts': 'defaults,nofail', 'fstype':'nfs4' } + - { name: '/slurmstate', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/slurmstate", 'opts': 'defaults,nofail', 'fstype':'nfs4' } +dbvolumes: + - { fstype: 'ext4', name: 'dbvol', mntpt: '/dbvol', linkto: '/var/lib/mysql' } +nfsvolumes: + - { fstype: 'ext4', name: 'nfsvol', mntpt: '/nfsvol' } + - { fstype: 'ext4', name: 'slurmstate', mntpt: '/slurmstate' } +exportList: + - { name: '/home', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/home", 'opts': 'defaults,nofail', 'fstype':'nfs4' } + - { name: '/usr/local', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/usr_local_centos7", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + - { name: '/projects', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/projects", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + - { name: '/scratch', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/scratch", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + - { name: '/slurmstate', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/slurmstate", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' } + diff --git a/CICD/vars/ldapConfig.yml b/CICD/vars/ldapConfig.yml new file mode 100644 index 00000000..716826ee --- /dev/null +++ b/CICD/vars/ldapConfig.yml @@ -0,0 +1,50 @@ +--- +ldapServerHostIpLine: "118.138.241.196 hpcldap0.erc.monash.edu.au" +ldapCaCertContents: | + -----BEGIN CERTIFICATE----- + MIIGODCCBCCgAwIBAgIJAJPlOnRdsYibMA0GCSqGSIb3DQEBCwUAMIGoMQswCQYD + VQQGEwJBVTERMA8GA1UECAwIVmljdG9yaWExEDAOBgNVBAcMB0NsYXl0b24xIDAe + BgNVBAoMF01vbmFzaCBlUmVzZWFyY2ggQ2VudGVyMREwDwYDVQQLDAhIUEMgVGVh + bTEeMBwGA1UEAwwVTWVSQyBIUEMgVGVhbSBSb290IENBMR8wHQYJKoZIhvcNAQkB + FhBoZWxwQG1hc3NpdmUub3JnMB4XDTE1MDgxOTAyNDczOFoXDTM1MDgxNDAyNDcz + OFowgagxCzAJBgNVBAYTAkFVMREwDwYDVQQIDAhWaWN0b3JpYTEQMA4GA1UEBwwH + Q2xheXRvbjEgMB4GA1UECgwXTW9uYXNoIGVSZXNlYXJjaCBDZW50ZXIxETAPBgNV + BAsMCEhQQyBUZWFtMR4wHAYDVQQDDBVNZVJDIEhQQyBUZWFtIFJvb3QgQ0ExHzAd + BgkqhkiG9w0BCQEWEGhlbHBAbWFzc2l2ZS5vcmcwggIiMA0GCSqGSIb3DQEBAQUA + A4ICDwAwggIKAoICAQDJxc194E9MGucoutUvmVvT04D6M3S7LlySwQ5XJd4ec22z + csmpoEep+IPVjChVKTN0mRYagAlh5UZ6VYtNA29Lkd4GC5Q2IAlrR9+pgXupuD5v + Qv1pFGEuWEPp5PHn4053gYtdVQ0pZQ7ytkVqSW5TJPNcR9AwHpW7JuQkU1jRGCO0 + t8dthC1msT62UnfjXStznjATm+M253y5PF4IquGb1K6ArR79Os2Ds78NeLyZ24vC + ik2AA6QpzkOZOLzRZLyWn4Gdz/jyblZP/A/zjM83symIdn3dv0wC8A3hZsHP771X + tw2f6uyiXPftiJt0YuPQdw9kdbDda0Dp7UwiTdaUdzBsQYUGuCQhw3T3NurPZu83 + K4ftVnIez9VO+5buJQxX0dc0/w0fwIZVtMesdMt+08x6Cf9nVmDrheArTKYWOq0r + 5eNntg16JAVBixRMwiV+KL4VP/pSKXQK2a9WptzEjVHLSsN0oMAoHkBVz47fSIdD + O79jYak+yvPORMkqd0iwMnt0F+wg9JrMVhhCmU5vdqgwQy60LCHn23IX7x821YTt + inQM43FsvRCAwWabWinn1prPHLpzaeMgE0wSVBtd4CvPqQ0fW5HJjdOjzyKRim8d 
+ 1jN+1opa7CbcM2byfUU0yd1wU4jp5DSeZokV8ECr43pUymcc2dJwmTNApcg92wID + AQABo2MwYTAdBgNVHQ4EFgQUJ4sfHiRrNF3i/yAaV+OnIvfOAwgwHwYDVR0jBBgw + FoAUJ4sfHiRrNF3i/yAaV+OnIvfOAwgwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8B + Af8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAF/gyOaUKw0AUxfoWfC4+hsD/QFg + h+GvOTrT+xA5Z8qpaPJDJijVQ8zAVFRyUsZZ9ZXe+QkIqP1WXnX0ROeDJ3LRnaoO + Vq/jy1OratWDsoNCvhjY5ZY2eZh2CXQVj40BD6iZJpfgNayDsId7wUKTraBaZ+k4 + NXu65f6objeIx8febnazV7s9C0Ola2fpdv7/JmiiizFMn62codyztA6J9+HXirc5 + Pq+RKVqPvBEWRi2LKAsbOubFklXTwe8cTwmMFUT2BPp6gpwIXtaSOpBQX/Ynthp5 + LRGU/koLZSKAeYIoUPH4pJHe89fpgtOuKBjRlOFdnUjJ90xIh2dyZm3G4JyINwKF + HrdGsu+RunUtE1AfT5S21ilcSjqLvQUfciWEyRcnmAyi/9o7upJlQCNGcPy3l5kJ + VdpRBtmVK08k1S9HtvQvqY82fDEnbxzFOla2uPDQ3sE1LodvY4KUZrA9ML3EUyeG + F5mvvhUOSMkmB8VouE2gt0g4rFXtHL6nHQ7rr1Ha/xcm/dVQY4e4Z43OYEflRkNV + R6VdSNWq3Voh4ASrLfuv4/5Mbt5BnLKvzvnZVeNmJIh2Rc/eYfao1K7K6siAUhP2 + ONklIbbx/WISO5Vchcw65DclkEBZos2KqRoMb/Rxn5sFIvRWgrXvzw39o8agWO0K + 9jGyW0SYdK9x4Qxn + -----END CERTIFICATE----- +ldapCaCertFile: /etc/ssl/certs/cacert.crt +ldapDomain: "erc.monash.edu.au" +ldapURI: "ldaps://hpcldap0.erc.monash.edu.au:636" +ldapROURI: "ldaps://hpcldap1.erc.monash.edu.au:636" +ldapBindDN: "cn=ldapuser,ou=People,dc=erc,dc=monash,dc=edu,dc=au" +ldapBindDNPassword: "ldaprootf3D2o_lL" +ldapManagerDN: "cn=Manager,dc=erc,dc=monash,dc=edu,dc=au" +ldapBase: "dc=erc,dc=monash,dc=edu,dc=au" +ldapGroupBase: "ou=Groups,dc=erc,dc=monash,dc=edu,dc=au" +ldapRfc2307Pam: "" +ldap_access_filter: "(&(objectClass=posixAccount)(memberOf=cn=m3,ou=aclgroups,dc=erc,dc=monash,dc=edu,dc=au))" diff --git a/CICD/vars/names.yml b/CICD/vars/names.yml new file mode 100644 index 00000000..fa706376 --- /dev/null +++ b/CICD/vars/names.yml @@ -0,0 +1,3 @@ +--- +domain: massive.org.au +smtp_smarthost: smtp.monash.edu.au diff --git a/CICD/vars/passwords.yml b/CICD/vars/passwords.yml new file mode 100644 index 00000000..3d9b8430 --- /dev/null +++ b/CICD/vars/passwords.yml @@ -0,0 +1,7 @@ +--- +mungekey: ySdSOpFMyLihx4tQlR0znm07UlvALxB1 +slurmdb_passwd: ySdSOpFMyLihx4tQlR0znm07UlvALxB2 +sqlrootPasswd: ySdSOpFMyLihx4tQlR0znm07UlvALxB3 +sudo_group: systems +default_user_password_clear: ySdSOpFMyLihx4tQlR0znm07UlvALxBL +default_user_password: ySdSOpFMyLihx4tQlR0znm07UlvALxBL diff --git a/CICD/vars/slurm.yml b/CICD/vars/slurm.yml new file mode 100644 index 00000000..65def4d9 --- /dev/null +++ b/CICD/vars/slurm.yml @@ -0,0 +1,45 @@ +--- +desktopNodeList: + - { name : 'DesktopNodes', interface : 'eth0' } +clustername: "m3" +projectname: "m3" +slurm_version: 19.05.3-2 +munge_version: 0.5.11 +nhc_version: 1.4.2 +munge_dir: /opt/munge-{{ munge_version }} +slurm_dir: /opt/slurm-{{ slurm_version }} +nhc_dir: /opt/nhc-{{ nhc_version }} +nhc_config_file: nhc.conf +nhc_log_level: 0 +nhc_emails: nobody@nowhere.nowhere +nhc_email_subject: "Node Health Check" +openmpi_version: 1.8.3 +mysql_host: "{{ groups['SQLNodes'][0] }}" +slurmctrl: "{{ groups['ManagementNodes'][0] }}" +slurmctrlbackup: "{{ groups['ManagementNodes'][1] }}" +slurmdbd: "{{ groups['ManagementNodes'][0] }}" +slurmdbdbackup: "{{ groups['ManagementNodes'][1] }}" +slurm_use_vpn: false +slurm_lua: true +slurmqueues: + - {name: batch, group: ComputeNodes, default: yes} +# - {name: vis, group: DesktopNodes, default: no} +slurmlogin: "{{ groups['LoginNodes'][0] }}" +slurmlogdir: "/var/log" +slurmctlddebug: {level: 5, log: '/mnt/slurm-logs/slurmctld.log'} +slurmddebug: {level: 5, log: '/var/log/slurmd.log'} +slurmschedlog: {level: 5, log: '/mnt/slurm-logs/slurmsched.log'} +slurmdbdlog: {level: 5, log: 
'/mnt/slurm-logs/slurmdbd.log'} +slurmfairshare: {def: false, val: 10000} +slurmdatadir: "/opt/slurm/var/spool" +slurmstatedir: "/opt/slurm/var/state" +slurmsharedstatedir: "/slurmstate" +slurmpiddir: "/opt/slurm-latest/var/run" +slurmdbdpiddir: "/opt/slurm/var/run" +slurmaccount_create_user: "/usr/local/sbin/slurmuseraccount.sh" +slurm_provision: "/cinderVolume/local/sbin/slurm_provision.sh" +slurmselecttype: "select/linear" +slurmfastschedule: "1" +slurmschedulertype: "sched/backfill" +restartServerList: + - slurm diff --git a/CICD/vars/vars.yml b/CICD/vars/vars.yml new file mode 100644 index 00000000..d1dc95cf --- /dev/null +++ b/CICD/vars/vars.yml @@ -0,0 +1,10 @@ +--- +sudo_group: systems +nagios_home: "/var/lib/nagios" +nvidia_version: "390.46" + +gpumap: + 'K1': 'K1' + 'K80': 'K80' + 'P100-PCIE-16GB': 'P100' + 'V100-PCIE-16GB': 'V100' diff --git a/plays/roles b/plays/roles deleted file mode 120000 index d8c4472c..00000000 --- a/plays/roles +++ /dev/null @@ -1 +0,0 @@ -../roles \ No newline at end of file -- GitLab
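
The patch above wires Heat and Ansible together through GitLab CI. As a reference only, the sketch below strings together the commands that already appear in .gitlab-ci.yml, heatcicdwrapper.sh and ansible_create_cluster_script.sh into a rough local walkthrough of the same flow. It assumes the repository is checked out from its root with submodules initialised, that $NECTAR_ALLOCATION is set and the matching openrc file and gc_key.pem sit under ./keys/ as in the CI jobs, and that "mydevstack" is a placeholder stack name; treat it as an illustrative outline, not a supported entry point.

#!/bin/bash
# Illustrative sketch only: mirrors the CI stages by hand.
# Assumptions: repo root as working directory, ./keys/$NECTAR_ALLOCATION-openrc.sh
# and ./keys/gc_key.pem present, "mydevstack" is a placeholder stack name.
set -e
source ./keys/$NECTAR_ALLOCATION-openrc.sh

# 1. Spawn the cluster via Heat (the wrapper prefixes the name with CICD).
bash -x ./CICD/heat/heatcicdwrapper.sh create mydevstack
openstack stack list

# 2. Configure it with Ansible: builds the inventory, then runs master_playbook.yml.
export STACKNAME=mydevstack
bash -x ./CICD/ansible_create_cluster_script.sh

# 3. Run the same checks as the "tests" stage against a freshly generated inventory.
export ANSIBLE_HOST_KEY_CHECKING=False
python3 dependencies/ansible_cluster_in_a_box/scripts/make_inventory.py static CICD$STACKNAME | tee ./files/cicd_inventory
chmod 400 ./keys/gc_key.pem
bash -e ./tests/run_tests.sh all "files/cicd_inventory" "keys/gc_key.pem"

# 4. Tear the stack down again when finished.
bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists mydevstack

Because create_stack refuses to touch an existing stack, re-running the sketch against a stack that may already exist is safer via create_or_update (as the build_cluster_cicd job does) or after an explicit delete_if_exists.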