Commit cd14b158 authored by Andreas Hamacher

moving files from repository cluster_cicd to this one. DIE submodule DIE

Former-commit-id: 2e6036f5
parent 3fc1a58f
Showing 632 additions and 1 deletion
*.retry
*-openrc.sh
gc_key.pem
variables:
GIT_SUBMODULE_STRATEGY: recursive
STACKNAME: _reporef$CI_COMMIT_REF_NAME
NECTAR_ALLOCATION: HPCCICD
stages:
# - integration_test_downstream # working but unwanted here
# - trigger_pipeline_in_B # working but unwanted here
- yamllint
# - heat
- ansible_create_cluster_stage
- push_button_spawn_cluster
# - e2e
- tests
# - clean # only on master
#trigger_pipeline_in_B:
# stage: integration_test_downstream
# tags:
# - ansible
# script:
# - "curl --request POST --form token=${CI_JOB_TOKEN} --form ref=master https://gitlab.erc.monash.edu.au/api/v4/projects/1085/trigger/pipeline" # ID is from pysshauthz
heat_test:
stage: heat
allow_failure: false
tags:
- heat
before_script:
- source ./keys/$NECTAR_ALLOCATION-openrc.sh
- export HEAT_TEST_STACKNAME=_TESTING_HEAT
- bash -x ./heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
- sleep 60
script:
- echo "heat_test stage"
- source ./keys/$NECTAR_ALLOCATION-openrc.sh
- bash -x ./heat/heatcicdwrapper.sh create $HEAT_TEST_STACKNAME
- openstack stack list
- bash -x ./heat/heatcicdwrapper.sh update $HEAT_TEST_STACKNAME
- openstack stack list
- bash -x ./heat/heatcicdwrapper.sh delete_if_exists $HEAT_TEST_STACKNAME
- openstack stack list
after_script:
- sleep 20 # artificially wait a bit to make sure it is really dead
only:
changes: #https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
- "heat/heatcicdwrapper.sh"
- "heat/*.{yml,yaml}"
- schedules
#- CICD/.gitlab-ci.yml
yamllint:
stage: yamllint
allow_failure: false
tags:
- yamllint
script:
- echo "stage yamllint"
- ansible-lint -x ANSIBLE0002 master_playbook.yml
- yamllint -c ./CICD/.yamllintheat.yaml ./CICD/heat
build_cluster_cicd:
stage: heat
allow_failure: false
tags:
- heat
script:
- echo "heat stage"
- source ./$NECTAR_ALLOCATION-openrc.sh
- openstack stack list
- bash -x ./CICD/heat/heatcicdwrapper.sh create_or_update $STACKNAME
after_script:
- sleep 20 # artificially wait a bit to give the nodes time to boot
only:
changes: #https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
- "heat/*HOT*.yaml"
- schedules
- ./.gitlab-ci.yml
ansible_create_cluster_stage:
stage: ansible_create_cluster_stage
tags:
- ansible
script:
- echo "ansible_create_cluster_stage"
- bash -x ./CICD/ansible_create_cluster_script.sh
#after_script:
#- rm ./files/cicd_inventory
#only:
# changes: #https://docs.gitlab.com/ee/ci/yaml/#onlychangesexceptchanges
# - "master_playbook.yml"
# - "vars/*.{yml,yaml}"
# - schedules
# - CICD/.gitlab-ci.yml
tests:
stage: tests
tags:
- ansible
script:
- echo "tests stage"
- export ANSIBLE_HOST_KEY_CHECKING=False
- source ./keys/$NECTAR_ALLOCATION-openrc.sh
- python3 dependencies/ansible_cluster_in_a_box/scripts/make_inventory.py static CICD$STACKNAME | tee ./files/cicd_inventory
- grep -qv "I could not find any resouces tagged with project_name:" ./files/cicd_inventory #fail if inventory file is empty
- chmod 755 ./files/cicd_inventory
- chmod 400 ./keys/gc_key.pem
- ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "sinfo" ManagementNodes
- ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "squeue" ManagementNodes
- ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet mariadb" SQLNodes
- ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet slurmctld" ManagementNodes
- ansible -B 30 -i files/cicd_inventory --key-file keys/gc_key.pem -a "systemctl is-active --quiet slurmdbd" ManagementNodes
- bash -e ./tests/run_tests.sh all "files/cicd_inventory" "keys/gc_key.pem"
- bash -e ./tests/run_tests.sh ComputeNodes "files/cicd_inventory" "keys/gc_key.pem"
- bash -e ./tests/run_tests.sh LoginNodes "files/cicd_inventory" "keys/gc_key.pem"
- bash -e ./tests/run_tests.sh ManagementNodes "files/cicd_inventory" "keys/gc_key.pem"
- bash -e ./tests/run_tests.sh NFSNodes "files/cicd_inventory" "keys/gc_key.pem"
- bash -e ./tests/run_tests.sh SQLNodes "files/cicd_inventory" "keys/gc_key.pem"
# licensing https://gitlab.erc.monash.edu.au/hpc-team/license_server/tree/master/roles/avizo_license_monitor
manual_cluster_spawn:
stage: push_button_spawn_cluster
tags:
- heat
- ansible
before_script:
- echo "press button spawn cluster."
- echo "for this to work you have to provide a variable called manual stackname"
- echo I still need to handle os password
- echo $MANUAL_STACKNAME
script:
- source ./keys/$NECTAR_ALLOCATION-openrc.sh
- bash -x ./heat/heatcicdwrapper.sh create $MANUAL_STACKNAME
- git submodule update --init --remote --merge
- openstack stack list
- export STACKNAME=$MANUAL_STACKNAME
- sleep 25
- bash -x CICD/ansible_create_cluster_script.sh
environment:
name: openstack
url: https://dashboard.rc.nectar.org.au/project/instances/
when: manual
only:
refs:
- "devel"
clean:
stage: clean
tags:
- heat
before_script:
- echo "cleanup master"
- sleep 30
script:
- source ./keys/$NECTAR_ALLOCATION-openrc.sh
- bash -x ./heat/heatcicdwrapper.sh delete_if_exists _reporefmaster # hardcoding master stackname on purpose here
only:
refs:
- "master"
changes:
- schedules
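Most jobs above start by sourcing ./keys/$NECTAR_ALLOCATION-openrc.sh. That file is deliberately not in the repository (it is ignored via *-openrc.sh); as a rough, hedged sketch only, an OpenStack RC file for the HPCCICD allocation would typically look like this (endpoint and all values are placeholders, not taken from this commit):

#!/bin/bash
# Hypothetical HPCCICD-openrc.sh -- illustrative only, every value is a placeholder
export OS_AUTH_URL=https://keystone.rc.nectar.org.au:5000/v3/   # assumed Nectar Keystone endpoint
export OS_PROJECT_NAME=HPCCICD
export OS_USERNAME=ci-robot@example.org
export OS_PASSWORD=CHANGE_ME
export OS_USER_DOMAIN_NAME=Default
export OS_PROJECT_DOMAIN_NAME=Default
export OS_IDENTITY_API_VERSION=3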
#!/bin/bash
set -e
export ANSIBLE_HOST_KEY_CHECKING=False
source ./$NECTAR_ALLOCATION-openrc.sh
cd CICD
echo "[ansible_create_cluster_script] Prefixing the Stackname with CICD because heatcicdwrapper does he same"
python3 ../scripts/make_inventory.py static CICD$STACKNAME | tee ./files/inventory.$STACKNAME && chmod 755 ./files/inventory.$STACKNAME
grep -qv "I could not find any resouces tagged with project_name:" ./files/inventory.$STACKNAME #fail if inventory file is empty
chmod 400 ../gc_key.pem
ansible -m ping -i files/inventory.$STACKNAME --key-file ../gc_key.pem all
ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -a "sudo ls" all
#cd roles
#- "egrep -lRZ 'sudo: true' . | xargs -0 -l sed -i -e 's/sudo: true/become: true/g' "
#cd ..
ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem master_playbook.yml
sleep 15
echo uglyuglyfix
ansible -i files/inventory.$STACKNAME --key-file ../gc_key.pem -b -a "systemctl restart slurmdbd" ManagementNodes
\ No newline at end of file
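The script above depends on make_inventory.py emitting a static inventory containing the groups referenced later (ManagementNodes, SQLNodes, and so on, matching the ansible_host_groups metadata in the Heat template). If the ansible ping step fails, a quick way to check what was actually generated is ansible-inventory; this is a hedged suggestion, not part of the script:

# inspect the generated inventory; STACKNAME as exported by the CI job
ansible-inventory -i files/inventory.$STACKNAME --graph
ansible-inventory -i files/inventory.$STACKNAME --list   # full JSON dump of hosts and vars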
inventory.*
/nfsvol/home *(fsid=1,rw,no_root_squash)
/slurmstate *(fsid=2,rw,no_root_squash)
/nfsvol/projects *(fsid=4,rw,no_root_squash)
/nfsvol/scratch *(fsid=5,rw,no_root_squash)
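These NFS exports presumably live in /etc/exports on the NFS/SQL node, which carries the nfsvol and slurmstate volumes defined in the Heat template further down. A hedged sketch of how a client node could verify and mount one of them (the server name is a placeholder for whatever resolves to the NFSNodes host):

# on a client node; <nfs-server> is the NFSNodes host
showmount -e <nfs-server>                            # list the exported filesystems
sudo mount -t nfs <nfs-server>:/nfsvol/home /mnt     # example mount of the home export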
127.0.0.1 localhost
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
118.138.241.196 hpcldap0.erc.monash.edu.au
118.138.244.7 consistency0
118.138.235.28 CICDCICD-mgmt0.massive.org.au CICDCICD-mgmt0
118.138.233.249 CICDCICD-login0.massive.org.au CICDCICD-login0
118.138.234.145 CICDCICD-computec0.massive.org.au CICDCICD-computec0
118.138.234.104 CICDCICD-sql0.massive.org.au CICDCICD-sql0
118.138.235.60 CICDCICD-mgmt1.massive.org.au CICDCICD-mgmt1
---
heat_template_version: 2013-05-23
description: "A simple template to boot a cluster of desktops (LoginNode, ManagementNodes and Desktop Nodes)"
# avz parameters disabled. They work, but I want more options than just monash-02; ideally a parameter that says "I don't care"
parameters:
ubuntu_1804_image_id:
type: string
label: Image ID
description: Ubuntu Image
default: 99d9449a-084f-4901-8bd8-c04aebd589ca
centos_7_image_id:
type: string
label: Image ID
description: Centos Image
default: 12da1997-5122-4be3-a2a9-2f44961c1b16
ssh_key:
type: string
default: gc_key
avz:
type: string
default: monash-02
project_name:
type: string
NetID:
type: string
default: Classic Provider
Flavour:
type: string
default: t3.xsmall
resources:
SlurmSecGroup:
type: "OS::Neutron::SecurityGroup"
properties:
name: "heatslurmsecgroup"
rules: [ { protocol: tcp,
port_range_min: 12000,
port_range_max: 12999},
{ protocol: tcp,
port_range_min: 6817,
port_range_max: 6819},
{ protocol: tcp,
port_range_min: 1019,
port_range_max: 1019}]
NFSSecGroup:
type: "OS::Neutron::SecurityGroup"
properties:
name: "heatnfssecgroup"
rules: [ { protocol: tcp,
port_range_min: 2049,
port_range_max: 2049},
{ protocol: tcp,
port_range_min: 111,
port_range_max: 111},
{ protocol: udp,
port_range_min: 2049,
port_range_max: 2049},
{ protocol: udp,
port_range_min: 111,
port_range_max: 111}]
MySQLSecGroup:
type: "OS::Neutron::SecurityGroup"
properties:
name: "heatmysqlsecgroup"
rules: [ { protocol: tcp,
port_range_min: 3306,
port_range_max: 3306} ]
SSHMonashSecGroup:
type: "OS::Neutron::SecurityGroup"
properties:
name: "SSHMonashSecGroup"
rules: [ { protocol: tcp,
port_range_min: 22,
port_range_max: 22,
direction: ingress,
remote_ip_prefix: 118.138.240.0/21
} ]
# SSHInternalSecGroup:
# type: "OS::Neutron::SecurityGroup"
# properties:
# name: "SSHInternalSecGroup"
# rules: [ { protocol: tcp,
# port_range_min: 22,
# port_range_max: 22,
# direction: ingress} ]
#remote_ip_prefix: { get_param: REMOTE_IP }, direction: ingress
webaccess:
type: "OS::Neutron::SecurityGroup"
properties:
name: "webaccess"
rules: [ { protocol: tcp,
port_range_min: 80,
port_range_max: 80},
{ protocol: tcp,
port_range_min: 443,
port_range_max: 443} ]
SQLNode0:
type: "OS::Nova::Server"
properties:
name:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'sql0' ]]
availability_zone: { get_param: avz }
flavor: t3.small
image: { get_param: centos_7_image_id }
key_name: { get_param: ssh_key }
security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: MySQLSecGroup }, { get_resource: NFSSecGroup } ]
metadata:
ansible_host_groups: [ SQLNodes, NFSNodes ]
ansible_ssh_user: ec2-user
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
NFSVolume:
type: OS::Cinder::Volume
properties:
availability_zone: { get_param: avz }
size: 1
name: nfsvol
NFSVolumeAttachment:
type: "OS::Cinder::VolumeAttachment"
properties:
volume_id: { get_resource: NFSVolume }
instance_uuid: { get_resource: SQLNode0 }
SLURMSTATEVolume:
type: OS::Cinder::Volume
properties:
availability_zone: { get_param: avz }
size: 1
name: slurmstate
SLURMSTATEVolumeAttachment:
type: "OS::Cinder::VolumeAttachment"
properties:
volume_id: { get_resource: SLURMSTATEVolume }
instance_uuid: { get_resource: SQLNode0 }
DBVolume:
type: OS::Cinder::Volume
properties:
availability_zone: { get_param: avz }
size: 512
name: dbvol
DBVolumeAttachment:
type: "OS::Cinder::VolumeAttachment"
properties:
volume_id: { get_resource: DBVolume }
instance_uuid: { get_resource: SQLNode0 }
MgmtNodes:
type: "OS::Heat::ResourceGroup"
properties:
count: 2
resource_def:
type: My::Server::MgmtNode
properties:
#avz: { get_param: avz }
image: { get_param: centos_7_image_id }
ansible_ssh_user: ec2-user
mynodename:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'mgmt%index%' ]]
ssh_key: { get_param: ssh_key }
security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup }, { get_resource: MySQLSecGroup } ]
project_name: { get_param: project_name }
LoginNodes:
type: "OS::Heat::ResourceGroup"
properties:
count: 1
resource_def:
type: "OS::Nova::Server"
properties:
#availability_zone: { get_param: avz }
flavor: t3.small
image: { get_param: centos_7_image_id }
key_name: { get_param: ssh_key }
name:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'login%index%' ]]
security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
metadata:
ansible_host_groups: [ LoginNodes ]
ansible_ssh_user: ec2-user
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
DesktopNodes:
type: "OS::Heat::ResourceGroup"
properties:
count: 0
resource_def:
type: "OS::Nova::Server"
properties:
#availability_zone: { get_param: avz }
flavor: t3.small
image: { get_param: centos_7_image_id }
key_name: { get_param: ssh_key }
name:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'desktopc%index%' ]]
security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
metadata:
ansible_host_groups: [ DesktopNodes, VisNodes, ComputeNodes ]
ansible_ssh_user: ec2-user
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
ComputeNodes:
type: "OS::Heat::ResourceGroup"
properties:
count: 1
resource_def:
type: "OS::Nova::Server"
properties:
#availability_zone: { get_param: avz }
flavor: t3.small
image: { get_param: centos_7_image_id }
key_name: { get_param: ssh_key }
name:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'computec%index%' ]]
security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
metadata:
ansible_host_groups: [ ComputeNodes ]
ansible_ssh_user: ec2-user
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
UbuntuDesktopNodes:
type: "OS::Heat::ResourceGroup"
properties:
count: 0
resource_def:
type: "OS::Nova::Server"
properties:
#availability_zone: { get_param: avz }
flavor: t3.small
image: { get_param: ubuntu_1804_image_id }
key_name: { get_param: ssh_key }
name:
list_join: [ '-', [ { get_param: "OS::stack_name" }, 'desktopu%index%' ]]
security_groups: [ default, { get_resource: SSHMonashSecGroup }, { get_resource: SlurmSecGroup }, { get_resource: NFSSecGroup } ]
metadata:
ansible_host_groups: [ DesktopNodes ]
ansible_ssh_user: ubuntu
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
# PySSHauthz:
# type: "OS::Nova::Server"
# properties:
# name:
# list_join: [ '-', [ { get_param: "OS::stack_name" }, 'pysshautz' ]]
# availability_zone: { get_param: avz }
# flavor: t3.xsmall
# image: { get_param: ubuntu_1804_image_id }
# key_name: { get_param: ssh_key }
# security_groups: [ { get_resource: SSHMonashSecGroup }, { get_resource: webaccess } ]
# metadata:
# ansible_host_groups: [ PySSHauthz ]
# ansible_ssh_user: ubuntu
# project_name: { get_param: project_name }
# networks:
# - network: { get_param: NetID }
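The wrapper script below drives this template, but it can also be launched by hand with the same arguments the wrapper uses. A minimal sketch, assuming an openrc file has been sourced and the working directory is the repository root (the stack name is a placeholder):

openstack stack create --wait \
  --template ./CICD/heat/gc_HOT.yaml \
  -e ./CICD/heat/resource_registry.yaml \
  --parameter "project_name=CICDmytest" \
  CICDmytest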
#!/bin/bash
# This script does not check available resources on Nectar!
function usage {
echo $"Usage: $0 {create|update|show|create_or_update,delete_if_exists} STACKNAME"
exit 1
}
if [ "$#" -ne 2 ]; then
echo "Illegal number of parameters expecting 2"
usage
fi
STACKNAME=$2
STACKNAME="CICD"$STACKNAME
echo "[heatcicdwrapper] Prefixing Stackname with CICD. This is a safety feature because this script can also delete stacks"
function check_stack_exists {
if openstack stack list | grep -w $STACKNAME;
then
echo "stack found";
else
echo "stack not found";
return 1
fi
}
function func_delete_if_exists {
if ! check_stack_exists
then
exit 0
fi
openstack stack delete -y --wait $STACKNAME
ret=$?
if [ $ret -ne "0" ]
then
sleep 15
openstack stack delete -y --wait $STACKNAME
ret=$?
fi
exit $ret
}
function create_stack {
if check_stack_exists
then
echo "I will NOT create existing stack maybe use update"
exit -44
fi
openstack stack create --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
createreturn=$?
if [ $createreturn -ne "0" ]
then
echo "Creation failed. Trying to delete the stack"
openstack stack delete -y --wait $STACKNAME
exit -47
fi
exit $createreturn
}
case "$1" in
create)
create_stack
;;
update)
if ! check_stack_exists
then
echo "I cannot update a stack which does not exist"
exit -45
fi
openstack stack update --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
ret=$?
exit $ret
;;
create_or_update)
if check_stack_exists
then
openstack stack update --wait --template ./CICD/heat/gc_HOT.yaml --parameter "project_name=$STACKNAME" -e ./CICD/heat/resource_registry.yaml $STACKNAME
ret=$?
exit $ret
fi
create_stack
;;
delete_if_exists)
func_delete_if_exists
;;
show)
check_stack_exists
echo $?
OUTPUT=$(openstack stack show $STACKNAME| grep -w stack_status)
echo $OUTPUT
;;
*)
usage
esac
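Example invocations of the wrapper (paths assume the repository root and a sourced openrc file; note that the script prefixes every stack name with CICD, so these all operate on the stack CICDmytest):

source ./keys/HPCCICD-openrc.sh          # credentials first; filename is a placeholder
bash -x ./CICD/heat/heatcicdwrapper.sh create_or_update mytest
bash -x ./CICD/heat/heatcicdwrapper.sh show mytest
bash -x ./CICD/heat/heatcicdwrapper.sh delete_if_exists mytest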
heat_template_version: 2013-05-23
parameters:
mynodename:
type: string
ssh_key:
type: string
image:
type: string
#avz:
# type: string
project_name:
type: string
ansible_ssh_user:
type: string
security_groups:
type: json
NetID:
type: string
#default: 915a3d96-693d-4c9d-a2ef-04996ab085d3
default: Classic Provider
resources:
instance:
type: OS::Nova::Server
properties:
#availability_zone: { get_param: avz }
flavor: t3.xsmall
image: { get_param: image }
key_name: { get_param: ssh_key }
security_groups: { get_param: security_groups }
name: { get_param: mynodename }
metadata:
ansible_host_groups: [ ManagementNodes ]
ansible_ssh_user: { get_param: ansible_ssh_user }
project_name: { get_param: project_name }
networks:
- network: { get_param: NetID }
resource_registry:
My::Server::MgmtNode: mgmtnode_HOT.yaml
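resource_registry.yaml maps the custom type My::Server::MgmtNode, used by the MgmtNodes ResourceGroup in gc_HOT.yaml, onto mgmtnode_HOT.yaml. One way to see the nested resources this produces once a stack exists (a hedged example; the stack name is a placeholder):

openstack stack resource list --nested-depth 2 CICDmytest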
---
- import_playbook: plays/make_files.yml
- import_playbook: plays/allnodes.yml
- import_playbook: plays/init_slurmconf.yml # this requires management nodes
- import_playbook: plays/nfssqlnodes.yml
- import_playbook: plays/mgmtnodes.yml
- import_playbook: plays/computenodes.yml
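The CI runs this playbook in full via ansible_create_cluster_script.sh; when debugging a single node group it can also be run by hand with a host limit. A hedged sketch, using the inventory and key paths from that script:

ansible-playbook -i files/inventory.$STACKNAME --key-file ../gc_key.pem master_playbook.yml --limit ComputeNodes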
File moved
@@ -61,4 +61,4 @@
strategy: free
roles:
- { role: slurm-start, start_slurmd: True, tags: [ slurm, slurmstart ] }
- { role: mate-de-install, tags: [ mate-de-install ] } # TODO this crashes for everything except cmca
#- { role: mate-de-install, tags: [ mate-de-install ] } # TODO this crashes for everything except cmca
\ No newline at end of file
File moved
File moved
File moved
File moved
File moved
../../roles
\ No newline at end of file
File moved