---
# qa.yml
#- hosts: 'all'
  #gather_facts: false # not sure if false is clever here
  #tasks:
  #- include_vars: vars/ldapConfig.yml
  #- include_vars: vars/filesystems.yml 
  #- include_vars: vars/slurm.yml 
  #- include_vars: vars/vars.yml 
  #- { name: set use shared state, set_fact: usesharedstatedir=False }
  #tags: [ always ]
  
# This playbook is roughly sorted by:
# - host group: ComputeNodes first, then ComputeNodes,LoginNodes, and VisNodes last
# - "tag groups", each introduced by a #comment (e.g. the #misc comment and the misc tag)
# QA checks run against the ComputeNodes host group.
# Every task carries the special `never` tag, so nothing here runs unless one
# of its other tags is requested explicitly with --tags.
- hosts: 'ComputeNodes'
  gather_facts: false
  tasks:
  # These are just templates.
  # Note the tag never! Everything tagged never is only executed if called
  # explicitly, e.g. ansible-playbook --tags=foo,bar OR --tags=tag_group
  - name: template_shell
    shell: ls
    tags: [never, tag_group, uniquetag_foo]
  - name: template_command
    command: uname
    args:
      chdir: /bin
    tags: [never, tag_group, uniquetag_bar]
  - name: template_scipt
    script: ./scripts/qa/test.sh
    tags: [never, tag_group, uniquetag_script]

  #mpi stuff
  # Placeholders: both tasks still run `ls` and are tagged TODO; the real MPI
  # invocations have not been written yet (candidate commands are kept in the
  # comments below).
  - name: run mpi on one computenode
    command: ls
    args:
      chdir: /tmp
    failed_when: "TODO is TRUE"
    tags: [never, mpi, mpi_local, TODO]
  - name: run mpi on two computenode
    command: ls
    args:
      chdir: /tmp
    failed_when: "TODO is TRUE"
    tags: [never, mpi, mpi_local_two, TODO]
  #- { name: run mpi via sbatch, command: cmd=ls chdir="/tmp" , failed_when: "TODO is TRUE", tags: [never,mpi,slurm_mpi,TODO] }
  #- { name: mpi_pinging,        command: cmd=ls chdir="/tmp" , failed_when: "TODO is TRUE", tags: [never,mpi,mpi_ping,TODO] }
   #module load openmpi/3.1.6-ucx;mpirun --mca btl self --mca pml ucx -x UCX_TLS=mm  -n 24 /projects/pMOSP/mpi/parallel_mandelbrot/parallel/mandelbrot
   #module load openmpi/3.1.6-ucx;srun mpirun --mca btl self --mca pml ucx -x UCX_TLS=mm  -n 24 /projects/pMOSP/mpi/parallel_mandelbrot/parallel/mandelbrot

  #slurm
  - name: slurmd should be running
    service:
      name: slurmd
      state: started
    tags: [never, slurm, slurmd]
  - name: munged should be running
    service:
      name: munged
      state: started
    tags: [never, slurm, munged]
  - name: ensure connectivity to the controller
    shell: scontrol ping
    tags: [never, slurm, scontrol_ping]
  - name: the most simple srun test
    shell: srun --reservation=AWX hostname
    tags: [never, slurm, srun_hostname]

  #nhc, manually run nhc because it contains many tests
  - name: run nhc explicitly
    command: /opt/nhc-1.4.2/sbin/nhc -c /opt/nhc-1.4.2/etc/nhc/nhc.conf
    become: true
    tags: [never, slurm, nhc]

  # networking
  # NOTE(review): "ping license server" currently only runs `ls`; it looks like
  # a placeholder for a real connectivity check - confirm.
  - name: ping license server
    shell: ls
    tags: [never, network, ping_license]
  - name: ping something outside monash
    command: ping -c 1 8.8.8.8
    tags: [never, network, ping_external]
#mounts
# Mount-point and miscellaneous checks run against both the ComputeNodes and
# LoginNodes host groups. All tasks are tagged `never`, so they only run when
# one of their other tags is selected with --tags.
- hosts: 'ComputeNodes,LoginNodes'
  gather_facts: false
  tasks:
  - name: check mount for usr_local
    shell: "mount | grep -q local"
    tags: [never, mountpoints, mountpoints_local]
  # NOTE(review): the projects and scratch checks are not part of the
  # `mountpoints` tag group, unlike the local and home checks - confirm
  # whether that is intentional.
  - name: check mount for projects
    shell: "lfs df -h"
    tags: [never, mountpoints_projects]
  - name: check mount for home
    shell: "mount | grep -q home"
    tags: [never, mountpoints, mountpoints_home]
  - name: check mount for scratch
    shell: "mount | grep -q scratch"
    tags: [never, mountpoints_scratch]

  #misc
  # NOTE(review): named "check singularity" but the command loads and runs
  # octave; presumably the octave module depends on singularity - confirm.
  - name: check singularity
    shell: module load octave && octave --version
    tags: [never, misc, singularity3]
  # The command is given in free form and the interpreter via args. Inside a
  # key=value string `cmd=` is not parsed as a module option, so it would be
  # handed to the shell as a plain variable assignment and `module load gcc`
  # would never actually run.
  - name: module test
    shell: module load gcc
    args:
      executable: /bin/bash
    tags: [never, misc, modulecmd]
  # Placeholder: the command text is a note, not a real ldapsearch call (TODO tag).
  - name: contact ldap
    shell: maybe test ldapsearch
    failed_when: "TODO is TRUE"
    tags: [never, misc, ldap, TODO]
#gpu
# GPU checks run against the VisNodes host group. Both tasks are tagged
# `never`, so they only run when selected explicitly with --tags
# (e.g. --tags=gpu, or --tags=smi for the driver check alone).
- hosts: 'VisNodes'
  gather_facts: false
  tasks:
  # Commands are written in free form: a `cmd=` prefix inside a key=value
  # string is not treated as a module option by the command module and would
  # become part of the word that gets executed.
  - name: run nvida-smi to see if a gpu driver is present
    command: /bin/nvidia-smi
    tags: [never, gpu, smi]
  - name: run gpu burn defaults to 30 seconds
    command: /usr/local/gpu_burn/1.0/run_silent.sh
    tags: [never, gpu, long, gpuburn]


# extended time-consuming tests
# relion see https://docs.massive.org.au/communities/cryo-em/tuning/tuning.html
# linpack

#module load openmpi/1.10.7-mlx;ldd /usr/local/openmpi/1.10.7-mlx/bin/* | grep -ic found