Run NHC from slurmd and build NHC from the upstream GitHub sources.

roles/calculateSlurmConf/templates/slurm.conf.j2
  * Set HealthCheckInterval=300 and point HealthCheckProgram at
    {{ nhc_dir }}/sbin/nhc.

roles/slurm-common/tasks/installNhc.yml
  * Install automake through yum or apt, selected by ansible_os_family.
  * Replace the wget + tar tasks with one unarchive task that fetches
    https://github.com/mej/nhc/archive/{{ nhc_version }}.tar.gz into /tmp.
  * Run ./autogen.sh before ./configure. The build task is guarded by
    "creates" on the Makefile rather than on configure, so a run that
    stopped inside ./configure is retried instead of skipped.
    NOTE(review): assumes ./configure writes a top-level Makefile - confirm.
  * Call nhc-genconf without -d and ignore a failure of that command.
  * Stop managing the nhc_monitor cron entry.
    NOTE(review): deleting the task does not delete an entry that an
    earlier run installed; confirm whether a state=absent task is needed.

Leading whitespace of context and removed lines was reconstructed and may
need adjusting (or a whitespace-tolerant apply) to match the target files.

diff --git a/roles/calculateSlurmConf/templates/slurm.conf.j2 b/roles/calculateSlurmConf/templates/slurm.conf.j2
index 4cc03ad73081b03a877d3ec7a488115f6c92bc70..4fcd70df3a61433f99b8c571c02bad8e2c78e908 100644
--- a/roles/calculateSlurmConf/templates/slurm.conf.j2
+++ b/roles/calculateSlurmConf/templates/slurm.conf.j2
@@ -139,6 +139,10 @@ AccountingStorageEnforce=limits,safe
 #
 #GRES
 #GresTypes=gpu
+#
+
+HealthCheckInterval=300
+HealthCheckProgram={{ nhc_dir }}/sbin/nhc
 
 # Fair share
 {% if slurmfairshare.def %}
diff --git a/roles/slurm-common/tasks/installNhc.yml b/roles/slurm-common/tasks/installNhc.yml
index 9d48a4e7adf23c6e61f6e06a356b2bb43fa69acf..be576b400466658e2f50fa869bb13c1b77ef3602 100644
--- a/roles/slurm-common/tasks/installNhc.yml
+++ b/roles/slurm-common/tasks/installNhc.yml
@@ -1,25 +1,36 @@
-- name: Download nhc source
-  shell: wget https://cvl.massive.org.au/warewulf-nhc-{{ nhc_version }}.tar.gz
-#  shell: wget http://warewulf.lbl.gov/downloads/releases/warewulf-nhc/warewulf-nhc-{{ nhc_version }}.tar.gz
-  args:
-    chdir: /tmp
-    creates: /tmp/warewulf-nhc-{{ nhc_version }}.tar.gz
+---
+- name: install automake (RedHat)
+  yum: name=automake state=present
+  become: true
+  become_user: root
+  when: ansible_os_family == 'RedHat'
+
+- name: install automake (Debian)
+  apt: name=automake state=present
+  become: true
+  become_user: root
+  when: ansible_os_family == 'Debian'
 
-- name: untar nhc
-  shell: tar zxf /tmp/warewulf-nhc-{{ nhc_version }}.tar.gz
+- name: unarchive nhc
+  unarchive:
   args:
-    chdir: /tmp
+    src: "https://github.com/mej/nhc/archive/{{ nhc_version }}.tar.gz"
+    copy: false
+    dest: /tmp
+    creates: /tmp/nhc-{{ nhc_version }}/autogen.sh
+
 
 - name: build nhc
-  shell: ./configure --prefix={{ nhc_dir }} && make
+  shell: ./autogen.sh && ./configure --prefix={{ nhc_dir }} && make
   args:
-    chdir: /tmp/warewulf-nhc-{{ nhc_version }}
+    chdir: /tmp/nhc-{{ nhc_version }}
+    creates: /tmp/nhc-{{ nhc_version }}/Makefile
 
 - name: install nhc
   shell: make install
   sudo: true
   args:
-    chdir: /tmp/warewulf-nhc-{{ nhc_version }}
+    chdir: /tmp/nhc-{{ nhc_version }}
 
 - name: ensure sysconfig dir exists
   file: dest=/etc/sysconfig state=directory owner=root group=root mode=755
@@ -43,8 +54,9 @@
   register: generate_nhc_config_file
 
 - name: generate config file
-  shell: "{{ nhc_dir }}/sbin/nhc-genconf -d -c {{ nhc_dir }}/etc/nhc/{{ nhc_config_file }} CONFDIR={{ nhc_dir }}/etc/nhc"
+  shell: "{{ nhc_dir }}/sbin/nhc-genconf -c {{ nhc_dir }}/etc/nhc/{{ nhc_config_file }} CONFDIR={{ nhc_dir }}/etc/nhc"
   sudo: true
+  ignore_errors: true
   when: generate_nhc_config_file
 
 - name: config file extension
@@ -54,7 +66,3 @@
   sudo: true
   when: nhc_user_conf is defined and generate_nhc_config_file
 
-- name: start cron job
-  cron: name=nhc_monitor job={{ nhc_dir }}/sbin/nhc_cron user=root minute=*/5 state=present
-  sudo: true
-