diff --git a/dynamicInventory b/dynamicInventory
index bd89abf0607902f4daf9912b2821bdf5f1689735..5ada7f57b7544089b04deef1de67a7b92b1fd1a9 100755
--- a/dynamicInventory
+++ b/dynamicInventory
@@ -1,12 +1,26 @@
 #!/usr/bin/env python
 import sys, os, string, subprocess, socket, re
 import copy, shlex,uuid, random, multiprocessing, time, shutil, json
-import novaclient.v1_1.client as nvclient
-import novaclient.exceptions as nvexceptions
+#import novaclient.v1_1.client as nvclient
+#import novaclient.exceptions as nvexceptions
 from keystoneclient.auth.identity import v2 as v2_auth
-from heatclient import client as heat_client
+#from heatclient import client as heat_client
+#from novaclient import client as nova_client
+#from cinderclient import client as cinder_client
+import heatclient
+import novaclient
+import cinderclient
+import heatclient.client
+import novaclient.client
+import cinderclient.client
+import keystoneclient.client
+from keystoneclient.auth.identity import v2
+from keystoneclient import session
+from novaclient import client
 from keystoneclient import session as kssession
+#NOVA_STANDALONE=True
+NOVA_STANDALONE=False
 
 class OpenStackConnection:
 
@@ -18,82 +32,6 @@ class OpenStackConnection:
         self.tenantID= os.environ['OS_TENANT_ID']
         self.authUrl="https://keystone.rc.nectar.org.au:5000/v2.0"
 
-    def _get_keystone_v2_auth(self, v2_auth_url, **kwargs):
-        auth_token = kwargs.pop('auth_token', None)
-        tenant_id = kwargs.pop('project_id', None)
-        tenant_name = kwargs.pop('project_name', None)
-        if auth_token:
-            return v2_auth.Token(v2_auth_url, auth_token,
-                                 tenant_id=tenant_id,
-                                 tenant_name=tenant_name)
-        else:
-            return v2_auth.Password(v2_auth_url,
-                                    username=kwargs.pop('username', None),
-                                    password=kwargs.pop('password', None),
-                                    tenant_id=tenant_id,
-                                    tenant_name=tenant_name)
-
-
-    def _get_keystone_session(self, **kwargs):
-        # first create a Keystone session
-        cacert = kwargs.pop('cacert', None)
-        cert = kwargs.pop('cert', None)
-        key = kwargs.pop('key', None)
-        insecure = kwargs.pop('insecure', False)
-        timeout = kwargs.pop('timeout', None)
-        verify = kwargs.pop('verify', None)
-
-        # FIXME(gyee): this code should come from keystoneclient
-        if verify is None:
-            if insecure:
-                verify = False
-            else:
-                # TODO(gyee): should we do
-                # heatclient.common.http.get_system_ca_fle()?
-                verify = cacert or True
-        if cert and key:
-            # passing cert and key together is deprecated in favour of the
-            # requests lib form of having the cert and key as a tuple
-            cert = (cert, key)
-        return kssession.Session(verify=verify, cert=cert, timeout=timeout)
-
-    def _get_keystone_auth(self, session, auth_url, **kwargs):
-        # FIXME(dhu): this code should come from keystoneclient
-
-        # discover the supported keystone versions using the given url
-        v2_auth_url=auth_url
-        v3_auth_url=None
-
-        # Determine which authentication plugin to use. First inspect the
-        # auth_url to see the supported version. If both v3 and v2 are
-        # supported, then use the highest version if possible.
-        auth = None
-        if v3_auth_url and v2_auth_url:
-            user_domain_name = kwargs.get('user_domain_name', None)
-            user_domain_id = kwargs.get('user_domain_id', None)
-            project_domain_name = kwargs.get('project_domain_name', None)
-            project_domain_id = kwargs.get('project_domain_id', None)
-
-            # support both v2 and v3 auth. Use v3 if domain information is
-            # provided.
-            if (user_domain_name or user_domain_id or project_domain_name or
-                    project_domain_id):
-                auth = self._get_keystone_v3_auth(v3_auth_url, **kwargs)
-            else:
-                auth = self._get_keystone_v2_auth(v2_auth_url, **kwargs)
-        elif v3_auth_url:
-            # support only v3
-            auth = self._get_keystone_v3_auth(v3_auth_url, **kwargs)
-        elif v2_auth_url:
-            # support only v2
-            auth = self._get_keystone_v2_auth(v2_auth_url, **kwargs)
-        else:
-            raise exc.CommandError(_('Unable to determine the Keystone '
-                                     'version to authenticate with using the '
-                                     'given auth_url.'))
-
-        return auth
-
     def get_stack_name(self,stack):
         stacks=[]
         for s in self.hc.stacks.list():
@@ -108,46 +46,30 @@ class OpenStackConnection:
         raise Exception("You have multiple heat stacks in your OpenStack Project and I'm not sure which one to use.\n You can select a stack by symlinking to a stack, for example if you have a stack called mycluster do ln -s %s mycluster\n"%stack)
 
     def auth(self):
-        self.nc = nvclient.Client( auth_url=self.authUrl,
-                                   username=self.username,
-                                   api_key=self.passwd,
-                                   project_id=self.tenantName,
-                                   tenant_id=self.tenantID,
-                                   service_type="compute"
-                                   )
-        kwargs = {
-            'insecure': False,
-        }
-        keystone_session = self._get_keystone_session(**kwargs)
+
         kwargs = {
             'username': self.username,
             'password': self.passwd,
-            'project_id': self.tenantID,
-            'project_name': self.tenantName
+            'tenant_id': self.tenantID,
+            'auth_url':self.authUrl,
         }
-        keystone_auth = self._get_keystone_auth(keystone_session,
-                                                self.authUrl,
-                                                **kwargs)
-
-        endpoint = keystone_auth.get_endpoint(keystone_session,service_type='orchestration', region_name=None)
-
-
+        auth = v2.Password(**kwargs)
+        sess = session.Session(auth=auth)
         kwargs = {
-            'username': self.username,
-            'include_pass': False,
-            'session': keystone_session,
-            'auth_url': self.authUrl,
-            'region_name': '',
-            'endpoint_type': 'publicURL',
-            'service_type': 'orchestration',
-            'password': self.passwd,
-            'auth': keystone_auth,
+            'session':sess,
+        }
+        api_version='2'
+        self.nc = novaclient.client.Client(api_version, session=sess)
+        api_version=1
+        endpoint="https://heat.rc.nectar.org.au:8004/v1/%s"%self.tenantID
+        self.hc = heatclient.client.Client(api_version, endpoint, session=sess)
-        self.hc = heat_client.Client(api_version, endpoint, **kwargs)
+        api_version=1
+        self.cc = cinderclient.client.Client(api_version, session=sess)
 
     def recurse_resources(self,stack,resource):
 
@@ -170,6 +92,7 @@ class OpenStackConnection:
             instance_ids.extend(self.recurse_resources(stack=i,resource=r))
 
         nc=self.nc
+        cc=self.cc
         inventory = {}
         inventory['_meta'] = { 'hostvars': {} }
         for server in nc.servers.list():
@@ -190,6 +113,7 @@
                 inventory[server.metadata['ansible_host_group']].append(hostname)
             else:
                 inventory[server.metadata['ansible_host_group']] = [hostname]
+            #print dir(server)
             # Set the other host variables
             inventory['_meta']['hostvars'][hostname] = {}
             inventory['_meta']['hostvars'][hostname]['ansible_ssh_host'] = server.networks.values()[0][0]
@@ -198,6 +122,13 @@
                 if 'ansible_ssh' in key:
                     inventory['_meta']['hostvars'][hostname][key] = server.metadata[key]
             inventory['_meta']['hostvars'][hostname]['ansible_ssh_user'] = 'ec2-user'
+            for vol in server.to_dict()['os-extended-volumes:volumes_attached']:
+                for cv in cc.volumes.findall():
+                    if cv.id == vol['id']:
+                        devname = '/dev/disk/by-id/virtio-'+cv.id[0:20]
+                        if not 'ansible_host_volumes' in inventory['_meta']['hostvars'][hostname]:
+                            inventory['_meta']['hostvars'][hostname]['ansible_host_volumes']={}
+                        inventory['_meta']['hostvars'][hostname]['ansible_host_volumes'][cv.display_name]={'uuid':vol['id'],'dev':devname}
         print json.dumps(inventory)
 
 if __name__ == "__main__":
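Note on the change above: the rewrite drops the copied keystone helper code in favour of one v2 Password session shared by the nova, heat, and cinder clients, and the inventory pass now publishes each attached Cinder volume under ansible_host_volumes. The device path works because KVM exposes a virtio disk's serial as the first 20 characters of the volume UUID, which is exactly what cv.id[0:20] relies on. A minimal Python 3 sketch of that naming convention (the UUID here is invented):

```python
# Sketch only: derive the stable udev path for an attached Cinder volume.
# KVM truncates the virtio serial to the first 20 characters of the UUID.
vol_id = "0df170b7-c8c1-459f-aee4-b7b53f4a377c"  # hypothetical volume UUID
devname = "/dev/disk/by-id/virtio-" + vol_id[0:20]
print(devname)  # -> /dev/disk/by-id/virtio-0df170b7-c8c1-459f-a
```

The make_filesystems role further down consumes this same 'dev' value out of hostvars.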
diff --git a/roles/OpenVPN-Client/tasks/main.yml b/roles/OpenVPN-Client/tasks/main.yml
index 640caecb77a0a6dd6b63c0347e2ebef484ff182f..44aab742b4a4ea1f6a4cdf736ae7c367897c197a 100644
--- a/roles/OpenVPN-Client/tasks/main.yml
+++ b/roles/OpenVPN-Client/tasks/main.yml
@@ -3,6 +3,6 @@
   include: installOpenVPN.yml
 
 - name: "Start OpenVPN"
-  service: name=openvpn state=started
+  service: name=openvpn state=started enabled=yes
   sudo: true
diff --git a/roles/OpenVPN-Server/tasks/main.yml b/roles/OpenVPN-Server/tasks/main.yml
index 387f2bca8837485a71491c9becc9e4fc0362e416..b69a74c0cc65f0ad56cf046337355662ea7b3b03 100644
--- a/roles/OpenVPN-Server/tasks/main.yml
+++ b/roles/OpenVPN-Server/tasks/main.yml
@@ -3,5 +3,5 @@
   include: installOpenVPN.yml
 
 - name: "Start OpenVPN"
-  service: name=openvpn state=started
+  service: name=openvpn state=started enabled=yes
   sudo: true
diff --git a/roles/apache2/tasks/apacheDebian.yml b/roles/apache2/tasks/apacheDebian.yml
index 44ffcdc4a675736cfdf50a9d0be0c1d5016cc565..acfada376e9d2e3a857be634bc417cf60fb4930e 100644
--- a/roles/apache2/tasks/apacheDebian.yml
+++ b/roles/apache2/tasks/apacheDebian.yml
@@ -29,6 +29,6 @@
 
 - name: "Starting Apache2"
-  service: name=apache2 state=started
+  service: name=apache2 state=started enabled=yes
   sudo: true
diff --git a/roles/calculateSlurmConf/tasks/main.yml b/roles/calculateSlurmConf/tasks/main.yml
index ed39703643e3a03d10fb766aac80a9a03b77e644..800ad4a5db8148fe6ff6a02d2906285ba157deed 100644
--- a/roles/calculateSlurmConf/tasks/main.yml
+++ b/roles/calculateSlurmConf/tasks/main.yml
@@ -4,3 +4,10 @@
 - name: fetch slurm.conf
   fetch: src=/tmp/slurm.conf dest=files/slurm.conf flat=yes
+
+- name: "Templating slurmdbd.conf"
+  template: src=slurmdbd.conf.j2 dest=/tmp/slurmdbd.conf owner=root group=root mode=644
+  sudo: true
+
+- name: fetch slurm.conf
+  fetch: src=/tmp/slurmdbd.conf dest=files/slurmdbd.conf flat=yes
diff --git a/roles/calculateSlurmConf/templates/slurm.conf.j2 b/roles/calculateSlurmConf/templates/slurm.conf.j2
index f9332b0e633cae26395306ce7d2d218710c64e1c..1c186035092840a713324483c77956a46213a175 100644
--- a/roles/calculateSlurmConf/templates/slurm.conf.j2
+++ b/roles/calculateSlurmConf/templates/slurm.conf.j2
@@ -10,6 +10,9 @@
 #
 ClusterName={{ clustername }}
 ControlMachine={{ slurmctrl }}
+{% if slurmctrlbackup is defined %}
+BackupController={{ slurmctrlbackup }}
+{% endif %}
 #ControlAddr=
 #BackupController=
 #BackupAddr=
@@ -27,7 +30,7 @@ SwitchType=switch/none
 MpiDefault=pmi2
 SlurmctldPidFile={{ slurmpiddir }}/slurmctld.pid
 SlurmdPidFile={{ slurmpiddir }}/slurmd.pid
-ProctrackType=proctrack/linuxproc
+ProctrackType=proctrack/cgroup
 #PluginDir=
 CacheGroups=0
 #FirstJobId=
@@ -121,7 +124,10 @@ Epilog={{ slurmjob.epilog }}
 #JobAcctGatherFrequency=30
 #
 AccountingStorageType=accounting_storage/slurmdbd
-AccountingStorageHost={{ slurmctrl }}
+AccountingStorageHost={{ slurmdbd }}
+{% if slurmdbdbackup is defined %}
+AccountingStorageBackupHost={{ slurmdbdbackup }}
+{% endif %}
 #AccountingStorageEnforce=limits,safe
 #AccountingStorageLoc=
 #AccountingStoragePass=
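Note on the slurm.conf.j2 change above: the BackupController and AccountingStorageBackupHost lines are emitted only when slurmctrlbackup / slurmdbdbackup are set, so single-controller clusters keep rendering a valid config. A quick way to sanity-check that conditional outside Ansible, sketched with the jinja2 library directly (the hostnames m1/m2 are placeholders):

```python
from jinja2 import Template

# Reduced version of the slurm.conf.j2 conditional; m1/m2 are invented hosts.
tmpl = Template(
    "ControlMachine={{ slurmctrl }}\n"
    "{% if slurmctrlbackup is defined %}BackupController={{ slurmctrlbackup }}\n{% endif %}"
)
print(tmpl.render(slurmctrl="m1"))                        # backup line omitted
print(tmpl.render(slurmctrl="m1", slurmctrlbackup="m2"))  # backup line present
```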
diff --git a/roles/slurmdb-config/templates/slurmdbd.conf.j2 b/roles/calculateSlurmConf/templates/slurmdbd.conf.j2
similarity index 85%
rename from roles/slurmdb-config/templates/slurmdbd.conf.j2
rename to roles/calculateSlurmConf/templates/slurmdbd.conf.j2
index 5a6cd9aed308aa8982c173a2c8d9fc3db82d90f5..dc471330d5cdf3368efac17a85ba168e1ed4eab2 100644
--- a/roles/slurmdb-config/templates/slurmdbd.conf.j2
+++ b/roles/calculateSlurmConf/templates/slurmdbd.conf.j2
@@ -17,7 +17,10 @@ AuthType=auth/munge
 #
 # slurmDBD info
 #DbdAddr=
-DbdHost={{ slurmctrl }}
+DbdHost={{ slurmdbd }}
+{% if slurmdbdbackup is defined %}
+DbdBackupHost={{ slurmdbdbackup }}
+{% endif %}
 #DbdPort=7031
 SlurmUser=slurm
 #MessageTimeout=300
@@ -36,7 +39,7 @@ PidFile=/var/run/slurmdbd.pid
 #
 # Database info
 StorageType=accounting_storage/mysql
-StorageHost=localhost
+StorageHost={{ mysql_host }}
 #StoragePort=1234
 StoragePass={{ slurmdb_passwd }}
 StorageUser=slurmdb
diff --git a/roles/enable_lmod/tasks/main.yml b/roles/enable_lmod/tasks/main.yml
index 4676f706030c27b4b86e35a342e26fb3ae9ad74b..5c1ff887b0969578ee0a0aaa52d4603e1472b9c7 100644
--- a/roles/enable_lmod/tasks/main.yml
+++ b/roles/enable_lmod/tasks/main.yml
@@ -1,6 +1,16 @@
 ---
 - include_vars: "{{ ansible_os_family }}.yml"
 
+- name: Install epel-release
+  yum: name=epel-release-7-5.noarch state=present
+  sudo: true
+  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
+
+- name: Enable epel
+  command: yum-config-manager --enable epel
+  sudo: true
+  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
+
 - name: install lua
   yum: name={{ item }} state=installed
   with_items:
diff --git a/roles/gluster_client/files/glusterfs-epel.repo b/roles/gluster_client/files/glusterfs-epel.repo
new file mode 100644
index 0000000000000000000000000000000000000000..843b4baef3cf4d81aca369e49c44b92c1599c3cf
--- /dev/null
+++ b/roles/gluster_client/files/glusterfs-epel.repo
@@ -0,0 +1,22 @@
+# Place this file in your /etc/yum.repos.d/ directory
+
+[glusterfs-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/$basearch/
+enabled=1
+skip_if_unavailable=1
+gpgcheck=0
+
+[glusterfs-noarch-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/noarch
+enabled=1
+skip_if_unavailable=1
+gpgcheck=0
+
+[glusterfs-source-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes. - Source
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/SRPMS
+enabled=0
+skip_if_unavailable=1
+gpgcheck=0
diff --git a/roles/gluster_client/tasks/main.yml b/roles/gluster_client/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..68d24002695056b5b32337510a22bb5b48f187f6
--- /dev/null
+++ b/roles/gluster_client/tasks/main.yml
@@ -0,0 +1,22 @@
+---
+- name: add repo
+  copy: src=glusterfs-epel.repo dest=/etc/yum.repos.d/glusterfs-epel.repo
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
+- name: install gluster
+  yum: name={{ item }} state='latest'
+  when: ansible_os_family == 'RedHat'
+  with_items:
+    - glusterfs-client
+  sudo: true
+
+- name: install gluster
+  apt: name=glusterfs-client state='latest'
+  when: ansible_os_family == 'Debian'
+  sudo: true
+
+- name: mount volume
+#mount: name="{{ volmnt }}" src="{{ gluster_servers[0] }}:/{{ volname }}" state="mounted" fstype="glusterfs" opts="defaults,acl,_netdev,backupvolfile-server={{ gluster_servers[1] }}"
+  mount: name="{{ volmnt }}" src="{{ gluster_servers[0] }}:/{{ volname }}" state="mounted" fstype="glusterfs" opts="defaults,acl,backupvolfile-server={{ gluster_servers[1] }},noauto,comment=systemd.automount"
+  sudo: true
diff --git a/roles/gluster_server/files/glusterfs-epel.repo b/roles/gluster_server/files/glusterfs-epel.repo
new file mode 100644
index 0000000000000000000000000000000000000000..843b4baef3cf4d81aca369e49c44b92c1599c3cf
--- /dev/null
+++ b/roles/gluster_server/files/glusterfs-epel.repo
@@ -0,0 +1,22 @@
+# Place this file in your /etc/yum.repos.d/ directory
+
+[glusterfs-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/$basearch/
+enabled=1
+skip_if_unavailable=1
+gpgcheck=0
+
+[glusterfs-noarch-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/noarch
+enabled=1
+skip_if_unavailable=1
+gpgcheck=0
+
+[glusterfs-source-epel]
+name=GlusterFS is a clustered file-system capable of scaling to several petabytes. - Source
+baseurl=http://download.gluster.org/pub/gluster/glusterfs/3.6/LATEST/EPEL.repo/epel-$releasever/SRPMS
+enabled=0
+skip_if_unavailable=1
+gpgcheck=0
diff --git a/roles/gluster_server/tasks/main.yml b/roles/gluster_server/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..82dcabaa4088bec82eb7168983735fdb9cddae5a
--- /dev/null
+++ b/roles/gluster_server/tasks/main.yml
@@ -0,0 +1,53 @@
+---
+
+- name: add repo
+  copy: src=glusterfs-epel.repo dest=/etc/yum.repos.d/glusterfs-epel.repo
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
+- name: install gluster
+  yum: name={{ item }} state='latest'
+  when: ansible_os_family == 'RedHat'
+  with_items:
+    - glusterfs
+    - glusterfs-server
+
+  sudo: true
+
+- name: install gluster
+  apt: name=glusterfs-server state='latest'
+  when: ansible_os_family == 'Debian'
+  sudo: true
+
+- name: start daemon
+  service: name=glusterd enabled=yes state=started
+  sudo: true
+  when: ansible_os_family == 'RedHat'
+
+- name: start daemon
+  service: name=glusterfs-server enabled=yes state=started
+  sudo: true
+  when: ansible_os_family == 'Debian'
+
+- name: make server list
+  set_fact:
+    server_list: "{{ gluster_servers|join(',') }}"
+
+
+- name: echo server list
+  debug: var=server_list
+
+- name: make brick dir
+  file: state=directory path="{{ brickmnt }}/brick"
+  sudo: true
+
+- name: create volume
+  gluster_volume:
+    name: "{{ volname }}"
+    brick: "{{ brickmnt }}/brick"
+    cluster: "{{ server_list }}"
+    replicas: "{{ replicas }}"
+    state: present
+  sudo: true
+  run_once: true
+
diff --git a/roles/ldapclient/tasks/configLdapClient.yml b/roles/ldapclient/tasks/configLdapClient.yml
index 836dcdd809692626eba503b08a67ec3916453dc0..3e22db75422d4647405933e4379b34548da6aa24 100644
--- a/roles/ldapclient/tasks/configLdapClient.yml
+++ b/roles/ldapclient/tasks/configLdapClient.yml
@@ -40,7 +40,5 @@
   notify: restart sssd
 
 - name: "start sssd"
-  service: name=sssd state=started
+  service: name=sssd state=started enabled=yes
   sudo: true
-
-
diff --git a/roles/link_usr_local/tasks/main.yml b/roles/link_usr_local/tasks/main.yml
index 7f3e211f98ec1ec266cf0117b663e77f05e5c232..72847692aec9dcbd7d0a654449cfac308243c549 100644
--- a/roles/link_usr_local/tasks/main.yml
+++ b/roles/link_usr_local/tasks/main.yml
@@ -1,13 +1,19 @@
 ---
-- name: stat usrlocal
+- name: stat
   stat: path={{ dest }}
-  register: stat_usrlocal
+  register: stat_r
 
 - name: mv
-  command: mv /usr/local /usr/local_old
-  when: stat_usrlocal.stat.isdir == True
+  command: mv "{{ dest }}" "{{ dest }}_old"
+  when: stat_r.stat.exists and stat_r.stat.isdir
   sudo: true
 
+- name: stat
+  stat: path={{ dest }}
+  register: stat_r
+
+
 - name: link
   file: src="{{ src }}" dest="{{ dest }}" state=link
+  when: not stat_r.stat.exists
   sudo: true
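Note on the link_usr_local rework above: it is now an idempotent stat / move-aside / re-stat / link sequence — an existing real directory is renamed to <dest>_old, and the symlink is only created once the path is free, so re-runs are safe. Roughly the same logic in Python, offered as an illustration only (the paths are hypothetical):

```python
import os

def link_usr_local(src="/shared/usr_local", dest="/usr/local"):
    # Hypothetical paths; mirrors the role's stat -> mv -> stat -> link flow.
    if os.path.isdir(dest) and not os.path.islink(dest):
        os.rename(dest, dest + "_old")   # command: mv "{{ dest }}" "{{ dest }}_old"
    if not os.path.lexists(dest):
        os.symlink(src, dest)            # file: ... state=link
```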
diff --git a/roles/lmod/tasks/main.yml b/roles/lmod/tasks/main.yml
index 9f16c8f5f20dcd82482697e2e276e5b3ce8839dc..6c84ac239a44e85745de9f1bc10f5777e085bfb3 100644
--- a/roles/lmod/tasks/main.yml
+++ b/roles/lmod/tasks/main.yml
@@ -12,6 +12,17 @@
 #  sudo: true
 #  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
 
+- name: Install epel-release
+  yum: name=epel-release-7-5.noarch state=present
+  sudo: true
+  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
+
+- name: Enable epel
+  command: yum-config-manager --enable epel
+  sudo: true
+  when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
+
+
 - name: install lua
   yum: name={{ item }} state=installed
   with_items:
diff --git a/roles/lustre-client/tasks/main.yml b/roles/lustre-client/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..41d03b0e4df28534dee4f9401ac3b737a560e221
--- /dev/null
+++ b/roles/lustre-client/tasks/main.yml
@@ -0,0 +1,52 @@
+---
+- include_vars: "{{ ansible_distribution }}_{{ ansible_distribution_major_version }}.yaml"
+
+- name: copy rpms/debs
+  copy: dest=/tmp/ src=lustre-install/{{ item }}
+  with_items:
+    "{{ lustre_pkgs }}"
+
+
+#- name: install rpms
+#  yum: name="/tmp/{{ item }}"
+#  sudo: true
+#  with_items: "{{ lustre_pkgs }}"
+
+- name: install rpms
+  yum: name=/tmp/lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
+  sudo: true
+  when: ansible_os_family == "RedHat"
+
+- name: install rpms
+  yum: name=/tmp/lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
+  sudo: true
+  when: ansible_os_family == "RedHat"
+
+# instructions to build these debs:
+# Instantiate an Ubuntu 14.04 instance
+# git clone git://git.hpdd.intel.com/fs/lustre-release.git
+# cd lustre-release
+# optionally git checkout 0754bc8f2623bea184111af216f7567608db35b6 <- I know this commit works on Ubuntu, but I had a lot of trouble with other branches
+# sh autogen.sh
+# ./configure --enable-dist --disable-doc --disable-server --disable-dependency-tracking --with-o2ib=/var/lib/dkms/mlnx-ofed-kernel/3.1/build/
+# mkdir BUILD
+# cd BUILD
+# ln -s ../lustre-2.7.62.tar.gz lustre-2.7.62.orig.tar.gz
+# tar zxvf ../lustre-2.7.62.tar.gz
+# cd lustre-2.7.62
+# ./configure --disable-doc --disable-server --disable-dependency-tracking --with-o2ib=/var/lib/dkms/mlnx-ofed-kernel/3.1/build/
+# vi debian/changelog (the version number on the first line is incorrect)
+# make debs
+#
+- name: install debs
+  apt: name="/tmp/{{ item }}"
+  sudo: true
+  with_items: "{{ lustre_pkgs }}"
+  when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "14"
+
+- name: "Mount lustre filesystems"
+  mount: name="{{ item.mntpt }}" src="{{ item.servers }}"/"{{ item.src }}" state="mounted" fstype="lustre" opts="_netdev,flock"
+  sudo: true
+  with_items: "{{ mntlist }}"
+
+
diff --git a/roles/lustre-client/vars/CentOS_7.yaml b/roles/lustre-client/vars/CentOS_7.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..762d8c74619a6eeb819d18bdefd9d379c705a9a4
--- /dev/null
+++ b/roles/lustre-client/vars/CentOS_7.yaml
@@ -0,0 +1,5 @@
+---
+lustre_pkgs:
+  - lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
+  - lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
+
diff --git a/roles/lustre-client/vars/Ubuntu_14.yaml b/roles/lustre-client/vars/Ubuntu_14.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e3870d0105543ff13ed9ade7813a438a3852943
--- /dev/null
+++ b/roles/lustre-client/vars/Ubuntu_14.yaml
@@ -0,0 +1,5 @@
+---
+lustre_pkgs:
+  - linux-patch-lustre_2.7.62-1_all.deb
+  - lustre-client-modules-3.13.0-58-generic_2.7.62-1_amd64.deb
+  - lustre-utils_2.7.62-1_amd64.deb
diff --git a/roles/make_filesystems/tasks/main.yml b/roles/make_filesystems/tasks/main.yml
index b25a554d6960ca9736a1dcee46ec0d08276a9eff..34b0c6ed5ac859fbed246f88cce09b8eb3263d81 100644
--- a/roles/make_filesystems/tasks/main.yml
+++ b/roles/make_filesystems/tasks/main.yml
@@ -1,13 +1,31 @@
 ---
-- name: Format File Systems
-  filesystem: fstype={{ item.fstype }} dev={{ item.dev }} opts={{ item.opts }}
-  with_items: mkFileSystems
+- name: format volumes
+  filesystem: fstype={{ item.fstype }} dev={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }}
+  with_items: volumes
   sudo: true
-  when: mkFileSystems is defined
 
-- name: Mount device
-  mount: name={{ item.name }} src={{ item.dev }} fstype={{ item.fstype }} opts={{ item.opts }} state=mounted
-  with_items: mountFileSystems
+- name: format volumes
+  mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }} state=mounted
+  with_items: volumes
   sudo: true
-  when: mountFileSystems is defined
 
+- name: symlink volumes
+  file: force=yes state=link src="{{ item.mntpt }}" path="{{ item.linkto }}"
+  when: item.linkto is defined
+  with_items: volumes
+  sudo: true
+
+
+#- name: Format File Systems
+#  filesystem: fstype={{ item.fstype }} dev={{ item.dev }} opts={{ item.opts }}
+#  with_items: mkFileSystems
+#  sudo: true
+#  when: mkFileSystems is defined
+#
+#- name: Mount device
+#  mount: name={{ item.name }} src={{ item.dev }} fstype={{ item.fstype }} opts={{ item.opts }} state=mounted
+#  with_items: mountFileSystems
+#  sudo: true
+#  when: mountFileSystems is defined
+#
+#
diff --git a/roles/mellanox_drivers/files/mlnx_install.sh b/roles/mellanox_drivers/files/mlnx_install.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0e6f9802da6c0009dc005dc86957fb17b130227e
--- /dev/null
+++ b/roles/mellanox_drivers/files/mlnx_install.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+# A CRUDE Script to install Mellanox OFED drivers
+# Philip.Chan@monash.edu
+#
+# TODO: check if MLNX_OFED is already installed!
+# TODO: check kernel...
+
+KERN=`uname -r`
+
+if [ "$KERN" != "3.10.0-229.14.1.el7.x86_64" ]
+then
+    echo "Oops! Did you forget to reboot?"
+    echo "Kernel version has to be 3.10.0-229.14.1.el7.x86_64"
+    exit 1
+fi
+
+sudo yum install -y pciutils gcc-gfortran libxml2-python tcsh libnl lsof tcl tk perl
+sudo yum install -y gtk2 atk cairo
+tar xzvf MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext.tgz
+cd MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext
+sudo ./mlnxofedinstall -q
+cd ..
+
+tmpfile="/tmp/ifcfg.pc"
+rm -f $tmpfile
+./set_ifcfg.pl $tmpfile
+
+if [ -f $tmpfile ]
+then
+    echo "Attempting to install ifcfg-ens6"
+    if [ -f /etc/sysconfig/network-scripts/ifcfg-ens6 ]
+    then
+        echo "/etc/sysconfig/network-scripts/ifcfg-ens6 already exists!"
+        grep IP /etc/sysconfig/network-scripts/ifcfg-ens6
+        echo "bailing!"
+    else
+        sudo cp -ip $tmpfile /etc/sysconfig/network-scripts/ifcfg-ens6
+        sudo chown root:root /etc/sysconfig/network-scripts/ifcfg-ens6
+        cd /etc/sysconfig/network-scripts
+        sudo ./ifup ens6
+        ping -c 1 172.16.228.1
+    fi
+fi
+exit 0
diff --git a/roles/mellanox_drivers/files/set_ifcfg.pl b/roles/mellanox_drivers/files/set_ifcfg.pl
new file mode 100755
index 0000000000000000000000000000000000000000..022c8cf4d3305ca744aa59cfd129dda05b4cb5cc
--- /dev/null
+++ b/roles/mellanox_drivers/files/set_ifcfg.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+#
+# Assumes Mellanox NIC is named as ens6
+# Philip.Chan@monash.edu
+#
+# Usage:
+#   ./set_ifcfg.pl [<tmpfilename>]
+#   To be used within the mlnx_install.sh
+#
+
+my $outfile = shift @ARGV;
+$outfile = "tmp.ifcfg" if (! defined $outfile);
+
+sub get_index
+{
+    my $hn = shift;
+    my $maxhosts = 32;
+
+    if ($hn =~ /hc(\d+)/) {
+        return 33 + $1 if ($1 < $maxhosts);
+    }
+    if ($hn =~ /hs(\d+)/) {
+        return 1 + $1 if ($1 < $maxhosts);
+    }
+    return 0;
+}
+
+my $hostname = `/bin/hostname`;
+my $x = get_index($hostname);
+die "Unable to parse hostname $hostname" if ($x eq '0');
+
+my $ip = "172.16.229.$x";
+print "Assigning $ip to $hostname\n";
+
+open OUT, ">$outfile" or die "Failed to create output file $outfile!";
+print OUT "DEVICE=ens6\n";
+print OUT "ONBOOT=yes\n";
+print OUT "NM_CONTROLLED=no\n";
+print OUT "BOOTPROTO=none\n";
+print OUT "IPADDR=$ip\n";
+print OUT "PREFIX=22\n";
+print OUT "MTU=9000\n";
+close OUT;
+
+exit 0;
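Note on set_ifcfg.pl above: it encodes the cluster's addressing scheme — hs<N> storage hosts land on 172.16.229.(1+N), hc<N> compute hosts on 172.16.229.(33+N), and 0 is the "could not parse hostname" sentinel. The same mapping as a Python sketch, handy for checking the scheme without running the Perl on a node:

```python
import re

def get_index(hostname, maxhosts=32):
    # Port of set_ifcfg.pl's get_index(); 0 signals an unparseable hostname.
    for prefix, base in (("hc", 33), ("hs", 1)):
        m = re.search(prefix + r"(\d+)", hostname)
        if m and int(m.group(1)) < maxhosts:
            return base + int(m.group(1))
    return 0

assert get_index("hs03") == 4    # -> 172.16.229.4
assert get_index("hc01") == 34   # -> 172.16.229.34
```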
diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..462b7320c432d0967141833497b88696ec3d31ce
--- /dev/null
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -0,0 +1,130 @@
+---
+- include_vars: mellanoxVars.yml
+
+
+- name: yum install dependencies
+  yum: name=perl,pciutils,gtk2,atk,cairo,gcc-gfortran,libxml2-python,tcsh,libnl,lsof,tcl,tk
+  sudo: true
+  ignore_errors: true
+  when: ansible_os_family == "RedHat"
+
+- name: yum update to upgrade kernel
+  shell: "yum update -y"
+  sudo: true
+  ignore_errors: true
+  when: ansible_os_family == "RedHat"
+
+#
+# A REBOOT IS NEEDED AFTER a KERNEL UPDATE
+#
+- name: restart machine
+  shell: sleep 5; sudo shutdown -r now "Ansible updates triggered"
+  async: 2
+  poll: 0
+  ignore_errors: true
+  sudo: true
+  when: ansible_os_family == "RedHat"
+
+- name: waiting for server to come back
+  #local_action: wait_for host={{ ansible_host }} state=started port=22 delay=10 search_regex=OpenSSH
+  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
+  sudo: false
+
+- name: waiting for server to come back number 2
+  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
+  sudo: false
+
+- name: test for existing installation of drivers
+  command: ibv_devinfo
+  sudo: true
+  register: drivers_installed
+  ignore_errors: true
+
+- name: copy driver source
+  #make this a variable
+  unarchive: copy=yes src="files/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
+  sudo: true
+  #when: drivers_installed|failed and ansible_os_family=="RedHat" and ansible_distribution_major_version == "7"
+  when: drivers_installed|failed
+
+- name: install drivers
+  shell: ./mlnxofedinstall -q
+  args:
+    #more changes
+    chdir: "/tmp/{{ MELLANOX_DRIVER_SRC }}"
+  sudo: true
+  when: drivers_installed|failed
+
+#
+# get IP address before reboot
+#
+- name: get IP address
+  local_action: command ./scripts/map_ib_ip.pl {{ inventory_hostname }}
+  register: ip_address
+  sudo: false
+
+
+- name: template IP address
+  template: dest=/etc/sysconfig/network-scripts/ifcfg-ens6 src=ifcfg-ens6.j2 owner=root group=root
+  sudo: true
+  when: ansible_os_family=="RedHat" and ansible_distribution_major_version == "7"
+#ubuntu equivalent of previous command
+- name: Ubuntu network interfaces - line 1
+  lineinfile:
+  args:
+    dest: /etc/network/interfaces
+    line: auto {{ MELLANOX_DEVICE_NAME }}
+    state: present
+  sudo: true
+  when: ansible_os_family=="Debian"
+- name: Ubuntu network interfaces - line 2
+  lineinfile:
+  args:
+    dest: /etc/network/interfaces
+    line: iface {{ MELLANOX_DEVICE_NAME }} inet static
+    state: present
+    insertafter: "auto {{ MELLANOX_DEVICE_NAME }}"
+  sudo: true
+  when: ansible_os_family=="Debian"
+- name: Ubuntu network interfaces - line 3
+  lineinfile:
+  args:
+    dest: /etc/network/interfaces
+    line: address {{ ip_address.stdout }}
+    state: present
+    insertafter: "iface {{ MELLANOX_DEVICE_NAME }} inet static"
+  sudo: true
+  when: ansible_os_family=="Debian"
+#
+# A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
+#
+- name: restart machine
+  command: "sleep 5; sudo shutdown -r now"
+  async: 2
+  poll: 0
+  ignore_errors: true
+  sudo: true
+  when: ansible_os_family=="Centos"
+
+- name: restart machine for Ubuntu -cos it is 'special'
+  command: "sudo shutdown -r now"
+  async: 2
+  poll: 0
+  ignore_errors: true
+  sudo: true
+  when: ansible_os_family=="Debian"
+
+- name: waiting for server to come back
+  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
+  sudo: false
+
+- name: waiting for server to come back 2
+  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
+
+- name: bring up interface
+  #variable=eth0 or ens6
+  command: ifup {{ MELLANOX_DEVICE_NAME }}
+  sudo: true
+  when: ansible_distribution_major_version == "7"
+
+
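Note on the reboot handling above: the shutdown is fired asynchronously (async: 2, poll: 0, so the play does not block on a dying SSH session) and the control machine then polls with wait_for ... search_regex=OpenSSH until sshd answers again. A bare-bones local equivalent of that wait loop, offered as a sketch only (the host and timeouts are placeholders):

```python
import socket
import time

def wait_for_ssh(host, port=22, delay=10, timeout=600):
    # Placeholder timeouts; mirrors wait_for's delay= and search_regex=OpenSSH.
    time.sleep(delay)
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=5) as sock:
                if b"OpenSSH" in sock.recv(64):  # sshd greets with its banner
                    return
        except OSError:
            pass
        time.sleep(5)
    raise TimeoutError("%s never came back on port %d" % (host, port))
```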
diff --git a/roles/mellanox_drivers/templates/ifcfg-ens6.j2 b/roles/mellanox_drivers/templates/ifcfg-ens6.j2
new file mode 100644
index 0000000000000000000000000000000000000000..1066d624333b0fb6b43e45bb6127a4e31989873d
--- /dev/null
+++ b/roles/mellanox_drivers/templates/ifcfg-ens6.j2
@@ -0,0 +1,8 @@
+DEVICE=ens6
+ONBOOT=yes
+NM_CONTROLLED=no
+BOOTPROTO=none
+IPADDR={{ ip_address.stdout }}
+PREFIX=22
+MTU=9000
+
diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e2277dd40ad203653226ea0a356c0806689d7822
--- /dev/null
+++ b/roles/mellanox_drivers/vars/mellanoxVars.yml
@@ -0,0 +1,4 @@
+---
+ #note. do not add '.tgz' to driver src. done in playbook
+ MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+ MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}"
diff --git a/roles/mysql/tasks/mysql_server.yml b/roles/mysql/tasks/mysql_server.yml
index a5066a349f9b64ab6636cc63d9f1b67a9869da35..1d2d054f80fcd5aaad722d01186ac9be03c4d358 100644
--- a/roles/mysql/tasks/mysql_server.yml
+++ b/roles/mysql/tasks/mysql_server.yml
@@ -33,3 +33,9 @@
 
 - name: "Giving priviliges to user"
   mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL state=present
+  when: mysql_user_host is defined
+
+- name: "Giving priviliges to user"
+  mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL state=present
+  with_items: mysql_user_hosts_group
+  when: mysql_user_hosts_group is defined
diff --git a/roles/nfs-common/handlers/main.yml b/roles/nfs-common/handlers/main.yml
index 7b77dc761daa5d64900882953560e05d713c3c2e..f5c928114ee92484c0bb856b936476fbddfb5324 100644
--- a/roles/nfs-common/handlers/main.yml
+++ b/roles/nfs-common/handlers/main.yml
@@ -1,4 +1,4 @@
 ---
 
 - name: "Run rpcbind service"
-  service: "name=rpcbind state=started"
+  service: "name=rpcbind state=started enabled=yes"
diff --git a/roles/nfs-server/tasks/startServer.yml b/roles/nfs-server/tasks/startServer.yml
index f326a572afecfca5d806f4950a2b0e973ad89c83..60d84ab3ca51ff23e5a93e1e648c0e04dda17b38 100644
--- a/roles/nfs-server/tasks/startServer.yml
+++ b/roles/nfs-server/tasks/startServer.yml
@@ -5,16 +5,16 @@
   with_items: exportList
 
 - name: "Starting rpcbind"
-  service: "name=rpcbind state=started"
+  service: "name=rpcbind state=started enabled=true"
   sudo: true
   when: ansible_os_family == "RedHat"
 
 - name: "Start the Server"
-  service: "name=nfs state=started"
+  service: "name=nfs state=started enabled=true"
   sudo: true
   when: ansible_os_family == "RedHat"
 
 - name: "Start the Server"
-  service: "name=nfs-kernel-server state=started"
+  service: "name=nfs-kernel-server state=started enabled=true"
   sudo: true
   when: ansible_os_family == "Debian"
diff --git a/roles/provision/tasks/main.yml b/roles/provision/tasks/main.yml
index 27a2cbbd47537625c2e13efa54ff0132b0ab8c4a..c5d13aadd3c4aea253aaec4c556f65acdfe7d5a9 100644
--- a/roles/provision/tasks/main.yml
+++ b/roles/provision/tasks/main.yml
@@ -1,4 +1,8 @@
 ---
+- name: make dir
+  file: path="{{ provision | dirname }}" state=directory mode=755 owner=root
+  sudo: true
+
 - name: copy provision template
   template: src=provision.sh.j2 dest={{ provision }} mode=755 owner=root
   sudo: true
diff --git a/roles/provision/templates/provision.sh.j2 b/roles/provision/templates/provision.sh.j2
index d4082c8ae41b59824252396bbc178bdeaf7931ef..0c70a397b8cadb0ab6792a56260948f83a863ee6 100644
--- a/roles/provision/templates/provision.sh.j2
+++ b/roles/provision/templates/provision.sh.j2
@@ -32,7 +32,7 @@ for user in ${user_list[*]}; do
     if [ -z "${find}" ]; then
         su slurm -c "$sacctmgr -i add account ${account} Description=CVL Organization=monash cluster=${cluster}" || { echo "error to create account ${account}" >> ${log_file} && exit 1; }
     fi
-    find=$(sacctmgr list user ${user} | grep ${user})
+    find=$(sacctmgr list user --noheader -p ${user} | grep ${user})
     if [ -z "${find}" ]; then
         su slurm -c "$sacctmgr -i add user ${user} account=${account} cluster=${cluster}" || { echo "error to create user ${user}" >> ${log_file} && exit 1; }
    fi
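Note on the provision.sh.j2 fix above: without --noheader, sacctmgr prints a header block and column-formatted fields, so the grep-based existence check can match stray header text; --noheader -p yields empty, pipe-delimited output when the user genuinely does not exist. The same existence check in Python, sketched under the assumption that sacctmgr is on PATH:

```python
import subprocess

def slurm_user_exists(user):
    # Same flags as the fixed script: headerless, pipe-delimited output.
    out = subprocess.check_output(
        ["sacctmgr", "list", "user", "--noheader", "-p", user], text=True
    )
    return any(line.split("|")[0] == user for line in out.splitlines())
```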
diff --git a/roles/provision/vars/main.yml b/roles/provision/vars/main.yml
index ed97d539c095cf1413af30cc23dea272095b97dd..b1cfa091c1d226185fcff5b4ec03c902db11bfe1 100644
--- a/roles/provision/vars/main.yml
+++ b/roles/provision/vars/main.yml
@@ -1 +1,5 @@
 ---
+slurm_provision: "/usr/local/sbin/slurm_provision.sh"
+home_dir: "/home"
+provision: "/usr/local/sbin/provision.sh"
+
diff --git a/roles/shibboleth-sp/tasks/shibbolethConfig.yml b/roles/shibboleth-sp/tasks/shibbolethConfig.yml
index 3ccd12c1d401bb7346ff52902f63375ea5eab3a9..d94f5dc70053db42194783b1145e67f5b508b141 100644
--- a/roles/shibboleth-sp/tasks/shibbolethConfig.yml
+++ b/roles/shibboleth-sp/tasks/shibbolethConfig.yml
@@ -140,4 +140,4 @@
 
 - name: "Starting shibboleth"
   sudo: true
-  service: name=shibd state=started
+  service: name=shibd state=started enabled=yes
diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml
index 4e642e84595009794cf6eb2837c371862fae8ca1..a857be9a01b37ba0b4d56ccd8079ca731abe71f5 100644
--- a/roles/slurm-start/tasks/main.yml
+++ b/roles/slurm-start/tasks/main.yml
@@ -36,7 +36,7 @@
   when: use_systemd is defined
 
 - name: start munge
-  service: name=munge state=started
+  service: name=munge state=started enabled=yes
   sudo: true
 
 - name: start slurmdbd
@@ -44,6 +44,12 @@
   sudo: true
   when: start_slurmdbd is defined
 
+
+- name: "create cluster in slurm db"
+  shell: "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
+  sudo: true
+  ignore_errors: true
+
 - name: start slurmctl
   service: name=slurmctld state=started enabled=yes
   sudo: true
diff --git a/roles/slurmdb-config/tasks/main.yml b/roles/slurmdb-config/tasks/main.yml
index becf45105eff2e253c41da10feb35d8afa05f620..a31f5ad72b0a21cc1ebb67d654eea977205b33b1 100644
--- a/roles/slurmdb-config/tasks/main.yml
+++ b/roles/slurmdb-config/tasks/main.yml
@@ -22,13 +22,13 @@
   sudo: true
 
 - name: install slurmdb.conf
-  template: src=slurmdbd.conf.j2 dest={{ slurm_dir }}/etc/slurmdbd.conf
+  copy: src=files/slurmdbd.conf dest={{ slurm_dir }}/etc/slurmdbd.conf
   sudo: true
   when: slurm_dir is defined
 
 - name: install slurmdbd.conf
-  template: src=slurmdbd.conf.j2 dest=/etc/slurm/slurmdbd.conf
+  copy: src=slurmdbd.conf dest=/etc/slurm/slurmdbd.conf
   sudo: true
   when: slurm_dir is not defined
diff --git a/roles/slurmdb-config/templates/slurmdblog.j2 b/roles/slurmdb-config/templates/slurmdblog.j2
index f9042d6b392781293a35c0c4d70f09f63813e809..e950c18286cbc00771c94cf520f97dd5fb9d6104 100644
--- a/roles/slurmdb-config/templates/slurmdblog.j2
+++ b/roles/slurmdb-config/templates/slurmdblog.j2
@@ -1,5 +1,4 @@
 {{ slurmdbdlog.log }}
-{% endif %}
 {
 compress
 missingok