Commit 3b82f500 authored by Chris Hines's avatar Chris Hines
Browse files
parents db9fcff7 60d4566a
......@@ -3,6 +3,6 @@
include: installOpenVPN.yml
- name: "Start OpenVPN"
service: name=openvpn state=started
service: name=openvpn state=started enabled=yes
sudo: true
......@@ -3,5 +3,5 @@
include: installOpenVPN.yml
- name: "Start OpenVPN"
service: name=openvpn state=started
service: name=openvpn state=started enabled=yes
sudo: true
......@@ -29,6 +29,6 @@
-
name: "Starting Apache2"
service: name=apache2 state=started
service: name=apache2 state=started enabled=yes
sudo: true
......@@ -30,7 +30,8 @@ SwitchType=switch/none
MpiDefault=pmi2
SlurmctldPidFile={{ slurmpiddir }}/slurmctld.pid
SlurmdPidFile={{ slurmpiddir }}/slurmd.pid
ProctrackType=proctrack/linuxproc
#ProctrackType=proctrack/linuxproc
ProctrackType=proctrack/cgroup
#PluginDir=
CacheGroups=0
#FirstJobId=
......@@ -78,16 +79,16 @@ SelectType={{ slurmselecttype }}
SelectTypeParameters=CR_Core_Memory
{% endif %}
FastSchedule={{ slurmfastschedule }}
#PriorityType=priority/multifactor
PriorityType=priority/multifactor
#PriorityFlags=Ticket_Based
#PriorityCalcPeriod=5
#PriorityDecayHalfLife=0
#PriorityUsageResetPeriod=14-0
##PriorityWeightFairshare=10000
#PriorityWeightAge=10000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=10000
#PriorityMaxAge=14-0
PriorityWeightFairshare=10000
PriorityWeightAge=10000
PriorityWeightPartition=10000
PriorityWeightJobSize=10000
PriorityMaxAge=14-0
#
# LOGGING
{% if slurmctlddebug %}
......@@ -117,24 +118,27 @@ JobCompType=jobcomp/none
{% if slurmjob is defined %}
Prolog={{ slurmjob.prolog }}
Epilog={{ slurmjob.epilog }}
{% else %}
Prolog={{ slurm_dir }}/bin/slurm.prolog
Epilog={{ slurm_dir }}/bin/slurm.epilog
{% endif %}
#
# ACCOUNTING
#JobAcctGatherType=jobacct_gather/linux
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
#
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost={{ slurmdbd }}
{% if slurmdbdbackup is defined %}
AccountingStorageBackupHost={{ slurmdbdbackup }}
{% endif %}
#AccountingStorageEnforce=limits,safe
AccountingStorageEnforce=limits,safe
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
#
#GRES
GresTypes=gpu
#GresTypes=gpu
# Fair share
{% if slurmfairshare.def %}
......@@ -155,6 +159,10 @@ MpiParams=ports=12000-12999
NodeName={{ hostvars[node]['ansible_hostname'] }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} RealMemory={{ hostvars[node].ansible_memory_mb.real.total }} Sockets={{ hostvars[node]['ansible_processor_vcpus'] }} CoresPerSocket=1 ThreadsPerCore={{ hostvars[node].ansible_processor_threads_per_core }} {% if hostvars[node].ansible_hostname.find('vis') != -1 %}Gres=gpu:1{% endif %} {% if hostvars[node]['ansible_processor_vcpus'] == 1 %}Weight=1{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 1 and hostvars[node]['ansible_processor_vcpus'] <= 16 %}Weight=3{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 16 and hostvars[node]['ansible_processor_vcpus'] <= 20 %}Weight=5{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 20 and hostvars[node]['ansible_processor_vcpus'] <= 40 %}Weight=7{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 40 and hostvars[node]['ansible_processor_vcpus'] <= 64 %}Weight=8{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 64 and hostvars[node]['ansible_processor_vcpus'] <= 128 %}Weight=9{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 128 %}Weight=10{% endif %} Feature=stage1 State=UNKNOWN
{% endfor %}
#monarch specific to stop stupid warning messages
NodeName={{ hostvars[groups['LoginNodes'][0]]['ansible_hostname'] }} State=DOWN
NodeName={{ slurmctrl }} State=DOWN
{% for queue in slurmqueues %}
{% set nodenames = [] %}
{% for node in groups[queue.group] %}
......
---
- name: grab cacert
shell: cat {{ ldapCertDest }}
# shell: cat /etc/openldap/certs/cacert.pem
shell: cat {{ ldapCARootDest }}
register: ldapCaCertContents
- name: dump vars
......
---
- include_vars: "{{ ansible_os_family }}.yml"
- name: Install epel-release
yum: name=epel-release-7-5.noarch state=present
sudo: true
when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
- name: Enable epel
command: yum-config-manager --enable epel
sudo: true
when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
- name: install lua
yum: name={{ item }} state=installed
with_items:
......
......@@ -12,7 +12,7 @@
sudo: true
- name: install gluster
apt: name=glusterfs state='latest'
apt: name=glusterfs-client state='latest'
when: ansible_os_family == 'Debian'
sudo: true
......
......@@ -15,13 +15,19 @@
sudo: true
- name: install gluster
apt: name=glusterfs state='latest'
apt: name=glusterfs-server state='latest'
when: ansible_os_family == 'Debian'
sudo: true
- name: start daemon
service: name=glusterd enabled=yes state=started
sudo: true
when: ansible_os_family == 'RedHat'
- name: start daemon
service: name=glusterfs-server enabled=yes state=started
sudo: true
when: ansible_os_family == 'Debian'
- name: make server list
set_fact:
......
......@@ -116,7 +116,7 @@
sudo: true
- name: install shibboleth cache file
template: src="{{ shibboleth_deploy }}_metadata.aaf.xml.j2" dest=/tmp/metadata.aaf.xml
template: src="files/{{ shibboleth_deploy }}_metadata.aaf.xml.j2" dest=/tmp/metadata.aaf.xml
-
name: "enabling Karaage configuration"
......@@ -154,11 +154,11 @@
sudo: true
when: karaage_db_init.stdout.find("0") == 0
#-
# name: "Create IDP institutes (disable it as cache is not available)"
# shell: kg-idps /tmp/metadata.aaf.xml
# sudo: true
# when: karaage_db_init.stdout.find("0") == 0
-
name: "Create IDP institutes (disable it as cache is not available)"
shell: kg-idps /tmp/metadata.aaf.xml
sudo: true
when: karaage_db_init.stdout.find("0") == 0
-
name: "Create projects"
......
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<body><h3>HPC identity system (The landing page is under the construction)</h3>
<br>
<p>Monash HPC identity system is a new HPC access control system. Access to the HPC ID system is done through the Australian Access Federation (AAF). This allows you to login using your Institutional username and password.
<br>
<br>
If it is the first time you are using the system, it will give your options to select your existing HPC username for creating a new user account. You'll need to join projects before you can access HPC system.
<br>
<br>
If your organisation is not a member of the AAF or if you need helps, please send HPC email support: help@massive.org.au.</p>
<br>
<p>Click following link <a href=https://{{ ansible_fqdn }}/aafbootstrap>to continue.</a></p>
</body>
</html>
<html><body><h3>HPC identity management</h3>
<p>To log in via AAF authentication, connect to <a href=https://{{ hpchostname }}.erc.monash.edu.au/aafbootstrap>aafbootstrap</a></p>
<p>To log in without AAF authentication, connect to <a href=https://{{ hpchostname }}.erc.monash.edu.au/users>users</a></p>
</body></html>
......@@ -26,8 +26,6 @@ class HpcIdInit():
if self.path and os.path.exists(self.path):
with open(self.path) as data:
config_data = json.load(data)
self.project = config_data["project"]
self.mc = config_data["machine_category"]
self.user = config_data["superuser"]
else:
log("Invalid input data")
......@@ -44,115 +42,6 @@ class HpcIdInit():
now = time.strftime("%c")
self.logfile.write(now + ": " + message + "\n")
def getGroup(self, name):
group = None
try:
group =Group.objects.get(name = name)
if group:
self.log("Find group %s" %(name))
except:
self.log("Group %s not found" %(name))
finally:
return group
def getProject(self, name):
self.log("Get Project 1 %s" %(name))
project = None
try:
project = Project.objects.get(name = name)
if project:
self.log("Find project %s" %(project.name))
group = project.group
if group:
self.log("Group name = %s" %(group.name))
else:
self.log("Project %s not found" %(project.name))
except Project.DoesNotExist:
self.log("project %s does not exists" %(name))
except:
self.log("Exception: ", traceback.format_exc())
finally:
return project
def createProject(self, pid, name, institute_name, superuser):
project = None
try:
institute = self.getInstitute(institute_name)
if institute:
self.log("Find insititute %s" %(institute.name))
project = Project.objects.create(pid = pid, name = name, institute = institute, group = institute.group, is_active = True, is_approved = True, approved_by = superuser)
if project:
project.leaders.add(superuser)
self.log("Create project OK")
else:
self.log("Create project failed")
else:
self.log("Insititute %s does not exist" %(institute_name))
except:
self.log("Exception: ", traceback.format_exc())
finally:
return project
def getInstitute(self, name):
institute = None
try:
institute = Institute.objects.get(name = name)
if institute:
self.log("Institute %s exist" %(institute.name))
group = institute.group
if group:
self.log("Group name = %s" %(group.name))
else:
self.log("Institute %s not found" %(name))
except Institute.DoesNotExist:
self.log("Institute %s not found" %(name))
finally:
return institute
def getDefaultDatastore(self):
for key, value in settings.MACHINE_CATEGORY_DATASTORES.items():
if value:
return key
return None
def getMachineCategory(self, name):
mc = None
self.log("Running getMachineGategory %s" %(name))
try:
mc = MachineCategory.objects.get(name = name)
if mc:
self.log("Find machine category %s" %(mc.name))
else:
slef.log("Not found machine category %s" %(name))
except MachineCategory.DoesNotExist:
self.log("Machine category %s dose not exist" %(name))
except:
self.log("Except to create machine category %s" %(traceback.format_exc()))
finally:
return mc
def getOrCreateMachineCategory(self, name):
mc = None
try:
self.log("getOrCreateMachineCategory %s" %(name))
mc = self.getMachineCategory(name)
if not mc:
datastore = self.getDefaultDatastore()
self.log("datastore = '%s'" %(datastore))
mc = MachineCategory.objects.get_or_create(name = name, datastore = datastore)
self.log("after create machine catetory '%s'" %(name))
if mc:
self.log("Create MachineCategory %s OK" %(mc.name))
else:
self.log("Create MachineCategory failed")
except:
self.log("Except to create machine category %s" %(traceback.format_exc()))
finally:
return mc
def getUser(self, username):
person = None
try:
......@@ -171,33 +60,13 @@ class HpcIdInit():
if person:
person.set_password(self.password)
person.save()
result = self.addInstituteDelegate(person, institute)
if result:
log("Add super user %s to institute %s delegate" %(person.username, institute.name))
else:
log("Faired to add super user %s to institute %s delegate" %(person.username, institute.name))
person.full_clean()
except:
log("Create super user exception: %s" %(traceback.format_exc()))
finally:
return person
def addInstituteDelegate(self, su, institute):
result = True
try:
delegates = institute.delegates.all().filter(username = su.username)
if len(delegates) == 0:
self.log("Create institution delegate %s"%(su.username))
institute.delegates.add(su)
self.log("Create institution delegate %s OK"%(su.username))
except:
result = False
self.log("Create institution delegate exception: %s" %(traceback.format_exc()))
finally:
return result
def setup(self):
self.log("Debug = %s" %(self.debug))
su = self.getUser(self.user["username"])
if su:
self.log("Find super user %s" %(su.username))
......@@ -207,25 +76,6 @@ class HpcIdInit():
self.log("Create super user %s OK" %(su.username))
else:
self.log("Create super user %s failed" %(self.user["username"]))
if self.mc:
mc = self.getOrCreateMachineCategory(self.mc)
if mc:
self.log("Get machine category = '%s'" %(self.mc))
else:
self.log("Failed to get machine category = '%s'" %(self.mc))
if su:
for p in self.project:
project = self.getProject(p["project_name"])
if project:
self.log("Find project %s" %(project.name))
else:
self.log("Create project name = %s, pid = %s, institute name = %s" %(p["project_name"], p["pid"], p["institute_name"]))
project = self.createProject(p["pid"], p["project_name"], p["institute_name"], su)
if project:
self.log("Create project %s OK." %(project.name))
else:
self.log("Create project %s failed." %(p["project_name"]))
break
def main(argv):
config_path = None
......@@ -236,7 +86,6 @@ def main(argv):
if len(sys.argv) > 3:
debug = argv[2]
init = HpcIdInit(config_path, password, debug)
init.log("Password = %s, debug = %s" %(password, debug))
init.setup()
else:
print "Usage: kg_init <config file> <superuser password> <option: debug True | False>"
......
{"project": [{"project_name": "MCC2", "pid": "pMcc2", "institute_name": "Monash University"}, {"project_name": "CVL", "pid": "pCvl", "institute_name": "Monash University"}], "machine_category": "hpc_cluster", "superuser": {"username": "admin", "email": "jupiter.hu@monash.edu", "institute_name": "Monash University", "short_name": "admin", "full_name": "admin"}}
{"superuser": {"username": "admin", "email": "jupiter.hu@monash.edu", "institute_name": "Monash University", "short_name": "admin", "full_name": "admin"}}
......@@ -267,7 +267,7 @@ LOGGING = {
# Users are advised to contact this address if having problems.
# This is also used as the from address in outgoing emails.
ACCOUNTS_EMAIL = '{{ karaageAdminEmail }}'
ACCOUNTS_EMAIL = '{{ karaageAccountEmail }}'
# This organisation name, used in outgoing emails.
ACCOUNTS_ORG_NAME = '{{ karaageAcountName }}'
......
This diff is collapsed.
......@@ -40,7 +40,5 @@
notify: restart sssd
- name: "start sssd"
service: name=sssd state=started
service: name=sssd state=started enabled=yes
sudo: true
......@@ -77,6 +77,10 @@
copy: src="files/{{ ldapCAChain }}" dest="{{ ldapCAChainDest }}"
sudo: true
- name: copy ca root cert
copy: src="files/{{ ldap_TLSCARoot }}" dest="{{ ldapCARootDest }}"
sudo: true
when: ldap_TLSCARoot is defined
- name: copy key
copy: src="files/{{ ldapKey }}" dest="{{ ldapKeyDest }}" mode=600 owner={{ ldapuser }} group={{ ldapgroup }}
......
......@@ -2,3 +2,4 @@
ldapCertDest: "{{ ldapDir }}/ssl/certs/ldapcert.pem"
ldapKeyDest: "{{ ldapDir }}/ssl/private/ldapkey.pem"
ldapCAChainDest: "{{ ldapDir }}/ssl/certs/cacert.pem"
ldapCARootDest: "{{ ldapDir }}/ssl/certs/ca_cert.pem"
......@@ -12,6 +12,17 @@
# sudo: true
# when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
- name: Install epel-release
yum: name=epel-release-7-5.noarch state=present
sudo: true
when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
- name: Enable epel
command: yum-config-manager --enable epel
sudo: true
when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7"
- name: install lua
yum: name={{ item }} state=installed
with_items:
......
---
- include_vars: "{{ ansible_distribution }}_{{ ansible_distribution_major_version }}.yaml"
- name: copy rpms/debs
copy: dest=/tmp/ src=lustre-install/{{ item }}
with_items:
"{{ lustre_pkgs }}"
#- name: install rpms
# yum: name="/tmp/{{ item }}"
# sudo: true
# with_items: "{{ lustre_pkgs }}"
- name: install rpms
yum: name=/tmp/lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
sudo: true
when: ansible_os_family == "RedHat"
- name: install rpms
yum: name=/tmp/lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
sudo: true
when: ansible_os_family == "RedHat"
# instructions to build these debs:
# Instantiate an Ubuntu 14.04 instance
# git clone git://git.hpdd.intel.com/fs/lustre-release.git
# cd lustre-release
# optionally git checkout 0754bc8f2623bea184111af216f7567608db35b6 <- I know this commit works on Ubuntu, but I had a lot of trouble with other branches
# sh autogen.sh
# ./configure --enable-dist --disable-doc --disable-server --disable-dependency-tracking --with-o2ib=/var/lib/dkms/mlnx-ofed-kernel/3.1/build/
# mkdir BUILD
# cd BUILD
# ln -s ../lustre-2.7.62.tar.gz lustre-2.7.62.orig.tar.gz
# tar zxvf ../lustre-2.7.62.tar.gz
# cd lustre-2.7.62
# ./configure --disable-doc --disable-server --disable-dependency-tracking --with-o2ib=/var/lib/dkms/mlnx-ofed-kernel/3.1/build/
# vi debian/changelog (the version number on the first line is incorrect)
# make debs
#
- name: install debs
apt: name="/tmp/{{ item }}"
sudo: true
with_items: "{{ lustre_pkgs }}"
when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "14"
- name: "Mount lustre filesystems"
mount: name="{{ item.mntpt }}" src="{{ item.servers }}"/"{{ item.src }}" state="mounted" fstype="lustre" opts="_netdev,flock"
sudo: true
with_items: "{{ mntlist }}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment