diff --git a/roles/slurm-common/templates/slurm.conf.j2 b/roles/slurm-common/templates/slurm.conf.j2
deleted file mode 100644
index 4d868b18af4d1f62074380c95a438b7f707f8858..0000000000000000000000000000000000000000
--- a/roles/slurm-common/templates/slurm.conf.j2
+++ /dev/null
@@ -1,157 +0,0 @@
-#
-# Example slurm.conf file. Please run configurator.html
-# (in doc/html) to build a configuration file customized
-# for your environment.
-#
-#
-# slurm.conf file generated by configurator.html.
-#
-# See the slurm.conf man page for more information.
-#
-ClusterName={{ clustername }}
-ControlMachine={{ slurmctrl }}
-#ControlAddr=
-#BackupController=
-#BackupAddr=
-#
-SlurmUser=slurm
-SlurmdUser=root
-SlurmctldPort=6817
-SlurmdPort=6818
-AuthType=auth/munge
-#JobCredentialPrivateKey=
-#JobCredentialPublicCertificate=
-StateSaveLocation={{ slurmstatedir }}
-SlurmdSpoolDir={{ slurmdatadir }}
-SwitchType=switch/none
-MpiDefault=pmi2
-SlurmctldPidFile={{ slurmpiddir }}/slurmctld.pid
-SlurmdPidFile={{ slurmpiddir }}/slurmd.pid
-ProctrackType=proctrack/linuxproc
-#PluginDir=
-CacheGroups=0
-#FirstJobId=
-ReturnToService=1
-#MaxJobCount=
-#PlugStackConfig=
-#PropagatePrioProcess=
-#PropagateResourceLimits=
-#PropagateResourceLimitsExcept=
-#Prolog=
-#Epilog=
-#SrunProlog=
-#SrunEpilog=
-#TaskProlog=
-#TaskEpilog=
-TaskPlugin=task/cgroup
-#TaskPlugin=task/affinity
-#TaskPlugin=task/affinity,task/cgroup
-{% if slurm_lua is defined %}
-JobSubmitPlugins=lua
-{% endif %}
-OverTimeLimit=1
-CompleteWait=10
-
-#TrackWCKey=no
-#TreeWidth=50
-#TmpFS=
-#UsePAM=
-#
-# TIMERS
-
-SlurmctldTimeout=3000 #added due to network failures causing jobs to be killed
-
-#SlurmctldTimeout=300
-#SlurmdTimeout=300
-#InactiveLimit=0
-#MinJobAge=300
-KillWait=10
-#Waittime=0
-#
-# SCHEDULING
-SchedulerType={{ slurmschedulertype }}
-#SchedulerAuth=
-#SchedulerPort=
-#SchedulerRootFilter=
-SelectType={{ slurmselecttype }}
-{% if slurmselecttype.find("cons_res") > 0 %}
-SelectTypeParameters=CR_Core_Memory
-{% endif %}
-FastSchedule={{ slurmfastschedule }}
-#PriorityType=priority/multifactor
-#PriorityFlags=Ticket_Based
-#PriorityCalcPeriod=5
-#PriorityDecayHalfLife=0
-#PriorityUsageResetPeriod=14-0
-##PriorityWeightFairshare=10000
-#PriorityWeightAge=10000
-#PriorityWeightPartition=10000
-#PriorityWeightJobSize=10000
-#PriorityMaxAge=14-0
-#
-# LOGGING
-{% if slurmctlddebug %}
-SlurmctldDebug={{ slurmctlddebug.level }}
-SlurmctldLogFile={{ slurmctlddebug.log }}
-{% else %}
-#SlurmctldDebug=
-#SlurmctldLogFile=
-{% endif %}
-{% if slurmddebug %}
-SlurmdDebug={{ slurmddebug.level }}
-SlurmdLogFile={{ slurmddebug.log }}
-{% else %}
-#SlurmdDebug=
-#SlurmdLogFile=
-{% endif %}
-{% if slurmschedlog %}
-SlurmSchedlogLevel={{ slurmschedlog.level }}
-SlurmSchedLogFile={{ slurmschedlog.log }}
-{% else %}
-#SlurmSchedlogLevel=
-#SlurmSchedLogFile=
-{% endif %}
-JobCompType=jobcomp/none
-#JobCompLoc=
-#
-{% if slurmjob is defined %}
-Prolog={{ slurmjob.prolog }}
-Epilog={{ slurmjob.epilog }}
-{% endif %}
-#
-# ACCOUNTING
-#JobAcctGatherType=jobacct_gather/linux
-#JobAcctGatherFrequency=30
-#
-AccountingStorageType=accounting_storage/slurmdbd
-AccountingStorageHost={{ slurmctrl }}
-#AccountingStorageEnforce=limits,safe
-#AccountingStorageLoc=
-#AccountingStoragePass=
-#AccountingStorageUser=
-#
-#GRES
-GresTypes=gpu
-
-# Fair share
-{% if slurmfairshare.def %}
-PriorityWeightFairshare={{ slurmfairshare.val }}
-{% endif %}
-
-DisableRootJobs=YES
-MpiParams=ports=12000-12999
-# COMPUTE NODES
-{% set nodelist = [] %}
-{% for queue in slurmqueues %}
-{% for node in groups[queue.group] %}
-{% if nodelist.append(node) %}
-{% endif %}
-{% endfor %}
-{% endfor %}
-{% for node in nodelist|unique %}
-NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} RealMemory={{ hostvars[node].ansible_memory_mb.real.total }} Sockets={{ hostvars[node]['ansible_processor_vcpus'] }} CoresPerSocket=1 ThreadsPerCore={{ hostvars[node].ansible_processor_threads_per_core }} {% if hostvars[node].ansible_hostname.find('vis') != -1 %}Gres=gpu:1{% endif %} {% if hostvars[node]['ansible_processor_vcpus'] == 1 %}Weight=1{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 1 and hostvars[node]['ansible_processor_vcpus'] <= 16 %}Weight=3{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 16 and hostvars[node]['ansible_processor_vcpus'] <= 20 %}Weight=5{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 20 and hostvars[node]['ansible_processor_vcpus'] <= 40 %}Weight=7{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 40 and hostvars[node]['ansible_processor_vcpus'] <= 64 %}Weight=8{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 64 and hostvars[node]['ansible_processor_vcpus'] <= 128 %}Weight=9{% endif %}{% if hostvars[node]['ansible_processor_vcpus'] > 128 %}Weight=10{% endif %} Feature=stage1 State=UNKNOWN
-{% endfor %}
-
-{% for queue in slurmqueues %}
-PartitionName={{ queue.name }} {% if queue.default %}Default=yes{% endif %} Nodes={{ groups[queue.group]|join(',') }} DefaultTime=72:00:00 State=UP
-{% endfor %}
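
For reference, a minimal sketch of what the removed template's closing loops would have rendered, assuming a hypothetical host "node01" with 16 vCPUs, 1 thread per core and 64000 MB of RAM, listed in a default queue named "batch" (names and figures are illustrative, not taken from any real inventory):

    NodeName=node01 Procs=16 RealMemory=64000 Sockets=16 CoresPerSocket=1 ThreadsPerCore=1 Weight=3 Feature=stage1 State=UNKNOWN
    PartitionName=batch Default=yes Nodes=node01 DefaultTime=72:00:00 State=UP

Weight=3 follows the template's vCPU bucket (more than 1 and at most 16 vCPUs), and Gres=gpu:1 would be appended only for hosts whose hostname contains "vis".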