Commit eb7ae882 authored by Samuel Tan's avatar Samuel Tan
Browse files

initial commit

parent 6c75e99c
#!/usr/bin/env python3
import numpy as np
import periodictable as ptab
from operator import itemgetter
import itertools, sys, re, os
### the various different fundamental components are described
### here as classes and functions
class Atom(object):
""" fundamental customed defined type
note that types are strictly enforced
in particular, coordinates are tuples (immutable)
"""
def __init__(self, elem_sym = None, coord = None, atomic_num = None,
charge = None, atom_id = None):
self.elem_sym = str(elem_sym)
self.coord = tuple(float(x) for x in coord)
self.x, self.y, self.z = self.coord
self.atom_id = atom_id
# fill in other info using imported module periodictable
self.ptabHandle = getattr(ptab, self.elem_sym)
if not atomic_num:
self.atomic_num = int(getattr(self.ptabHandle, "number"))
if not charge:
# should be 0 usually
self.charge = int(getattr(self.ptabHandle, "charge"))
def printAtom(self, fmt = None):
# various format for different computational packages
if fmt == "psi4":
# note the space added in front of elem_sym for readability
#self.data = [" " + self.elem_sym, self.x, self.y, self.z]
# ensure they are strings, then concatenate with spaces
# return one string only
#return " ".join(map(str, self.data))
elem_sym = '{:^4}'.format(self.elem_sym)
coord = ['{:> 15.8f}'.format(self.coord[i]) for i in range(3)]
self.data = [elem_sym] + coord
return " ".join(self.data)
elif fmt == "gamess":
elem_sym = '{:^4}'.format(self.elem_sym)
atom_id = '{:^4}'.format(self.atomic_num)
coord = ['{:> 15.8f}'.format(self.coord[i]) for i in range(3)]
self.data = [elem_sym] + [atom_id] + coord
return " ".join(self.data)
elif fmt:
print("Unrecognized printing format: Atom.printAtom()")
else:
self.data = [self.atom_id, self.atomic_num, self.elem_sym, self.coord]
return self.data
def dist(self, otherAtom):
if isinstance(otherAtom, Atom):
return euclid_Dist(self.coord, otherAtom.coord)
else:
print("TypeError in Atom.dist(), need two atoms")
class Molecule(object):
""" collection of atoms """
def __init__(self, atomKeys, atomList):
""" pretty much only a wrapper for addAtom() """
self.atomDict = {}
if len(atomKeys) == len(atomList):
for i in range(len(atomKeys)):
self.addAtom(atomKeys[i], atomList[i])
else:
print("Error mismatch in Molecule.__init__")
def addAtom(self, key, newAtom):
""" method to add Atom to molecule """
if isinstance(newAtom, Atom):
self.atomDict[key] = newAtom
# this is updated every time an atom is added
self.natoms = len(self.atomDict)
else:
print("TypeError in Molecule object: objects of class Atom required")
def atomListAsElem_Sym(self):
""" return *sorted* list of atoms as elem_sym for identification
against database, i.e. for charge etc.
"""
return [x.elem_sym for x in self.atomDict.values()]
def printMol(self, fmt = None):
return [atom.printAtom(fmt) for atom in self.atomDict.values()]
def minDist(self, otherMol):
if isinstance(otherMol, Molecule):
distList = []
for i in self.atomDict.keys():
for j in otherMol.atomDict.keys():
dist = self.atomDict[i].dist(otherMol.atomDict[j])
distList.append(dist)
return min(distList)
else:
print("TypeError in Molecule.minDist: need 2 arguments of class Molecule")
class System(object):
""" usually from xyz file """
def __init__(self, read_file = None, v = False):
if read_file:
self.f = read_xyz(read_file)
self.f = [line.split() for line in self.f]
self.size = int(self.f[0][0]) # number of atoms
if self.f[1]:
self.comment = str(self.f[1]) # comment line in xyz files
# systems are composed of molecules
# initiate every atom as a molecule
self.molDict = {}
for i in range(self.size):
a = self.f[i+2]
# create Atom
self.molDict[i] = Atom(atom_id = i, elem_sym = a[0], coord = a[1:4])
# create molecule with key "i"; note the lists
# to satisfy length checking in Molecule.addAtom()
self.molDict[i] = Molecule([i], [self.molDict[i]])
else:
print("creating empty System object")
if v: # verbose
self.aggregateMol(2) # default 2 fragments
self.printSys() # show some results
def calcAllDist(self):
# calculate all distances between *molecules* in system
self.distList = []
for i,j in itertools.combinations(self.molDict.keys(), 2):
min_dist = self.molDict[i].minDist(self.molDict[j])
self.distList.append([i, j, min_dist])
return self.distList
def aggregateMol(self, nfrag):
""" collect molecules iteratively """
while len(set(self.molDict.keys())) > nfrag:
minPair = self.calcAllDist()
# don't know why, but sort cannot be on the same
# line as assignment
minPair.sort(key = itemgetter(2))
mol1, mol2 = minPair[0][:2]
for key, atom in self.molDict[mol2].atomDict.items():
self.molDict[mol1].addAtom(key, atom)
self.molDict.pop(mol2)
def printSys(self, fmt = None):
for i,mol in self.molDict.items():
print("this is molecule: ", i)
print(mol.printMol(fmt))
####################################################################
### ============ FUNCTIONS ================== ###
####################################################################
def warning(*objs):
print(*objs, file = sys.stderr)
# only 3D Euclidean distance
def euclid_Dist(a, b):
if len(a) != len(b):
print("dist fn: length mismatch")
return 1
else:
val = 0.0
for i in range(3):
val += (a[i] - b[i]) ** 2
return np.sqrt(val)
# separate function might be useful, e.g. to purely read file
# mainly used by the class System though
def read_xyz(xyz_f):
with open(xyz_f) as f:
return [line.strip() for line in f.readlines()]
def scriptPath(f = None):
p = os.path.dirname(os.path.realpath(sys.argv[0]))
if not f:
return p
elif f: # f a string
return p + "/" + f
def pos_uniq(inList, tag):
pos = [i for i,j in enumerate(inList) if re.search(tag, j, flags = re.IGNORECASE)]
if not pos:
return pos # empty list
elif len(pos) != 1:
warning("Warning: more than one instance matched in ", inList)
return pos
else:
return int(pos[0])
def insert_List(inList, tag, insertThis):
""" this function iterates through a (flat) list, inList,
and looks for "tag" AT THE BEGINNING OF THE STRING,
so *not* a regex search
then splits inList, inserts "insertThis" (for replacement)
insertThis must be a list
and joins list together back again
effectively replaces "tag" with "insertThis"
and returns the modified list
"""
if not isinstance(insertThis, list):
warning("TypeError: insertList(), arg insertThis must be a list")
if len(inList) > 1:
tag_occurence = pos_uniq(inList, tag)
if tag_occurence:
l_top = inList[:tag_occurence]
l_bot = inList[tag_occurence + 1:] # note that the tag position is gone
return l_top + insertThis + l_bot
else:
#warning("Warning: no matches for ", tag, " in ", inList)
return inList
elif len(inList) == 1: #single element list
if re.search(tag, inList[0], flags = re.IGNORECASE):
return insertThis
else:
return inList
else: #empty list
return inList
def mySed(lineList_asStr, tag, repl):
""" lineList_asStr = list of strings representing lines
split them to get list of list of strings = lol_str
do subbing, then return another lineList_asStr
NOTE THAT repl IS split() !! so supply a string
insert_List takes a list in its argument
"""
lol_str = [line.split() for line in lineList_asStr]
lol_str = [insert_List(line, tag, repl.split()) for line in lol_str] # ensure replacement is in list form
return [" ".join(line) for line in lol_str]
task=genefpinput
nfrag=2
#basis=GBASIS=N311 NGAUSS=6 DIFFS=.TRUE. DIFFSP=.TRUE. NDFUNC=1 NPFUNC=1
mwords=300
memddi=100
guess_charges=true
using=xyz_files/c1mim-br-p1.xyz
title=random title
runtyp=OPTIMIZE
#!/usr/bin/env python3
# this file describes printing for different scenarios
from chemF import *
import molDB as mDB
import collections as col
import re
# must ensure last step is atomPrint()
# otherwise the string mangling messes up the tabling space!!
def genPsi4(curr_sys, tsk_d, template = None):
if not curr_sys:
print("missing system arg in genPsi4()!!")
return 1
if not template:
out_f = read_xyz(scriptPath("psi4.template"))
elif template:
out_f = read_xyz(template)
# replace anything mentioned in task file with
# "variable = value"
for key,val in tsk_d.items():
tmp = mySed(tmp, key, str(key) + "=" + str(val))
# guessing charges
charge_mult = col.defaultdict(list)
if "guess_charges" in tsk_d and tsk_d["guess_charges"].lower() == "true":
pos1 = pos_uniq(out_f, "molecule.+")
pos2 = pos_uniq(out_f, "monomer_0")
tos1 = pos_uniq(out_f, "--")
tos2 = pos_uniq(out_f, "monomer_1")
pos = pos2 - pos1
tos = tos2 - tos1
def no_guess(i):
warning("Charge and multiplicity present in template file for monomer", i)
warning("So I won't guess charges, even though it is set to true in task file")
if pos == 2:
no_guess(0)
elif tos == 2:
no_guess(1)
elif pos != 1 or tos != 1:
warning("Check your template file: lines in molecule specifier")
else:
for mol in list(curr_sys.molDict.values()):
if mDB.isAnion(mol):
charge_mult[mol].append('-1 1')
elif mDB.isCation(mol):
charge_mult[mol].append('1 1')
# optionals, read directly from globals
if "memory" in tsk_d:
out_f = [re.sub("memory.+", "memory " + tsk_d["memory"], x) for x in out_f]
if "basis" in tsk_d:
out_f = [re.sub("basis.+", "basis " + tsk_d["basis"], x) for x in out_f]
if "title" in tsk_d:
out_f[0] = tsk_d["title"]
# this is for dimer SAPT
if len(curr_sys.molDict) == 2:
# extract molecules so no need to know the actual
# keys in the dictionary (unlikely to be 0 and 1)
for i,mol in enumerate(curr_sys.molDict.values()):
ins = charge_mult[mol] + mol.printMol("psi4")
out_f = insert_List(out_f, tag = "monomer_" + str(i), insertThis = ins)
else:
warning("Error generating Psi4 file: can only accept 2 molecules for SAPT calculation!")
return out_f
def genGamess(curr_sys, tsk_d, template = None):
if not curr_sys:
print("missing system arg in genGamess()")
return 1
if template:
tmp = read_xyz(template)
elif not template:
tmp = read_xyz(scriptPath("efp.template"))
# replace anything mentioned in task file with
# "variable = value"
for key,val in tsk_d.items():
tmp = mySed(tmp, key, str(key) + "=" + str(val))
#if tsk_d.get("title"):
# tmp = mySed(tmp, "title", tsk_d["title"])
#else:
# print("no title")
out_f = {}
# create EFP inputs
if len(curr_sys.molDict) == 2:
for i,mol in enumerate(curr_sys.molDict.values()):
out_f[i] = tmp #initial
if tsk_d["guess_charges"] == "true":
if mDB.isAnion(mol):
out_f[i] = mySed(out_f[i], tag = "ICHARG", repl = "ICHARG=-1")
elif mDB.isCation(mol):
out_f[i] = mySed(out_f[i], tag = "ICHARG", repl = "ICHARG=1")
out_f[i] = insert_List(out_f[i], tag = "xyz_data", insertThis = mol.printMol("gamess"))
# add space in front of GROUP keywords, signaled by "$"
# doing this last so insert_List() etc don't interfere with it
out_f[i] = [" " + line if line.startswith("$") else line for line in out_f[i]]
else:
warning("Error generating GAMESS file: can currently only accept 2 molecules for EFP calculation!")
return out_f
#!/usr/bin/env python3
from chemF import warning
import collections as col
# molecule databases
AnionDB = {"br" : ["Br"]}
AnionDB["cl"] = ["Cl"]
AnionDB["bf4"] = ['B', 'F', 'F', 'F', 'F']
AnionDB["dca"] = ['N', 'C', 'N', 'C', 'N']
AnionDB["pf6"] = ['F', 'P', 'F', 'F', 'F', 'F', 'F']
AnionDB["mes"] = ['S', 'O', 'O', 'O', 'C', 'H', 'H', 'H']
AnionDB["ntf2"] = ['F', 'F', 'F', 'F', 'F', 'N', 'S', 'S',
'O', 'O', 'O', 'O', 'C', 'C', 'F']
AnionDB["tos"] = ['C', 'C', 'C', 'C', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'S', 'O', 'O', 'O', 'C', 'C', 'C']
CationDB = {"c1mim": ['C', 'N', 'C', 'N',
'C', 'C', 'C', 'H',
'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H']}
CationDB["c1mpyr"] = ['C', 'C', 'C', 'N', 'C', 'C', 'C',
'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H', 'H']
CationDB["c2mim"] = ['C', 'N', 'C', 'N', 'C',
'C', 'C', 'C', 'H', 'H',
'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H']
CationDB["c2mpyr"] = ['N', 'C', 'C', 'C', 'C', 'C',
'C', 'C', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H']
CationDB["c3mim"] = ['N', 'C', 'N', 'C', 'C', 'C', 'C', 'H',
'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H']
CationDB["c3mpyr"] = ['N', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H']
CationDB["c4mim"] = ['C', 'N', 'C', 'C', 'N', 'C', 'C', 'H', 'C', 'C',
'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H']
CationDB["c4mpyr"] = ['N', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H']
def isAnion(mol):
a = mol.atomListAsElem_Sym()
# note the next only returns the first value--no duplicates allowed, or detected
# checking done via Counter() from collections, no sorting required, duplicates included
#return next((key for key, anion in AnionDB.items()
# if col.Counter(a) == col.Counter(anion)), None)
for key, anion in AnionDB.items():
if col.Counter(a) == col.Counter(anion):
warning("Anion detected ", key)
return True
def isCation(mol):
a = mol.atomListAsElem_Sym()
for key, cation in CationDB.items():
if col.Counter(a) == col.Counter(cation):
warning("Cation detected ", key)
return True
#!/usr/bin/env python3
from chemF import *
from inp_formats import *
import re, os
import sys, getopt
import collections as col
taskDict = {}
def readTaskFile(tsk_f):
""" reads in task file and puts variables
in a global dictionary, for all to see
ignores comment lines, which start with '#'
"""
f = read_xyz(tsk_f)
global taskDict
for line in f:
if not line.strip().startswith("#"):
l = line.split("=")
taskDict[l[0]] = l[1]
# ensure correct types
taskDict["nfrag"] = int(taskDict["nfrag"])
if "guess_charges" in taskDict:
taskDict["guess_charges"] = taskDict["guess_charges"].lower()
def prettyPrint(contentsList, out_filename = None):
""" contentsList is the contents we want to output in list form
out_filename is the file we are writing to
"""
if out_filename:
f = open(out_filename, 'w+')
f.write("\n".join(contentsList))
else:
print("\n".join(contentsList))
def usage():
usage_str = "Usage:\n \t sqcpp.py <task_file> -i <xyz_file> -o <output_file>"
warning(usage_str)
def main():
""" main execution happens here """
# returns 2 things:
# list of (option,value) pairs
# arguments after optional arguments removed
# optional parameters that take an arg must be followed by a ":"
try:
opts, args = getopt.getopt(sys.argv[1:], "hi:o:t:")
except getopt.GetoptError:
usage()
sys.exit(3)
optDict = dict(opts)
if "-h" in optDict:
usage()
sys.exit()
# read task file first
if len(args) == 1:
tsk_f = args[0]
try:
readTaskFile(tsk_f)
except FileNotFoundError:
print("Task file cannot be found")
usage()
sys.exit(2)
else:
print("Task file cannot be found: did you forget to specify it?")
usage()
sys.exit(2)
# override task file defaults with optional arguments
if "-i" in optDict:
taskDict["using"] = optDict["-i"]
elif "-o" in optDict:
taskDict["out_filename"] = optDict["-o"]
elif "-t" in optDict:
taskDict["template"] = optDict["-t"]
# System class from chemF
try:
curr_sys = System(read_file = taskDict["using"])
except FileNotFoundError:
print("FileNotFoundError: specified xyz file can't be found")
sys.exit(2)
# collect atoms into molecules
curr_sys.aggregateMol(nfrag = taskDict["nfrag"])
# printing using functions in formats.py
a = 1
if taskDict["task"] == "psi4sapt":
a = genPsi4(curr_sys = curr_sys, tsk_d = taskDict, template = taskDict["template"])
a = prettyPrint(a, taskDict.get("out_filename"))
elif taskDict["task"] == "genefpinput":
key_list = [str(a).lower() for a in taskDict.keys()]
if [b for b in key_list if b in ["basis", "gbasis"]]:
print("No support for Basis group at the moment")
print("Please remove line from task file and use a template file instead")
return 0
a = genGamess(curr_sys = curr_sys, tsk_d = taskDict)
prettyPrint(a[0], "efp_frag_0.inp")
prettyPrint(a[1], "efp_frag_1.inp")
else:
print("Task not recognized")
return a
main()
task=psi4sapt
title=c1mim-bf4-p1-opt_IL_aTZ_sapt2plus3
using=c1mim-bf4-p1.xyz
nfrag=2
basis=aug-cc-pVTZ
memory=32 Gb
guess_charges=true
$SYSTEM mwords=120 memddi=50 $END
$CONTRL SCFTYP=RHF RUNTYP=MAKEFP maxit=200 ispher=1
ICHARG=-1
$END
$SCF DIRSCF=.TRUE. FDIFF=.FALSE. DIIS=.TRUE. $END
! $BASIS GBASIS=N311 NGAUSS=6 DIFFS=.TRUE. DIFFSP=.TRUE. NDFUNC=1
NPFUNC=1 $END
$DATA
title
C1
xyz_data
$END
# SAPT CALCULATION: c1mim-bf4-p1_aDZ_sapt2plus3
memory 32 Gb
molecule dimer {
1 1
monomer_0
--
-1 1
monomer_1
units angstrom
no_reorient
symmetry c1
}
set globals {
basis aug-cc-pVDZ
scf_type DF
freeze_core True
guess sad
basis_guess 3-21G
}
energy('sapt2+3-ct')
21
c1mim-bf4-p1-mp2-cp.com
C 0.00000000 0.00000000 0.00000000
N 0.00000000 0.00000000 1.37405500
C 1.26815400 0.00000000 1.80843900
N 2.07877900 -0.02792200 0.74098800
C 1.31304500 -0.01765900 -0.39985000
C -1.17649400 0.19094100 2.22511900
C 3.53427200 0.12780900 0.79070400
F 1.09782400 2.71748500 1.13271500