#!/usr/bin/env python
"""Set of classes to deal with molecular data and formats"""
__author__="Pablo Baudin"
__email__="pablo.baudin@epfl.ch"
from comp_chem_utils.conversions import BOHR_TO_ANG
from comp_chem_utils.mysql_tables import mol_info_table, mol_xyz_table
from comp_chem_utils.periodic import element
hostname = 'localhost'
username = 'root'
password = ''
database = 'molecules'
[docs]class mol_data(object):
""" All information regarding a given molecule. """
# INITIALIZTION FUNCTIONS
# -----------------------
def __init__(self):
self.idd = 0
self.name = ""
self.chem_name = ""
self.note = ""
self.tab_name = ""
self.charge = 0
self.natoms = 0
self.natom_types = 0
self.atom_types = []
[docs] def init_from_mol(self, mol_file):
""" update mol_data with data in mol (DALTON) format"""
self.name = mol_file.fname.split("/")[-1][:-4]
self.note = mol_file.title1+" "+mol_file.title2
self.charge = mol_file.charge
self.natoms = mol_file.natoms
self.natom_types = mol_file.natom_types
self.atom_types = mol_file.to_angstrom().atom_types
self.get_chem_name()
return self.check() # check validity of mol_data"
[docs] def init_from_xyz(self, xyz_file):
""" update mol_data with data in xyz format"""
self.name = xyz_file.fname.split("/")[-1][:-4]
self.note = xyz_file.title
#print("WARNING: init_from_xyz setting default charge to zero")
self.charge = 0
self.natoms = xyz_file.natoms
self.natom_types = xyz_file.natom_types
self.atom_types = xyz_file.atom_types
self.get_chem_name()
return self.check() # check validity of mol_data"
[docs] def init_from_db(self, cur, idd):
""" update mol_data with data from database"""
# get main information from mol_info table
main_tab = mol_info_table()
cur.execute( main_tab.get_row(idd) )
for (idd, name, chem_name, note, charge, natoms, natom_types) in cur:
self.idd = idd
self.name = name
self.chem_name = chem_name
self.note = note
self.charge = charge
self.natoms = natoms
self.natom_types = natom_types
# get xyz data from molecule specific table
xyz_tab = mol_xyz_table( idd )
cur.execute( xyz_tab.get_table() )
old_charge = 0
for (idd, symb, charge, x, y, z) in cur:
if charge!=old_charge:
if (old_charge>0):
# add old atype to atom_types list
self.atom_types.append(atype)
# new atom type
atype = atom_type()
old_charge = charge
atype.charge = charge
atype.symb = symb
atype.xvals.append(x)
atype.yvals.append(y)
atype.zvals.append(z)
atype.natoms += 1
# add last atype to atom_types list
self.atom_types.append(atype)
self.get_chem_name()
return self.check() # check validity of mol_data"
# OUTPUT FUNCTIONS
# ----------------
[docs] def output(self):
""" return list of strings containing mol_data"""
lines = []
lines.append("ID : "+str(self.idd))
lines.append("NAME : "+self.name)
lines.append("CHEM NAME : "+self.chem_name)
lines.append("NOTE : "+self.note)
lines.append("TABLE NAME : "+self.tab_name)
lines.append("CHARGE : "+str(self.charge))
lines.append("NATOMS : "+str(self.natoms))
lines.append("NATOM TYPES: "+str(self.natom_types))
# print atom types
for atype in self.atom_types:
lines.extend(atype.output())
return lines
[docs] def out_to_mol(self):
""" generate mol_file data from mol_data """
mol = mol_file()
mol.fname = self.name.replace(' ','_')+".mol"
mol.charge = self.charge
mol.natoms = self.natoms
mol.bohr = False
if len(self.note.split())>8:
mol.title1 = self.note[:6].replace('\n',' ')
mol.title2 = self.note[6:].replace('\n',' ')
else:
mol.title1 = self.note.replace('\n',' ')
mol.natom_types = self.natom_types
mol.atom_types = self.atom_types
return mol
[docs] def out_to_xyz(self):
""" generate xyz_file data from mol_data """
xyz = xyz_file()
xyz.fname = self.name.replace(' ','_')+".xyz"
xyz.natoms = self.natoms
xyz.title = self.note.replace('\n',' ')
xyz.natom_types = self.natom_types
xyz.atom_types = self.atom_types
return xyz
[docs] def out_to_db(self, cur):
""" add mol_data as a new entry to the molecules database"""
if self.check(): # check validity of mol_data before saving to database"
return self.check()
main_tab = mol_info_table()
cur.execute(main_tab.add_row(), (self.name, self.chem_name, self.note, self.charge, self.natoms, self.natom_types) )
# get mol_id for mol_info
self.idd = cur.lastrowid
# create xyz table:
xyz_tab = mol_xyz_table( self.idd )
cur.execute( xyz_tab.create_table() )
for atype in self.atom_types:
atype.add_db_entry(cur, xyz_tab.add_row())
[docs] def get_chem_name(self):
""" get/set chemical formula based on mol_data"""
self.chem_name = ""
for atype in self.atom_types:
self.chem_name += "{0.symb}_{0.natoms} ".format(atype)
# add total charge
self.chem_name += " ({:+d})".format(self.charge)
[docs] def check(self):
""" check consistency of mol_data"""
# sum of atoms in atom_types must be equal to natoms
nat = 0
for atype in self.atom_types:
nat += atype.natoms
if nat!=self.natoms:
print("ERROR: Inconsistency in mol_data (natoms)".format(nat,self.natoms))
return "Inconsistency in mol_data (natoms)"
#======================================================================================
[docs]class atom_type(object):
""" atom type as used in mol files (DALTON format)"""
def __init__(self):
self.symb = "X"
self.charge = 0
self.natoms = 0
self.bohr = False
self.basis = ""
self.aux_basis = ""
self.xvals = []
self.yvals = []
self.zvals = []
[docs] def to_angstrom(self):
""" convert coordinates from bohr to angstrom"""
if self.bohr:
self.xvals = [x*BOHR_TO_ANG for x in self.xvals]
self.yvals = [y*BOHR_TO_ANG for y in self.yvals]
self.zvals = [z*BOHR_TO_ANG for z in self.zvals]
self.bohr = False
return self
[docs] def read_from_mol(self, lines, iline, bohr):
self.bohr = bohr
line = lines[iline].split()
for elmt in line:
el = elmt.split("=")
test = el[0].lower()
if test == "atoms":
self.natoms = int(float(el[1]))
elif test == "charge":
self.charge =int(float(el[1]))
elif test == "basis" or el[0] == "bas":
self.basis = el[1]
elif test == "aux":
self.aux_basis = el[1]
self.symb = element(self.charge).symbol
# read all atoms in type
for nato in xrange(self.natoms):
iline += 1
line = lines[iline].split()
self.xvals.append(float(line[1]))
self.yvals.append(float(line[2]))
self.zvals.append(float(line[3]))
[docs] def return_xyz_table(self):
table = []
for x,y,z in zip(self.xvals, self.yvals, self.zvals):
line = [self.symb, x, y, z ]
table.append( line )
return table
[docs] def output(self,mol=False):
""" return list of strings as in mol file (Atomtypes section) """
lines = []
if mol:
# when call for mol file as in DALTON format
line = "Charge="+str(self.charge)+" Atoms="+str(self.natoms)
if self.basis!='':
line += " Basis="+self.basis
if self.aux_basis!='':
line += " Aux="+self.aux_basis
lines.append(line)
for x,y,z in zip(self.xvals, self.yvals, self.zvals):
line = "{:<4} {:20.10f} {:20.10f} {:20.10f}".format( self.symb, x, y, z )
lines.append( line )
return lines
[docs] def add_db_entry(self, cur, add_mol_xyz):
""" add xyz data as a new entry to the molecules database"""
for x, y, z in zip(self.xvals, self.yvals, self.zvals):
cur.execute(add_mol_xyz, (self.symb, self.charge, x, y, z) )
[docs] def set_from_GUI(self, atype_sec):
""" set atom_type information from GUI """
self.symb = atype_sec.symb
self.charge = atype_sec.charge
self.natoms = atype_sec.natoms
for x, y, z in zip(atype_sec.xvals, atype_sec.yvals, atype_sec.zvals):
self.xvals.append( x.get() )
self.yvals.append( y.get() )
self.zvals.append( z.get() )
#======================================================================================
[docs]class mol_file(object):
""" structure and data of mol files (DALTON format)"""
def __init__(self):
self.fname = ""
self.atom_basis = False
self.basis = "cc-pVDZ"
self.aux_basis = "cc-pVDZ-RI"
self.title1 = ""
self.title2 = ""
self.natoms = 0
self.natom_types = 0
self.symmetry = False
self.bohr = True
self.charge = 0
self.atom_types = []
[docs] def check(self):
""" check consistency of mol_file"""
# sum of atoms in atom_types must be equal to natoms
nat = 0
for atype in self.atom_types:
nat += atype.natoms
if nat!=self.natoms:
print("ERROR: Inconsistency in mol_file data (natoms)".format(nat,self.natoms))
return "Inconsistency in mol_file data (natoms)"
[docs] def read_mol(self, filename):
""" update mol_file data from mol file"""
self.fname = filename
lines = []
with open(self.fname, "r") as xyzfile:
for line in xyzfile:
lines.append(line.rstrip('\n'))
self.atom_basis = lines[0].strip().lower() == "atombasis"
if self.atom_basis:
offset = 1
else:
offset = 2
# read basis
line = lines[1].split()
self.basis = line[0]
if len(line)>1:
# read aux basis
if line[1][0:3].lower() == "aux":
self.aux_basis = line[1][4:]
# read title lines:
self.title1 = lines[offset]
self.title2 = lines[offset+1]
# read main line:
line = lines[offset+2].split()
for elmt in line:
el = elmt.split("=")
test = el[0].lower()
if test == "atomtypes":
self.natom_types=int(float(el[1]))
elif test == "charge":
self.charge=int(float(el[1]))
elif test == "nosymmetry":
self.symmetry = False
elif test == "angstrom":
self.bohr = False
# read all atom types
iline = offset + 2
self.atom_types = []
for natp in xrange(self.natom_types):
iline += 1
new_atype = atom_type()
new_atype.read_from_mol(lines, iline, self.bohr)
self.atom_types.append( new_atype )
self.natoms += self.atom_types[-1].natoms
iline += self.atom_types[-1].natoms
self.natom_types = len(self.atom_types)
return self.check() # check validity of data
[docs] def to_angstrom(self):
""" convert coordinates from bohr to angstrom"""
if self.bohr:
# convert data
ang_types = []
for atype in self.atom_types:
ang_types.append(atype.to_angstrom())
self.atom_types = ang_types
self.bohr = False
return self
[docs] def output(self):
""" return list of strings as in mol file """
lines = []
# print basis info
if (self.atom_basis):
lines.append("ATOMBASIS")
else:
lines.append("BASIS")
line = self.basis
if (self.aux_basis!=''):
line += " Aux="+self.aux_basis
lines.append(line)
# print title lines
lines.append(self.title1)
lines.append(self.title2)
# print main line
line = "Atomtypes="+str(self.natom_types)
line += " Charge="+str(self.charge)
if not self.symmetry:
line += " Nosymmetry"
if not self.bohr:
line += " Angstrom"
lines.append(line)
# print atom types
for atype in self.atom_types:
lines.extend(atype.output(True))
return lines
[docs] def out_to_file(self, fname=''):
if self.check():
return self.check() # check validity of data"
if not fname:
fname=self.fname
with open(fname, 'w') as outfile:
for line in self.output():
outfile.write(line + "\n")
#======================================================================================
[docs]class xyz_file(object):
""" structure and data of xyz files"""
def __init__(self):
self.fname = ""
self.natoms = 0
self.title = ""
self.natom_types = 0
self.atom_types = []
[docs] def check(self):
""" check consistency of xyz_file"""
# sum of atoms in atom_types must be equal to natoms
nat = 0
for atype in self.atom_types:
nat += atype.natoms
if nat!=self.natoms:
print("ERROR: Inconsistency in xyz_file data (natoms)".format(nat,self.natoms))
return "Inconsistency in xyz_file data (natoms)"
[docs] def read_xyz(self, filename):
""" update xyz_file data from xyz file"""
self.fname = filename
lines = []
with open(self.fname, "r") as xyzfile:
for line in xyzfile:
lines.append(line.rstrip('\n'))
# Get number of atoms:
try:
self.natoms = int(lines[0])
except:
return 'file: '+self.fname+' does not fit the xyz format (cannot read natoms)'
# check that file contains enough lines
# i.e. at least 2 + natoms
if len(lines) < 2 +self.natoms:
return 'file: '+self.fname+' does not fit the xyz format (too few lines)'
# get title line
self.title=str(lines[1])
# make table:
table = read_xyz_table(lines[2:self.natoms+2])
# update self:
self.read_from_table(table)
[docs] def read_from_table(self, table, fname='', title=''):
"""xyz data is just a table with 4 columns and natoms lines"""
if fname:
self.fname = fname
if title:
self.title = title
self.natoms = len(table)
# read labels and xyz coordinates
charges = []
xvals = []
yvals = []
zvals = []
for line in table:
# get atomic number from symbol
charge = element(line[0]).atomic
charges.append(charge)
xvals.append(float(line[1]))
yvals.append(float(line[2]))
zvals.append(float(line[3]))
self.atom_types = get_clean_atom_types(charges, xvals, yvals, zvals)
self.natom_types = len(self.atom_types)
return self.check() # check validity of data"
[docs] def output(self):
""" return list of strings as in xyz file """
lines = []
lines.append(str(self.natoms))
lines.append(self.title)
# print atom types
for atype in self.atom_types:
lines.extend(atype.output())
return lines
[docs] def out_to_file(self, fname=''):
if self.check():
return self.check() # check validity of data"
if not fname:
fname=self.fname
with open(fname, 'w') as outfile:
for line in self.output():
outfile.write(line + "\n")
[docs]def read_xyz_table(lines):
table = []
for row in lines:
line = []
line.append(row.split()[0])
line.append(float(row.split()[1]))
line.append(float(row.split()[2]))
line.append(float(row.split()[3]))
table.append(line)
return table
[docs]def get_clean_atom_types(charges, xvals, yvals, zvals, bohr=False):
""" order and merge atom types"""
# get non redundant charges in decreasing order
charge_types = sorted(set(charges), reverse=True)
atypes = []
# build atom types
for charge in charge_types:
atype = atom_type()
# get atomic symbol
atype.symb = element(charge).symbol
atype.charge = charge
atype.bohr = bohr
for c, x, y, z in zip(charges, xvals, yvals, zvals):
if c == charge:
atype.xvals.append(x)
atype.yvals.append(y)
atype.zvals.append(z)
atype.natoms += 1
# add atype to atom_type list
atypes.append(atype)
return atypes