Source code for comp_chem_utils.molecule_data

#!/usr/bin/env python
"""Set of classes to deal with molecular data and formats"""

__author__="Pablo Baudin"
__email__="pablo.baudin@epfl.ch"


from comp_chem_utils.conversions import BOHR_TO_ANG
from comp_chem_utils.mysql_tables import mol_info_table, mol_xyz_table
from comp_chem_utils.periodic import element

hostname = 'localhost'
username = 'root'
password = ''
database = 'molecules'


[docs]class mol_data(object): """ All information regarding a given molecule. """ # INITIALIZTION FUNCTIONS # ----------------------- def __init__(self): self.idd = 0 self.name = "" self.chem_name = "" self.note = "" self.tab_name = "" self.charge = 0 self.natoms = 0 self.natom_types = 0 self.atom_types = []
[docs] def init_from_mol(self, mol_file): """ update mol_data with data in mol (DALTON) format""" self.name = mol_file.fname.split("/")[-1][:-4] self.note = mol_file.title1+" "+mol_file.title2 self.charge = mol_file.charge self.natoms = mol_file.natoms self.natom_types = mol_file.natom_types self.atom_types = mol_file.to_angstrom().atom_types self.get_chem_name() return self.check() # check validity of mol_data"
[docs] def init_from_xyz(self, xyz_file): """ update mol_data with data in xyz format""" self.name = xyz_file.fname.split("/")[-1][:-4] self.note = xyz_file.title #print("WARNING: init_from_xyz setting default charge to zero") self.charge = 0 self.natoms = xyz_file.natoms self.natom_types = xyz_file.natom_types self.atom_types = xyz_file.atom_types self.get_chem_name() return self.check() # check validity of mol_data"
[docs] def init_from_db(self, cur, idd): """ update mol_data with data from database""" # get main information from mol_info table main_tab = mol_info_table() cur.execute( main_tab.get_row(idd) ) for (idd, name, chem_name, note, charge, natoms, natom_types) in cur: self.idd = idd self.name = name self.chem_name = chem_name self.note = note self.charge = charge self.natoms = natoms self.natom_types = natom_types # get xyz data from molecule specific table xyz_tab = mol_xyz_table( idd ) cur.execute( xyz_tab.get_table() ) old_charge = 0 for (idd, symb, charge, x, y, z) in cur: if charge!=old_charge: if (old_charge>0): # add old atype to atom_types list self.atom_types.append(atype) # new atom type atype = atom_type() old_charge = charge atype.charge = charge atype.symb = symb atype.xvals.append(x) atype.yvals.append(y) atype.zvals.append(z) atype.natoms += 1 # add last atype to atom_types list self.atom_types.append(atype) self.get_chem_name() return self.check() # check validity of mol_data"
# OUTPUT FUNCTIONS # ----------------
[docs] def output(self): """ return list of strings containing mol_data""" lines = [] lines.append("ID : "+str(self.idd)) lines.append("NAME : "+self.name) lines.append("CHEM NAME : "+self.chem_name) lines.append("NOTE : "+self.note) lines.append("TABLE NAME : "+self.tab_name) lines.append("CHARGE : "+str(self.charge)) lines.append("NATOMS : "+str(self.natoms)) lines.append("NATOM TYPES: "+str(self.natom_types)) # print atom types for atype in self.atom_types: lines.extend(atype.output()) return lines
[docs] def out_to_mol(self): """ generate mol_file data from mol_data """ mol = mol_file() mol.fname = self.name.replace(' ','_')+".mol" mol.charge = self.charge mol.natoms = self.natoms mol.bohr = False if len(self.note.split())>8: mol.title1 = self.note[:6].replace('\n',' ') mol.title2 = self.note[6:].replace('\n',' ') else: mol.title1 = self.note.replace('\n',' ') mol.natom_types = self.natom_types mol.atom_types = self.atom_types return mol
[docs] def out_to_xyz(self): """ generate xyz_file data from mol_data """ xyz = xyz_file() xyz.fname = self.name.replace(' ','_')+".xyz" xyz.natoms = self.natoms xyz.title = self.note.replace('\n',' ') xyz.natom_types = self.natom_types xyz.atom_types = self.atom_types return xyz
[docs] def out_to_db(self, cur): """ add mol_data as a new entry to the molecules database""" if self.check(): # check validity of mol_data before saving to database" return self.check() main_tab = mol_info_table() cur.execute(main_tab.add_row(), (self.name, self.chem_name, self.note, self.charge, self.natoms, self.natom_types) ) # get mol_id for mol_info self.idd = cur.lastrowid # create xyz table: xyz_tab = mol_xyz_table( self.idd ) cur.execute( xyz_tab.create_table() ) for atype in self.atom_types: atype.add_db_entry(cur, xyz_tab.add_row())
[docs] def get_chem_name(self): """ get/set chemical formula based on mol_data""" self.chem_name = "" for atype in self.atom_types: self.chem_name += "{0.symb}_{0.natoms} ".format(atype) # add total charge self.chem_name += " ({:+d})".format(self.charge)
[docs] def check(self): """ check consistency of mol_data""" # sum of atoms in atom_types must be equal to natoms nat = 0 for atype in self.atom_types: nat += atype.natoms if nat!=self.natoms: print("ERROR: Inconsistency in mol_data (natoms)".format(nat,self.natoms)) return "Inconsistency in mol_data (natoms)"
#======================================================================================
[docs]class atom_type(object): """ atom type as used in mol files (DALTON format)""" def __init__(self): self.symb = "X" self.charge = 0 self.natoms = 0 self.bohr = False self.basis = "" self.aux_basis = "" self.xvals = [] self.yvals = [] self.zvals = []
[docs] def to_angstrom(self): """ convert coordinates from bohr to angstrom""" if self.bohr: self.xvals = [x*BOHR_TO_ANG for x in self.xvals] self.yvals = [y*BOHR_TO_ANG for y in self.yvals] self.zvals = [z*BOHR_TO_ANG for z in self.zvals] self.bohr = False return self
[docs] def read_from_mol(self, lines, iline, bohr): self.bohr = bohr line = lines[iline].split() for elmt in line: el = elmt.split("=") test = el[0].lower() if test == "atoms": self.natoms = int(float(el[1])) elif test == "charge": self.charge =int(float(el[1])) elif test == "basis" or el[0] == "bas": self.basis = el[1] elif test == "aux": self.aux_basis = el[1] self.symb = element(self.charge).symbol # read all atoms in type for nato in xrange(self.natoms): iline += 1 line = lines[iline].split() self.xvals.append(float(line[1])) self.yvals.append(float(line[2])) self.zvals.append(float(line[3]))
[docs] def return_xyz_table(self): table = [] for x,y,z in zip(self.xvals, self.yvals, self.zvals): line = [self.symb, x, y, z ] table.append( line ) return table
[docs] def output(self,mol=False): """ return list of strings as in mol file (Atomtypes section) """ lines = [] if mol: # when call for mol file as in DALTON format line = "Charge="+str(self.charge)+" Atoms="+str(self.natoms) if self.basis!='': line += " Basis="+self.basis if self.aux_basis!='': line += " Aux="+self.aux_basis lines.append(line) for x,y,z in zip(self.xvals, self.yvals, self.zvals): line = "{:<4} {:20.10f} {:20.10f} {:20.10f}".format( self.symb, x, y, z ) lines.append( line ) return lines
[docs] def add_db_entry(self, cur, add_mol_xyz): """ add xyz data as a new entry to the molecules database""" for x, y, z in zip(self.xvals, self.yvals, self.zvals): cur.execute(add_mol_xyz, (self.symb, self.charge, x, y, z) )
[docs] def set_from_GUI(self, atype_sec): """ set atom_type information from GUI """ self.symb = atype_sec.symb self.charge = atype_sec.charge self.natoms = atype_sec.natoms for x, y, z in zip(atype_sec.xvals, atype_sec.yvals, atype_sec.zvals): self.xvals.append( x.get() ) self.yvals.append( y.get() ) self.zvals.append( z.get() )
#======================================================================================
[docs]class mol_file(object): """ structure and data of mol files (DALTON format)""" def __init__(self): self.fname = "" self.atom_basis = False self.basis = "cc-pVDZ" self.aux_basis = "cc-pVDZ-RI" self.title1 = "" self.title2 = "" self.natoms = 0 self.natom_types = 0 self.symmetry = False self.bohr = True self.charge = 0 self.atom_types = []
[docs] def check(self): """ check consistency of mol_file""" # sum of atoms in atom_types must be equal to natoms nat = 0 for atype in self.atom_types: nat += atype.natoms if nat!=self.natoms: print("ERROR: Inconsistency in mol_file data (natoms)".format(nat,self.natoms)) return "Inconsistency in mol_file data (natoms)"
[docs] def read_mol(self, filename): """ update mol_file data from mol file""" self.fname = filename lines = [] with open(self.fname, "r") as xyzfile: for line in xyzfile: lines.append(line.rstrip('\n')) self.atom_basis = lines[0].strip().lower() == "atombasis" if self.atom_basis: offset = 1 else: offset = 2 # read basis line = lines[1].split() self.basis = line[0] if len(line)>1: # read aux basis if line[1][0:3].lower() == "aux": self.aux_basis = line[1][4:] # read title lines: self.title1 = lines[offset] self.title2 = lines[offset+1] # read main line: line = lines[offset+2].split() for elmt in line: el = elmt.split("=") test = el[0].lower() if test == "atomtypes": self.natom_types=int(float(el[1])) elif test == "charge": self.charge=int(float(el[1])) elif test == "nosymmetry": self.symmetry = False elif test == "angstrom": self.bohr = False # read all atom types iline = offset + 2 self.atom_types = [] for natp in xrange(self.natom_types): iline += 1 new_atype = atom_type() new_atype.read_from_mol(lines, iline, self.bohr) self.atom_types.append( new_atype ) self.natoms += self.atom_types[-1].natoms iline += self.atom_types[-1].natoms self.natom_types = len(self.atom_types) return self.check() # check validity of data
[docs] def to_angstrom(self): """ convert coordinates from bohr to angstrom""" if self.bohr: # convert data ang_types = [] for atype in self.atom_types: ang_types.append(atype.to_angstrom()) self.atom_types = ang_types self.bohr = False return self
[docs] def output(self): """ return list of strings as in mol file """ lines = [] # print basis info if (self.atom_basis): lines.append("ATOMBASIS") else: lines.append("BASIS") line = self.basis if (self.aux_basis!=''): line += " Aux="+self.aux_basis lines.append(line) # print title lines lines.append(self.title1) lines.append(self.title2) # print main line line = "Atomtypes="+str(self.natom_types) line += " Charge="+str(self.charge) if not self.symmetry: line += " Nosymmetry" if not self.bohr: line += " Angstrom" lines.append(line) # print atom types for atype in self.atom_types: lines.extend(atype.output(True)) return lines
[docs] def out_to_file(self, fname=''): if self.check(): return self.check() # check validity of data" if not fname: fname=self.fname with open(fname, 'w') as outfile: for line in self.output(): outfile.write(line + "\n")
#======================================================================================
[docs]class xyz_file(object): """ structure and data of xyz files""" def __init__(self): self.fname = "" self.natoms = 0 self.title = "" self.natom_types = 0 self.atom_types = []
[docs] def check(self): """ check consistency of xyz_file""" # sum of atoms in atom_types must be equal to natoms nat = 0 for atype in self.atom_types: nat += atype.natoms if nat!=self.natoms: print("ERROR: Inconsistency in xyz_file data (natoms)".format(nat,self.natoms)) return "Inconsistency in xyz_file data (natoms)"
[docs] def read_xyz(self, filename): """ update xyz_file data from xyz file""" self.fname = filename lines = [] with open(self.fname, "r") as xyzfile: for line in xyzfile: lines.append(line.rstrip('\n')) # Get number of atoms: try: self.natoms = int(lines[0]) except: return 'file: '+self.fname+' does not fit the xyz format (cannot read natoms)' # check that file contains enough lines # i.e. at least 2 + natoms if len(lines) < 2 +self.natoms: return 'file: '+self.fname+' does not fit the xyz format (too few lines)' # get title line self.title=str(lines[1]) # make table: table = read_xyz_table(lines[2:self.natoms+2]) # update self: self.read_from_table(table)
[docs] def read_from_table(self, table, fname='', title=''): """xyz data is just a table with 4 columns and natoms lines""" if fname: self.fname = fname if title: self.title = title self.natoms = len(table) # read labels and xyz coordinates charges = [] xvals = [] yvals = [] zvals = [] for line in table: # get atomic number from symbol charge = element(line[0]).atomic charges.append(charge) xvals.append(float(line[1])) yvals.append(float(line[2])) zvals.append(float(line[3])) self.atom_types = get_clean_atom_types(charges, xvals, yvals, zvals) self.natom_types = len(self.atom_types) return self.check() # check validity of data"
[docs] def output(self): """ return list of strings as in xyz file """ lines = [] lines.append(str(self.natoms)) lines.append(self.title) # print atom types for atype in self.atom_types: lines.extend(atype.output()) return lines
[docs] def out_to_file(self, fname=''): if self.check(): return self.check() # check validity of data" if not fname: fname=self.fname with open(fname, 'w') as outfile: for line in self.output(): outfile.write(line + "\n")
[docs]def read_xyz_table(lines): table = [] for row in lines: line = [] line.append(row.split()[0]) line.append(float(row.split()[1])) line.append(float(row.split()[2])) line.append(float(row.split()[3])) table.append(line) return table
[docs]def get_clean_atom_types(charges, xvals, yvals, zvals, bohr=False): """ order and merge atom types""" # get non redundant charges in decreasing order charge_types = sorted(set(charges), reverse=True) atypes = [] # build atom types for charge in charge_types: atype = atom_type() # get atomic symbol atype.symb = element(charge).symbol atype.charge = charge atype.bohr = bohr for c, x, y, z in zip(charges, xvals, yvals, zvals): if c == charge: atype.xvals.append(x) atype.yvals.append(y) atype.zvals.append(z) atype.natoms += 1 # add atype to atom_type list atypes.append(atype) return atypes