Source code for comp_chem_utils.qmmm_utils

#!/usr/bin/env python
"""
Set of routine to deal with xyz data of QMMM calculations.

In particular for solute/solvent system it is possible to extract 
a QM region based on a distance criterion from the solvent.
"""

import numpy as np
from comp_chem_utils.utils import center_of_mass

[docs]def read_xyz_structure(xyz, nat_solute, nat_solvent): """decompose xyz data as solute + group of solvent molecules""" xyz_struc = [] xyz_struc.append(xyz[0:nat_solute]) nlines = len(xyz) xyz_struc += [ xyz[x:x+nat_solvent] for x in xrange(nat_solute, nlines, nat_solvent) ] return xyz_struc
[docs]def get_distance(xyz_struc, ref): """For each block in xyz_struc, calculate distance to ref""" distance = [0.0] # solute has distance zero by definition for block in xyz_struc[1:]: # calculate distance based on Oxygen atom in water molecule # i.e. first atom coord = np.array(block[0][1:]) distance.append(np.linalg.norm(ref-coord)) return distance
[docs]def get_QM_region(xyz, nat_slu, nat_sol, nmol_sol): """Extract QM information from the full set of coordinates. The following structure is assumed: First ``nat_slu`` atoms are the atoms of the solute molecules The rest are the solvent molecules arranged in blocks. For example, for a water solvent (``nat_sol = 3``):: O ... H ... H ... O ... H ... H ... : Args: xyz (list): xyz_data (list of list) for the whole system. Each sublist contains an atomic symbol and 3 (x,y,z) coordinates. nat_slu (int): number of atoms of the solute molecule. nat_sol (int): number of atoms of a single solvent molecule. nmol_sol (int): total number of solvent molecules that should be included in the QM region. Returns: xyz_QM, atom_types, max_dist xyz_QM (list): xyz_data for the QM region. atom_types (dic): Each key is an atomic symbol and the corresponding value is a list of indices of the atom of that type present in the QM region. The indices refer to the ordering of the original xyz_data. max_dist (float): Distance (in AA) between the center of mass of the solute and the center of mass of the farthest solvent molecule included in the QM region """ # the distance is measured with respect to the COM of the solute ref = center_of_mass(xyz[0:nat_slu]) # convert xyz data in block structure (solute and solvent) xyz_struc = read_xyz_structure(xyz, nat_slu, nat_sol) # associate a distance to each block dist = get_distance(xyz_struc, ref) # get the distance for the farthest molecule to be included in the QM region max_dist = sorted(dist)[nmol_sol] # Build a collection of atom types. # each atom type is characterized by its symbol (e.g. Cl) # for each type a list of indices is kept idx = 1 atom_types = {} xyz_QM = [] for d, b in zip(dist, xyz_struc): # only include atoms "close" to the solute if d<=max_dist: for atom in b: xyz_QM.append(atom) symb = atom[0] if symb in atom_types: # update the list of indices atom_types[symb].append( idx ) else: # start new atom type list atom_types[symb] = [ idx ] idx +=1 else: idx += len(b) return xyz_QM, atom_types, max_dist