Source code for comp_chem_utils.utils

#!/usr/bin/env python
"""Collection of simple functions useful in computational chemistry scripting.

Many of the following functions are used to make operations on xyz coordinates
of molecular structure. When refering to ``xyz_data`` bellow, the following
structures (also used in :py:mod:`~comp_chem_utils.molecule_data`) is assumed::

    atom 1 label  and corresponding xyz coordinate
    atom 2 label  and corresponding xyz coordinate
    : : :
    atom N label  and corresponding xyz coordinate

For example the ``xyz_data`` of a Hydrogen molecule along the z-axis
should be passed as::

    >>> xyz_data
    [['H', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 1.0]]

"""

__author__="Pablo Baudin"
__email__="pablo.baudin@epfl.ch"


import os
import shutil
import numpy as np
from scipy.spatial.distance import pdist

from comp_chem_utils.periodic import element


[docs]def vel_auto_corr(vel, max_corr_index, tstep):
    """Calculate velocity autocorrelation function.

    Args:
        vel (list): The velocities along the trajectory are given as
            a list of ``np.array()`` of dimensions N_atoms . 3.

        max_corr_index (int): Maximum number of steps to consider
            for the auto correlation function. In other words, it
            corresponds to the number of points in the output function.
    """

    max_step_index = len(vel) - max_corr_index
    natoms = vel[0].shape[0]

    G = np.zeros((max_corr_index), dtype='float64')


    for itau in range(max_corr_index):
        for it in range(max_step_index):
            #for i in range(natoms):
            #    G[itau] += np.dot(vel[it][i,:], vel[it+itau][i,:])

            G[itau] += np.trace(np.dot(vel[it],np.transpose(vel[it+itau])))

    G = G / (natoms * max_corr_index * tstep)
    xpts = np.arange(max_corr_index)
    xpts = xpts * tstep

    return xpts, G


[docs]def get_lmax_from_atomic_charge(charge):
    """Return the maximum angular momentum based on the input atomic charge.

    This function is designed to return LMAX in a CPMD input file.

    Args:
        charge (int): Atomic charge.

    Returns:
        'S' for H or He; 'P' for second row elements; and 'D' for heavier elements.
    """

    if charge <= 2:
        # H or He
        lmax = 'S'
    elif charge <= 10:
        lmax = 'P'
    else:
        lmax = 'D'

    return lmax


[docs]def get_file_as_list(filename, raw=False):
    """Read a file and return it as a list of lines (str).

    By default comments (i.e. lines starting with #)
    and empty lines are ommited. This can be changed
    by setting ``raw=True``

    Args:
        filename (str): Name of the file to read.
        raw (bool, optional): To return the file as it is,
            i.e. with comments and blank lines. Default
            is ``raw=False``.

    Returns:
        A list of lines (str)."""

    lines = []
    with open(filename,'r') as myfile:
        for line in myfile:
            if raw:
                lines.append(line)
            else:
                # remove empty lines
                if line.strip():
                    # remove comments
                    if line.strip()[0] != '#':
                        lines.append(line)

    return lines


[docs]def make_new_dir(dirn):
    """Make new empty directory.

    If the directory already exists it is erased and replaced.

    Args:
        dirn (str): Name for the new directory (can include path).
    """

    if not os.path.exists(dirn):
        os.makedirs(dirn)
    else:
        try:
            os.removedirs(dirn)
        except(OSError):
            print("WARNING: erasing (not empty) directory! {}".format(dirn))
            shutil.rmtree(dirn)

        os.makedirs(dirn)


[docs]def center_of_mass(xyz_data):
    """Calculate center of mass of a molecular structure based on xyz coordinates.

    Args:
        xyz_data (list): xyz atomic coordinates arranged as described above.

    Returns:
        3-dimensional ``np.array()`` containing the xyz coordinates of the
        center of mass of the molecular structure. The unit of the center of mass
        matches the xyz input units (usually Angstroms).
    """

    totM = 0.0
    COM = np.zeros((3))
    for line in xyz_data:
        symbol = line[0]
        coord = np.array(line[1:])

        mass = element(symbol).mass
        totM += mass
        COM += coord*mass

    COM = COM/totM
    return COM


[docs]def change_vector_norm(fix, mob, R):
    """Scale a 3-D vector defined by two points in space to have a new norm R.

    The input vector is defined by a fix position in 3-D space ``fix``,
    and a mobile position ``mob``. The function returns a new mobile
    position such that the new vector has the norm R.

    Args:
        fix (np.array): xyz coordinates of the fix point.
        mob (np.array): Original xyz coordinates of the mobile point.
        R (float): Desired norm for the new vector.

    Returns:
        The new mobile position as an ``np.array()`` of dimenssion 3.
    """

    unit = mob - fix
    unit = unit/np.linalg.norm(unit)

    # return new position
    return fix + R * unit


[docs]def change_vector_norm_sym(pos1, pos2, R):
    """Symmetric version of change_vector_norm function.

    In other word both positions are modified symmetrically.
    """

    unit = pos2 - pos1
    norm = np.linalg.norm(unit)
    unit = unit/norm

    shift = (R-norm)/2.0

    # return new position
    new_pos1 = pos1 - unit * shift
    new_pos2 = pos2 + unit * shift
    return new_pos1, new_pos2


[docs]def get_rmsd(xyz_data1, xyz_data2):
    """Calculate RMSD between two sets of coordinates.

    The Root-mean-square deviation of atomic positions is calculated as

    .. math::
        RMSD = \\sqrt{ \\frac{1}{N} \\sum_{i=1}^N \\delta_{i}^{2} }

    Where ``\delta_i`` is the distance between atom i in ``xyz_data1`` and in
    ``xyz_data2``.

    Args:
        xyz_data1 (list): List of atomic coordinates for the first structure
            arranged as described above for xyz_data.
        xyz_data2 (list): Like ``xyz_data1`` but for the second structure.

    Returns:
        The RMSD (float).
    """

    rmsd = 0
    for c1, c2 in zip(xyz_data1, xyz_data2):

        d1 = np.array([c1[x] for x in range(1,4)])
        d2 = np.array([c2[x] for x in range(1,4)])
        vector = d2 - d1
        rmsd += np.dot(vector, vector)

    rmsd = rmsd/(len(xyz_data1))
    return np.sqrt(rmsd)


[docs]def get_distance(xyz_data, atoms, box_size=None):
    """Calculate distance between two atoms in xyz_data.

    Args:
        xyz_data (list): xyz atomic coordinates arranged as described above.
        atoms (list): list of two indices matching the two rows of the
            xyz_data for wich the distance should be calculated.

    Returns:
        Distance between the two atoms in the list as a ``float``, the
        unit will match the input unit in the ``xyz_data``.

    """
    coord1 = np.array([xyz_data[atoms[0]][x] for x in range(1,4)])
    coord2 = np.array([xyz_data[atoms[1]][x] for x in range(1,4)])
    vector = coord2 - coord1

    if box_size:
       for i,x in enumerate(vector):
          if abs(x) > box_size/2.0:
             vector[i] = box_size - abs(x)

    return np.linalg.norm(vector)


[docs]def get_distance_matrix(xyz_data, box_size=None):

   # make np.array
   natoms = len(xyz_data)
   coord = np.zeros((natoms,3), dtype='float')
   for i,line in enumerate(xyz_data):
      coord[i,:] = line[1:]

   if box_size:
      npairs = natoms * (natoms - 1)
      matrix = np.zeros((npairs,3), dtype='float')

      for i in range(natoms):
         for j in range(i+1,natoms):
            ij = i + (j-1)*natoms

            matrix[ij,:] = coord[i,:] - coord[j,:]

      # find out which element to shift:
      # basically shift is an array of same shape as matrix
      # with zeros every where except where the elements of
      # matrix are larger than box_size/2.0
      # in that case shift as the value box_size
      shift = box_size * (matrix > box_size/2.0).astype(int)

      # we can now shift the matrix as follows:
      matrix = abs(shift - matrix)

      # and get the distances...
      matrix = np.linalg.norm(matrix, axis=1)

   else:
      matrix = pdist(coord)

   return matrix

[docs]def get_distance_matrix_2(xyz_data1, xyz_data2, box_size=None):
   # repeat as above for 2 different sets of coordinates
   nat1 = len(xyz_data1)
   coord1 = np.zeros((nat1,3), dtype='float')
   for i,line in enumerate(xyz_data1):
      coord1[i,:] = line[1:]

   nat2 = len(xyz_data2)
   coord2 = np.zeros((nat2,3), dtype='float')
   for i,line in enumerate(xyz_data2):
      coord2[i,:] = line[1:]

   if box_size:
      matrix = np.zeros((nat1,nat2,3), dtype='float')

      for i in range(nat1):
         for j in range(nat2):
            matrix[i,j,:] = coord[i,:] - coord[j,:]

      # find out which element to shift:
      # basically shift is an array of same shape as matrix
      # with zeros every where except where the elements of
      # matrix are larger than box_size/2.0
      # in that case shift as the value box_size
      shift = box_size * (matrix > box_size/2.0).astype(int)

      # we can now shift the matrix as follows:
      matrix = abs(shift - matrix)

      # and get the distances...
      matrix = np.linalg.norm(matrix, axis=2)

   else:
      matrix = cdist(coord1, coord2)

   return matrix

[docs]def get_angle(xyz_data, atoms):
    """Calculate angle between three atoms in xyz_data.

    Args:
        xyz_data (list): xyz atomic coordinates arranged as described above.
        atoms (list): list of three indices matching the rows of the
            xyz_data for wich the angle should be calculated.

    Returns:
        Angle between the three atoms in the list as a ``float`` in degrees.

    """

    coord1 = np.array([xyz_data[atoms[0]][x] for x in range(1,4)])
    coord2 = np.array([xyz_data[atoms[1]][x] for x in range(1,4)])
    coord3 = np.array([xyz_data[atoms[2]][x] for x in range(1,4)])
    vec1 = coord1-coord2
    vec2 = coord3-coord2

    return np.degrees( np.arccos( np.dot(vec1,vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) ) )

[docs]def get_dihedral_angle(table, atoms):
    """Calculate dihedral angle defined by 4 atoms in xyz_data.

    It relies on the praxeolitic formula (1 sqrt, 1 cross product).

    Args:
        xyz_data (list): xyz atomic coordinates arranged as described above.
        atoms (list): list of 4 indices matching the rows of the
            xyz_data for wich the dihedral angle should be calculated.

    Returns:
        Dihedral angle defined by the 4 atoms in the list as a ``float`` in degrees.

    """

    p0 = np.array([table[atoms[0]][x] for x in range(1,4)])
    p1 = np.array([table[atoms[1]][x] for x in range(1,4)])
    p2 = np.array([table[atoms[2]][x] for x in range(1,4)])
    p3 = np.array([table[atoms[3]][x] for x in range(1,4)])

    b0 = -1.0*(p1 - p0)
    b1 = p2 - p1
    b2 = p3 - p2

    # normalize b1 so that it does not influence magnitude of vector
    # rejections that come next
    b1 /= np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1)*b1
    w = b2 - np.dot(b2, b1)*b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)

    return np.degrees(np.arctan2(y, x))