Source code for comp_chem_utils.read_gaussian

#!/usr/bin/env python
"""This file contains a set of functions usefull to extract some information from Gaussian output files."""

__author__="Pablo Baudin"
__email__="pablo.baudin@epfl.ch"


import numpy as np

from comp_chem_utils.periodic import element
from comp_chem_utils.utils import get_file_as_list
from comp_chem_utils.conversions import convert


[docs]class gauss_atom: """Contain basis information about an atom.""" def __init__(self): self.label = "X" self.index = 0 self.basis = {}
[docs] def output(self): print("label = {}".format(self.label)) print("index = {}".format(self.index)) print("basis = {}".format(self.basis))
[docs]def get_gaussian_info(filename): """Extract relevant information from a Gaussian output file. Goes through a gaussian output file looking for the information required to calculate PDOS. Args: filename (str): Name of the Gaussian output file, it may also include the path to the file. Returns: nocc (int): Number of occupied orbitals. nbas (int): Number of basis functions (atomic orbitals). overlap (np.array): Squared AO overlap matrix. epsilon (list): Molecular orbital energies. coef (np.array): Matrix of the molecular orbital coefficients. """ # Store the whole gaussian file in output # --------------------------------------- output = get_file_as_list(filename) # Get Number of basis functions # ----------------------------- nline = get_line(output," NBasis = ") nbas = int(output[nline].split()[2]) # get number of occupied orbitals (assume closed shell) # ----------------------------------------------------- nline = get_line(output,"alpha electrons") nocc = int(output[nline].split()[0]) print("Number of basis functions = {}".format(nbas)) print("Number of occupied orbitals = {}\n".format(nocc)) print("Reading Overlap and MO transformation matrices...") # Read overlap matrix from gaussian output # ---------------------------------------- nline = get_line(output," *** Overlap ***") + 1 overlap = read_triangular_matrix(nline,nbas,output) # Read Orbital energies and Orbital coefficients # ---------------------------------------------- nline = get_line(output,"Molecular Orbital Coefficients") + 1 # The MO coefficients are stored in the coef matrix. # we assume the number of molecular orbitals equal to # the number of atomic basis functions. coef=np.zeros((nbas,nbas)) epsilon=[] # orbital energies iline = nline + 1 # skip column headers # first read complete blocks for iblock in range(nbas/5): iline = read_orbitals_and_coefs(iblock,nbas,iline,5,epsilon,coef,output) # If nbas is not a multiple of five the next routine # reads the last data of the matrix rest=nbas%5 if (rest!=0): iblock = nbas/5 iline = read_orbitals_and_coefs(iblock,nbas,iline,rest,epsilon,coef,output) print("Gaussian output file parsed") return nocc,nbas,overlap,epsilon,coef
[docs]def get_line(output,string): """ Return the line number in 'output' containing the first occurence of 'string' """ for i,line in enumerate(output): if (string in line): return i
[docs]def read_triangular_matrix(start,dim,output): # The GAUSSIAN's matrix are made by N/5 block of 5 columns and N lines each one. # (caution: there is just the half matrix in the GAUSSIAN file so we have to rebuild the other part) mat = np.zeros((dim,dim)) iline = start # Loop over each block of 5 columns for iblock in range(dim/5): iline += 1 # increment index in output (empty line between blocks) # Reads each line of a block (the length of the block decrease with the index dim) iline = read_triangular_block(iblock,dim,mat,iline,output) # If dim is not a multiple of five the next routine reads the last data of the matrix rest=dim%5 if (rest!=0): iline += 1 # increment index in output (empty line between blocks) iblock = dim/5 iline = read_triangular_block(iblock,dim,mat,iline,output) return mat
[docs]def read_triangular_block(iblock,dim,mat,iline,output): start_ind = 5*iblock # start row and column index depending on block for irow in range(start_ind,dim): # read line and replace double precision fortran notation with standard exponent test=output[iline].replace("D","e").split()[1:] # read the (maximum) 5 numbers in a row for i,f in enumerate(test): # i goes from 0 to maximum 4, we add start_ind depending on block icol = start_ind + i mat[irow,icol] = float(f) mat[icol,irow] = float(f) # symmetrize iline += 1 # increment index in output (read next line) return iline
[docs]def read_orbitals_and_coefs(iblock,nbas,iline,ncol,epsilon,coef,output): # read orbital energies # --------------------- iline += 1 test = output[iline] for i in range (ncol): # Conversion: [Hartree] --> [eV] e = float(test.split()[i+2]) epsilon.append( convert( e, 'ENERGY: a.u.', 'ENERGY: eV') ) # Read each line of a block # ------------------------- start_ind = iblock*5 for irow in range(0,nbas): iline += 1 # go to next line test = output[iline].split()[-ncol:] # get ncol last values in line for i,f in enumerate(test): icol = start_ind + i coef[irow,icol] = float(f) # Reads the empty lines iline += 2 # skip lines between blocks return iline
[docs]def read_basis(iline, nbas, output): basis = {} # read first basis set for given atom # the gaussian line look like: # 1 1 Ru 1S .00000 .00000 .00000 .00000 .00000 # # in that case label = 1S and we store the index 1. label = output[iline].split()[3] basis[label] = int(output[iline].split()[0]) iline += 1 if (iline >= nbas): return iline, basis # now the gaussian line should look like: # 2 2S .00000 .00000 .00000 .00000 .00000 # # check if next line is a next atom next_atom = element(output[iline].split()[2]) while(not next_atom): label = output[iline].split()[1] basis[label] = int(output[iline].split()[0]) iline += 1 if (iline >= nbas): break next_atom = element(output[iline].split()[2]) # the atom's basis has been read, we are done return iline, basis
[docs]def read_atom_and_basis(gaussian_file): # Store the whole gaussian file in output # --------------------------------------- myfile = open(gaussian_file,"r") output = [] for line in myfile: output.append(line) myfile.close() # Parse Gaussian output file # make a dictionary of atoms and basis functions # ---------------------------------------------- nline = get_line(output," NBasis = ") nbas = int(output[nline].split()[2]) nline = get_line(output,"EIGENVALUES") + 1 output = output[nline:nline + nbas] gauss_list = [] # read all lines in output iline = 0 while (iline < nbas): # read output line line = output[iline] # Check for atomic symbol in second position if element(line.split()[2]): atom = gauss_atom() atom.index = int(line.split()[1]) atom.label = line.split()[2] else: print("Something wrong while reading gaussian file") exit() # read basis set for atom iline, atom.basis = read_basis(iline, nbas, output) gauss_list.append(atom) return gauss_list
[docs]def read_boolean(string): """ read boolean from users (yes/no question where yes is default)""" # read capitalized input inp = raw_input(string).upper() # set logical based on input if (inp == "N" or inp == "NO"): logical = False else: logical = True return logical