main.py :  » Business-Application » PDB2PQR » pdb2pqr-1.6 » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Business Application » PDB2PQR 
PDB2PQR » pdb2pqr 1.6 » main.py
"""
    Driver for PDB2PQR

    This module takes a PDB file as input and performs optimizations
    before yielding a new PDB-style file as output.

    Ported to Python by Todd Dolinsky (todd@ccb.wustl.edu)
    Washington University in St. Louis

    Parsing utilities provided by Nathan A. Baker (baker@biochem.wustl.edu)
    Washington University in St. Louis

  Copyright (c) 2002-2010, Jens Erik Nielsen, University College Dublin; 
  Nathan A. Baker, Washington University in St. Louis; Paul Czodrowski & 
  Gerhard Klebe, University of Marburg

  All rights reserved.

  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, 
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, 
      this list of conditions and the following disclaimer in the documentation 
      and/or other materials provided with the distribution.
    * Neither the names of University College Dublin, Washington University in 
      St. Louis, or University of Marburg nor the names of its contributors may 
      be used to endorse or promote products derived from this software without 
      specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
  OF THE POSSIBILITY OF SUCH DAMAGE.

"""

__date__  = "5 April 2010"
__author__ = "Todd Dolinsky, Nathan Baker, Jens Nielsen, Paul Czodrowski, Jan Jensen, Samir Unni, Yong Huang"
__version__ = "1.6"


import string
import sys
import getopt
import os
import time
from src import pdb
from src import utilities
from src import structures
from src import routines
from src import protein
from src.pdb import *
from src.utilities import *
from src.structures import *
from src.definitions import *
from src.forcefield import *
from src.routines import *
from src.protein import *
from src.server import *
from src.hydrogens import *
from src.aconf import *
from StringIO import *

def usage(rc):
    """
        Print usage for this script to stdout.

        Parameters
            rc:  Exit status (int)
    """

    str = "\n"
    str = str + "pdb2pqr  (Version %s)\n" % __version__
    str = str + "\n"
    str = str + "This module takes a PDB file as input and performs\n"
    str = str + "optimizations before yielding a new PDB-style file as\n"
    str = str + "output\n"
    str = str + "\n"
    str = str + "Usage: pdb2pqr.py [options] --ff=<forcefield> <path> <output-path>\n"
    str = str + "    Required Arguments:\n"
    str = str + "        <forcefield>  :  The forcefield to use - currently amber,\n"
    str = str + "                         charmm, parse, tyl06, peoepb and swanson\n"
    str = str + "                         are supported.\n"
    str = str + "        <path>        :  The path to the PDB file or an ID\n"
    str = str + "                         to obtain from the PDB archive\n"
    str = str + "        <output-path> :  The desired output name of the PQR file\n"
    str = str + "                         to be generated\n"
    str = str + "    Optional Arguments:\n"
    str = str + "        --nodebump    :  Do not perform the debumping operation\n"
    str = str + "        --noopt       :  Do not perform hydrogen optimization\n"
    str = str + "        --chain       :  Keep the chain ID in the output PQR file\n" 
    str = str + "        --assign-only :  Only assign charges and radii - do not add\n"
    str = str + "                         atoms, debump, or optimize.\n"
    str = str + "        --clean       :  Do no optimization, atom addition, or\n"
    str = str + "                         parameter assignment, just return the\n"
    str = str + "                         original PDB file in aligned format.\n"
    str = str + "        --ffout=<name>:  Instead of using the standard canonical\n"
    str = str + "                         naming scheme for residue and atom names,\n"
    str = str + "                         use the names from the given forcefield.\n"
    str = str + "        --with-ph=<ph>:  Use propka to calculate pKas and apply them\n"
    str = str + "                         to the molecule given the pH value. Actual\n"
    str = str + "                         PropKa results will be output to \n"
    str = str + "                         <output-path>.propka.\n"
    str = str + "        --apbs-input  :  Create a template APBS input file based on\n"
    str = str + "                         the generated PQR file.  Also creates a Python\n"
    str = str + "                         pickle for using these parameters in other programs.\n"
    str = str + "        --ligand=<path>: Calculate the parameters for the ligand in\n"
    str = str + "                         mol2 format at the given path. Pdb2pka must\n"
    str = str + "                         be compiled\n"
    str = str + "        --whitespace  :  Insert whitespaces between atom name and residue\n"
    str = str + "                         name, between x and y, and between y and z\n"
    str = str + "        --typemap :      Create Typemap output\n"
    str = str + "        --neutraln  :    Make the N-terminus of this protein neutral \n"
    str = str + "                         (default is charged)\n"
    str = str + "        --neutralc  :    Make the C-terminus of this protein neutral \n"
    str = str + "                         (default is charged)\n"
    str = str + "        --verbose (-v):  Print information to stdout\n"
    str = str + "        --help    (-h):  Display the usage information\n"

    # Check to see if there are usage statements from the
    # extensions directory

    extensions = getAvailableExtensions()
    if len(extensions) > 0:
        str = str + "\n    Optional Arguments from Extensions Directory:\n"
        for ext in extensions:
            str += extensions[ext].usage()
    
    str = str + "\n"
    sys.stderr.write(str)
    sys.exit(rc)

def printPQRHeader(atomlist, reslist, charge, ff, warnings, options):
    """
        Print the header for the PQR file

        Parameters:
            atomlist: A list of atoms that were unable to have
                      charges assigned (list)
            reslist:  A list of residues with non-integral charges
                      (list)
            charge:   The total charge on the protein (float)
            ff:       The forcefield name (string)
            warnings: A list of warnings generated from routines (list)
            options:  A dictionary of command lnie options (float)
        Returns
            header:   The header for the PQR file (string)
    """
    header = "REMARK   1 PQR file generated by PDB2PQR (Version %s)\n" % __version__
    header = header + "REMARK   1\n"
    header = header + "REMARK   1 Forcefield Used: %s\n" % ff
    if "ffout" in options:
        header = header + "REMARK   1 Naming Scheme Used: %s\n" % options["ffout"]
    header = header + "REMARK   1\n"
    
    if "ph" in options:
        header = header + "REMARK   1 pKas calculated by propka and assigned using pH %.2f\n" % options["ph"]
        header = header + "REMARK   1\n"

    for warning in warnings:
        header = header + "REMARK   5 " + warning 
    header = header + "REMARK   5\n"
    
    if len(atomlist) != 0:
        header += "REMARK   5 WARNING: PDB2PQR was unable to assign charges\n"
        header += "REMARK   5          to the following atoms (omitted below):\n"
        for atom in atomlist:
            header += "REMARK   5              %i %s in %s %i\n" % \
                      (atom.get("serial"), atom.get("name"), \
                       atom.get("residue").get("name"), \
                       atom.get("residue").get("resSeq"))
        header += "REMARK   5 This is usually due to the fact that this residue is not\n"
        header += "REMARK   5 an amino acid or nucleic acid; or, there are no parameters\n" 
        header += "REMARK   5 available for the specific protonation state of this\n" 
        header += "REMARK   5 residue in the selected forcefield.\n"
        header += "REMARK   5\n"
    if len(reslist) != 0:
        header += "REMARK   5 WARNING: Non-integral net charges were found in\n"
        header += "REMARK   5          the following residues:\n"
        for residue in reslist:
            header += "REMARK   5              %s - Residue Charge: %.4f\n" % \
                      (residue, residue.getCharge())
        header += "REMARK   5\n"
    header += "REMARK   6 Total charge on this protein: %.4f e\n" % charge
    header += "REMARK   6\n"

    return header

def runPDB2PQR(pdblist, ff, options):
    """
        Run the PDB2PQR Suite

        Parameters
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)
            options: A dictionary of PDB2PQR options, including:
                     verbose: When 1, script will print information to stdout
                              When 0, no detailed information will be printed (int)
                     debump:  When 1, debump heavy atoms (int)
                     opt:     When 1, run hydrogen optimization (int)
                     ph:      The desired ph of the system (float)
                     outname: The name of the desired output file
        Returns
            header:  The PQR file header (string)
            lines:   The PQR file atoms (list)
            missedligandresidues:  A list of ligand residue names whose charges could
                     not be assigned (ligand)
    """
    ph = None
    pkaname = ""
    outname = ""
    outroot = ""
    typemapname = ""
    neutraln = None
    neutralc = None
    lines = []
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    # userff is CGI-based User Forcefield file object

    if "userff" in options: userff = options["userff"]
    else: userff = None

    if "usernames" in options: usernames = options["usernames"]
    else: usernames = None

    if "verbose" in options: verbose = 1
    else: verbose = 0

    if "opt" in options: optflag = 1
    else: optflag = 0

    if "typemap" in options: typemapflag = 1
    else: typemapflag = 0

    if "chain" in options: chainflag = 1
    else: chainflag = 0

    if "outname" not in options or options["outname"] == None:
        text = "Error: Output name not set!"
        raise ValueError, text
    else:
        outname = options["outname"]
        period = string.rfind(outname,".")
        if period > 0: outroot = outname[0:period]
        else: outroot = outname

    if "ph" in options:
        pka = 1
        ph = options["ph"]
        pkaname = outroot + ".propka"
        if os.path.isfile(pkaname): os.remove(pkaname)
    else: pka = 0

    typemapname = "%s-typemap.html" % outroot

    extmap = options["extensions"]
    
    start = time.time()

    if verbose:
        print "Beginning PDB2PQR...\n"

    myDefinition = Definition()
    if verbose:
        print "Parsed Amino Acid definition file."   

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py

    if "ligand" in options:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, options["ligand"], pdblist, verbose)        
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM": 
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print "Created protein object -"
        print "\tNumber of residues in protein: %s" % myProtein.numResidues()
        print "\tNumber of atoms in protein   : %s" % myProtein.numAtoms()
        
    myRoutines = Routines(myProtein, verbose)

    for residue in myProtein.getResidues():
        multoccupancy = 0
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = 1
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy == 1:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append("         at least one of the instances is being ignored.\n")

    if "neutraln" in options: neutraln = 1
    if "neutralc" in options: neutralc = 1

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if "clean" in options:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chainflag)
      
        # Process the extensions
        for ext in extmap:
            module = extmap[ext]
            call = "module.%s(myRoutines, outroot)" % ext
            eval(call)  
    
        if verbose:
            print "Total time taken: %.2f seconds\n" % (time.time() - start)
        return header, lines

    if not "assign-only" in options:
        # It is OK to process ligands with no ATOM records in the pdb
        if atomcount == 0 and Lig != None:
            pass
        else:
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if "debump" in options:
            myRoutines.debumpProtein()  

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if "debump" in options:
            myRoutines.debumpProtein()  

        if optflag:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()


    else:  # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)
  
    ligsuccess = 0
    if "ligand" in options:

        # If this is independent, we can assign charges and radii here
 
        for residue in myProtein.getResidues():
            if isinstance(residue, LIG):
                templist = []
                Lig.make_up2date(residue)
                for atom in residue.getAtoms():
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                    atom.radius = Lig.ligand_props[atom.name]["radius"]
                    if atom in misslist:
                        misslist.pop(misslist.index(atom))
                        templist.append(atom)

                charge = residue.getCharge()
                if abs(charge - int(charge)) > 0.001:
                    # Ligand parameterization failed
                    myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                    myRoutines.warnings.append("         the desired ligand; it has been left out of\n")
                    myRoutines.warnings.append("         the PQR file.\n")
                    myRoutines.warnings.append("\n")
                    
                    # remove the ligand
                    myProtein.residues.remove(residue) 
                    for chain in myProtein.chains:
                        if residue in chain.residues: chain.residues.remove(residue)
                else:
                    ligsuccess = 1
                    # Mark these atoms as hits
                    hitlist = hitlist + templist
    
    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        templist = misslist[:]
        for atom in templist:
            if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
            misslist.remove(atom)

    # Creat the Typemap
    if typemapflag:
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge

    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that

    if "ffout" in options:
        scheme = options["ffout"]
        userff = None # Currently not supported
        if scheme != ff: myNameScheme = Forcefield(scheme, myDefinition, userff)
        else: myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(misslist, reslist, charge, ff, myRoutines.getWarnings(), options)
    lines = myProtein.printAtoms(hitlist, chainflag)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic): continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
 
    for ext in extmap:
        module = extmap[ext]
        call = "module.%s(myRoutines, outroot)" % ext
        eval(call)

    if verbose:
        print "Total time taken: %.2f seconds\n" % (time.time() - start)

    return header, lines, missedligandresidues

def getAvailableExtensions(displayflag=0):
    """
        Grab available extensions from the extensions directory

        Parameters
            displayflag: Display the error message if 1
        Returns
            extensions: A map containing the extensions name and
                        the module instance.
    """
    extensions = {}
    dir = "%s" % os.path.dirname(sys.argv[0])
    if dir == "": extdir = "extensions"
    else: extdir = "%s/extensions" % dir
    for filename in os.listdir(extdir):
        if filename.endswith(".py"):
            if filename == "__init__.py": continue
            
            # Test to see if we can find the function

            name = filename[:-3]
            try:
                e = __import__("extensions.%s" % name, globals(), locals(), name)
                if callable(eval("e.%s" % name)) and \
                   callable(eval("e.usage")):
                    extensions[name] = e
            except (AttributeError, ImportError):
                txt = "\nWarning: Missing either \"%s\" or \"usage\" functions in %s!" %\
                      (name, filename)
                txt += "\nThis extension will not be included.\n\n"
                if displayflag:
                    sys.stderr.write(txt)

    return extensions

def mainCommand(argv):
    """
        Main driver for running program from the command line.
    """

    # Append Numeric/Numpy path to sys.path if the user specified a non-standard location during configuration
    sys.argv=argv
    package_path = PACKAGE_PATH
    if package_path != "":
        sys.path.extend(package_path.split(":"))

    shortOptlist = "h,v"
    longOptlist = ["help","verbose","ff=","ffout=","nodebump","noopt","with-ph=","apbs-input","chain","clean","assign-only", "ligand=", "whitespace","typemap","neutraln","neutralc","userff","usernames"]

    extensions = getAvailableExtensions(1)
    longOptlist += extensions.keys()

    try: opts, args = getopt.getopt(sys.argv[1:], shortOptlist, longOptlist)
    except getopt.GetoptError, details:
        sys.stderr.write("GetoptError:  %s\n" % details)
        usage(2)

    if len(args) != 2:
        sys.stderr.write("Incorrect number (%d) of arguments!\n" % len(args))
        sys.stderr.write("argv: %s, args: %s" % (argv, args))
        usage(2)

    options = {"debump":1,"opt":1,"extensions":{}}
 
    outpath = None
    ff = None
    for o,a in opts:
        undashed = o[2:]
        if o in ("-v","--verbose"):
            options["verbose"] = 1
        elif o in ("-h","--help"):
            usage(2)
            sys.exit()
        elif o == "--nodebump":  del options["debump"]
        elif o == "--noopt":    del options["opt"]
        elif o == "--apbs-input": options["input"] = 1
        elif o == "--whitespace": options["whitespace"]  = 1
        elif o == "--typemap": options["typemap"] = 1
        elif o == "--with-ph":
            try:
                ph = float(a)
                options["ph"] = ph
                if ph < 0.0 or ph > 14.0: raise ValueError
            except ValueError:
                text = "%s is not a valid pH!  " % a
                text += "Please choose a pH between 0.0 and 14.0."
                raise ValueError, text
        elif o == "--assign-only":
            del options["debump"]
            del options["opt"]
            options["assign-only"] = 1
        elif o == "--clean":
            del options["debump"]
            del options["opt"]
            options["clean"] = 1
        elif o == "--ff":      
            if "userff" in opts:
                ff = "user-defined"
            else:
                ff = a
            
                # Check to make sure forcefield file is available

                defpath = getFFfile(ff)
                if defpath == "":
                    raise ValueError, "Unable to find parameter files for forcefield %s!" % ff
        elif o == "--neutraln": 
            options["neutraln"]  = 1

        elif o == "--neutralc": 
            options["neutralc"]  = 1

        elif o == "--chain": options["chain"] = 1
        elif o == "--ffout":
            if a.lower() in ["amber","charmm","parse","tyl06","peoepb","swanson"]:
                options["ffout"] = a
            else:
                raise ValueError, "Invalid forcefield naming scheme %s!" % a
        elif o == "--ligand":
            if os.path.isfile(a):
                options["ligand"] = open(a, 'rU')
            else:
                raise ValueError, "Unable to find ligand file %s!\n" % a
        elif undashed in extensions.keys():
            options["extensions"][undashed] = extensions[undashed]

    if "--neutraln" in opts:
        if ff not in ["parse", "PARSE"]:
            raise ValueError, "neutraln option only works with PARSE forcefield!"

    if "--neutralc" in opts:
        if ff not in ["parse", "PARSE"]:
            raise ValueError, "neutralc option only works with PARSE forcefield!"

    if ff == None and "clean" not in options:
        raise ValueError, "Forcefield not specified!"

    text =  "\n--------------------------\n"
    text += "PDB2PQR - a Python-based structural conversion utility\n"
    text += "--------------------------\n"
    text += "Please cite your use of PDB2PQR as:\n"
    text += "  Dolinsky TJ, Nielsen JE, McCammon JA, Baker NA.\n"
    text += "  PDB2PQR: an automated pipeline for the setup, execution,\n"
    text += "  and analysis of Poisson-Boltzmann electrostatics calculations.\n"
    text += "  Nucleic Acids Research 32 W665-W667 (2004).\n\n"
    sys.stdout.write(text)
            
    path = args[0]
    file = getPDBFile(path)
    pdblist, errlist = readPDB(file)
    
    if len(pdblist) == 0 and len(errlist) == 0:
        try: os.remove(path)
        except OSError: pass
        raise ValueError, "Unable to find file %s!\n" % path

    if len(errlist) != 0 and "verbose" in options:
        print "Warning: %s is a non-standard PDB file.\n" % path
        print errlist

    outpath = args[1]
    options["outname"] = outpath

    if "clean" not in options:
        header, lines, missedligands = runPDB2PQR(pdblist, ff, options)
    else:
        header, lines = runPDB2PQR(pdblist, ff, options)
        missedligands = None

    # Print the PQR file
    outfile = open(outpath,"w")
    outfile.write(header)
    # Adding whitespaces if --whitespace is in the options
    for line in lines:
        if "whitespace" in options: 
            if line[0:4] == 'ATOM':
                newline = line[0:16] + ' ' + line[16:38] + ' ' + line[38:46] + ' ' + line[46:]
                outfile.write(newline)
            elif line[0:6] == 'HETATM':
                newline = line[0:16] + ' ' + line[16:38] + ' ' + line[38:46] + ' ' + line[46:]
                outfile.write(newline)
        else: 
            outfile.write(line)
    outfile.close()

    if "input" in options:
        from src import inputgen
        from src import psize
        method = "mg-auto"
        size = psize.Psize()
        size.parseInput(outpath)
        size.runPsize(outpath)
        async = 0 # No async files here!
        input = inputgen.Input(outpath, size, method, async)
        input.printInputFiles()
        input.dumpPickle()


if __name__ == "__main__":
    mainCommand(sys.argv)
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.