Commit 958d45d9 authored by Jade Young's avatar Jade Young

PDB class import math, Atom, Point, and MathFunctions. Also contains classes...

PDB class import math, Atom, Point, and MathFunctions. Also contains classes AromaticRing and Charged
parent 9b5fa572
# This file contains the class PDB
# for binana.py
import math
import textwrap

from atom import Atom
from math_functions import MathFunctions
from point import Point
"""
Class PDB handles PDB filing
"""
class PDB:
# Initialize PDB
def __init__(self):
    """Set up an empty structure with no atoms loaded.

    Creates the atom containers, seeds the bounding-box extremes with
    sentinel values, zeroes the rotatable-bond counter, builds the math
    helper and records which residue names are treated as protein.
    """
    # Atoms keyed by integer index.
    self.all_atoms, self.non_protein_atoms = {}, {}

    # Bounding-box sentinels: every max starts very low and every min very
    # high so that the first real coordinate replaces them.
    self.max_x, self.min_x = -9999.99, 9999.99
    self.max_y, self.min_y = -9999.99, 9999.99
    self.max_z, self.min_z = -9999.99, 9999.99

    self.rotateable_bonds_count = 0
    self.functions = MathFunctions()

    # Three-letter residue names regarded as protein residues.
    self.protein_resnames = (
        "ALA ARG ASN ASP ASH ASX CYS CYM CYX GLN "
        "GLU GLH GLX GLY HIS HID HIE HIP ILE LEU "
        "LYS LYN MET PHE PRO SER THR TRP TYR VAL"
    ).split()

    self.aromatic_rings = []
    self.charges = []  # a list of points
# Load PDB file
# Param file_name (string)
# Param min_x (float): minimum x coordinate
# Param max_x (float): maximum x coordinate
# Param min_y (float): minimum y coordinate
# Param max_y (float): maximum y coordinate
# Param min_z (float): minimum z coordinate
# Param max_z (float): maximum z coordinate
def load_PDB(
    self,
    file_name,
    min_x=-9999.99,
    max_x=9999.99,
    min_y=-9999.99,
    max_y=9999.99,
    min_z=-9999.99,
    max_z=9999.99,
):
    """Reset this object and populate it from a PDB/PDBQT file.

    Only ATOM/HETATM records whose coordinates lie strictly inside the
    given box are kept. Kept atoms are re-indexed from 1 in file order.
    Reading stops at the first line starting with "END" (other than
    ENDROOT/ENDBRANCH). After reading, the protein format is checked and
    bonds, aromatic rings and charges are assigned.

    Param file_name (string): path of the file to read
    Param min_x/max_x, min_y/max_y, min_z/max_z (float): exclusive bounds
        of the box an atom must fall in to be loaded

    Raises whatever open() raises if the file cannot be read.
    """
    autoindex = 1
    self.__init__()

    # Read the whole file up front; the context manager guarantees the
    # handle is closed even if reading fails.
    with open(file_name, "r") as pdb_file:
        lines = pdb_file.readlines()

    # Keep track of atomname_resid_chain keys, to make sure redundants
    # aren't loaded. This basically gets rid of rotamers, I think.
    # A set keeps each membership test O(1).
    atom_already_loaded = set()

    for line in lines:
        if line[:3] == "END" and line[:7] != "ENDROOT" and line[:9] != "ENDBRANCH":
            warning_lines = textwrap.wrap(
                "WARNING: END or ENDMDL term found in "
                + file_name
                + ". Everything after the first instance of this term will be ignored. "
                "If any of your PDBQT files have multiple frames/poses, please partition them "
                "into separate files using vina_split and feed each of the the single-frame files into Binana separately.",
                80,
            )
            print(("\n".join(warning_lines) + "\n"))
            print(line)
            break

        if "between atoms" in line and " A " in line:
            self.rotateable_bonds_count = self.rotateable_bonds_count + 1

        if len(line) < 7:
            continue
        if not (line[0:4] == "ATOM" or line[0:6] == "HETATM"):
            continue

        # Load atom data (coordinates, etc.)
        atom = Atom()
        atom.read_PDB_line(line)
        coords = atom.coordinates

        inside_box = (
            min_x < coords.x < max_x
            and min_y < coords.y < max_y
            and min_z < coords.z < max_z
        )
        if not inside_box:
            continue

        # Grow the bounding box of the loaded structure.
        self.max_x = max(self.max_x, coords.x)
        self.max_y = max(self.max_y, coords.y)
        self.max_z = max(self.max_z, coords.z)
        self.min_x = min(self.min_x, coords.x)
        self.min_y = min(self.min_y, coords.y)
        self.min_z = min(self.min_z, coords.z)

        # this string uniquely identifies each atom
        key = (
            atom.atom_name.strip()
            + "_"
            + str(atom.resid)
            + "_"
            + atom.residue.strip()
            + "_"
            + atom.chain.strip()
        )

        is_protein = atom.residue.strip() in self.protein_resnames
        if key in atom_already_loaded and is_protein:
            # so this is a protein atom that has already been loaded once
            self.printout(
                'Warning: Duplicate protein atom detected: "'
                + atom.line.strip()
                + '". Not loading this duplicate.'
            )
            print("")
        else:
            # So either the atom hasn't been loaded, or else it's a
            # non-protein atom. Note that non-protein atoms can have
            # redundant names, but protein atoms cannot. This is because
            # protein residues often contain rotamers.
            atom_already_loaded.add(key)

            # So you're actually reindexing everything here.
            self.all_atoms[autoindex] = atom
            if atom.residue[-3:] not in self.protein_resnames:
                self.non_protein_atoms[autoindex] = atom
            autoindex = autoindex + 1

    self.check_protein_format()
    # Only for the ligand, because bonds can be inferred based on
    # atomnames from PDB
    self.create_bonds_by_distance()
    self.assign_aromatic_rings()
    self.assign_charges()
# Print a string wrapped to a width of 80 characters
# Param self (PDB)
# Param the_string (string)
def printout(self, the_string):
    """Print the_string wrapped to a width of 80 characters.

    Relies on the module-level ``textwrap`` import. Nothing is printed when
    the string wraps to no lines (for example an empty string).

    Param the_string (string): text to wrap and print
    """
    for wrapped_line in textwrap.wrap(the_string, 80):
        print(wrapped_line)
# Write and save PDB line to a file
# Param self (PDB)
# Param file_name (string)
def save_PDB(self, file_name):
f = open(file_name, "w")
towrite = self.save_PDB_String()
if towrite.strip() == "":
# So no PDB is empty, VMD will load them all
towrite = "ATOM 1 X XXX 0.000 0.000 0.000 X"
f.write(towrite)
f.close()
# Returns a new PDB line
# Param self (PDB)
def save_PDB_String(self):
to_output = ""
# Write coordinates of all atoms
for atom_index in self.all_atoms:
to_output = (
to_output
+ self.all_atoms[atom_index].create_PDB_line(atom_index)
+ "\n"
)
return to_output
# Adds a new atom to this PDB
# Param self (PDB)
# Param atom (Atom): new atom being added
def add_new_atom(self, atom):
# first get available index
t = 1
while t in list(self.all_atoms.keys()):
t = t + 1
# now add atom
self.all_atoms[t] = atom
# Assign residue name to atom
# Param self (PDB)
# Param resname (string): residue name
def set_resname(self, resname):
for atom_index in self.all_atoms:
self.all_atoms[atom_index].residue = resname
# Returns a list of the indices of the atoms of a given element that are connected to the atom at the given index
# Param self (PDB)
# Param index (integer): index of atom
# Param connected_atom_element (string): element in question
def connected_atoms_of_given_element(self, index, connected_atom_element):
atom = self.all_atoms[index]
connected_atoms = []
for index2 in atom.indecies_of_atoms_connecting:
atom2 = self.all_atoms[index2]
if atom2.element == connected_atom_element:
connected_atoms.append(index2)
return connected_atoms
# Returns a list of the indices of the heavy (non-hydrogen) atoms connected to the atom at the specified index
# Param self (PDB)
# Param index (integer): index of atom
def connected_heavy_atoms(self, index):
atom = self.all_atoms[index]
connected_atoms = []
for index2 in atom.indecies_of_atoms_connecting:
atom2 = self.all_atoms[index2]
if atom2.element != "H":
connected_atoms.append(index2)
return connected_atoms
# Check the format of the protein, one residue at a time
# Param self (PDB)
def check_protein_format(self):
curr_res = ""
first = True
residue = []
for atom_index in self.all_atoms:
atom = self.all_atoms[atom_index]
key = atom.residue + "_" + str(atom.resid) + "_" + atom.chain
if first is True:
curr_res = key
first = False
if key != curr_res:
self.check_protein_format_process_residue(residue, last_key)
residue = []
curr_res = key
residue.append(atom.atom_name.strip())
last_key = key
self.check_protein_format_process_residue(residue, last_key)
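# Check one residue and warn about every expected atom name it lacks
# Param self (PDB)
# Param residue (list of strings): atom names found in the residue
# Param last_key (string): residue key in the form resname_resid_chain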
def check_protein_format_process_residue(self, residue, last_key):
temp = last_key.strip().split("_")
resname = temp[0]
real_resname = resname[-3:]
resid = temp[1]
chain = temp[2]
if real_resname in self.protein_resnames: # so it's a protein residue
if "N" not in residue:
self.printout(
'Warning: There is no atom named "N" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "C" not in residue:
self.printout(
'Warning: There is no atom named "C" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CA" not in residue:
self.printout(
'Warning: There is no atom named "CA" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "GLU" or real_resname == "GLH" or real_resname == "GLX":
if "OE1" not in residue:
self.printout(
'Warning: There is no atom named "OE1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "OE2" not in residue:
self.printout(
'Warning: There is no atom named "OE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "ASP" or real_resname == "ASH" or real_resname == "ASX":
if "OD1" not in residue:
self.printout(
'Warning: There is no atom named "OD1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "OD2" not in residue:
self.printout(
'Warning: There is no atom named "OD2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "LYS" or real_resname == "LYN":
if "NZ" not in residue:
self.printout(
'Warning: There is no atom named "NZ" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "ARG":
if "NH1" not in residue:
self.printout(
'Warning: There is no atom named "NH1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "NH2" not in residue:
self.printout(
'Warning: There is no atom named "NH2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if (
real_resname == "HIS"
or real_resname == "HID"
or real_resname == "HIE"
or real_resname == "HIP"
):
if "NE2" not in residue:
self.printout(
'Warning: There is no atom named "NE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "ND1" not in residue:
self.printout(
'Warning: There is no atom named "ND1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "PHE":
if "CG" not in residue:
self.printout(
'Warning: There is no atom named "CG" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD1" not in residue:
self.printout(
'Warning: There is no atom named "CD1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD2" not in residue:
self.printout(
'Warning: There is no atom named "CD2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE1" not in residue:
self.printout(
'Warning: There is no atom named "CE1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE2" not in residue:
self.printout(
'Warning: There is no atom named "CE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CZ" not in residue:
self.printout(
'Warning: There is no atom named "CZ" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "TYR":
if "CG" not in residue:
self.printout(
'Warning: There is no atom named "CG" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD1" not in residue:
self.printout(
'Warning: There is no atom named "CD1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD2" not in residue:
self.printout(
'Warning: There is no atom named "CD2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE1" not in residue:
self.printout(
'Warning: There is no atom named "CE1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE2" not in residue:
self.printout(
'Warning: There is no atom named "CE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CZ" not in residue:
self.printout(
'Warning: There is no atom named "CZ" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if real_resname == "TRP":
if "CG" not in residue:
self.printout(
'Warning: There is no atom named "CG" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD1" not in residue:
self.printout(
'Warning: There is no atom named "CD1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD2" not in residue:
self.printout(
'Warning: There is no atom named "CD2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "NE1" not in residue:
self.printout(
'Warning: There is no atom named "NE1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE2" not in residue:
self.printout(
'Warning: There is no atom named "CE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE3" not in residue:
self.printout(
'Warning: There is no atom named "CE3" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CZ2" not in residue:
self.printout(
'Warning: There is no atom named "CZ2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CZ3" not in residue:
self.printout(
'Warning: There is no atom named "CZ3" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CH2" not in residue:
self.printout(
'Warning: There is no atom named "CH2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if (
real_resname == "HIS"
or real_resname == "HID"
or real_resname == "HIE"
or real_resname == "HIP"
):
if "CG" not in residue:
self.printout(
'Warning: There is no atom named "CG" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "ND1" not in residue:
self.printout(
'Warning: There is no atom named "ND1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CD2" not in residue:
self.printout(
'Warning: There is no atom named "CD2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "CE1" not in residue:
self.printout(
'Warning: There is no atom named "CE1" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
if "NE2" not in residue:
self.printout(
'Warning: There is no atom named "NE2" in the protein residue '
+ last_key
+ ". Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore."
)
print("")
# Functions to determine the bond connectivity based on distance
# ==============================================================
# Define bonds between atoms using distance on the grid
# Param self (PDB)
def create_bonds_by_distance(self):
for AtomIndex1 in self.non_protein_atoms:
atom1 = self.non_protein_atoms[AtomIndex1]
if atom1.residue[-3:] not in self.protein_resnames:
# so it's not a protein residue
for AtomIndex2 in self.non_protein_atoms:
if AtomIndex1 != AtomIndex2:
atom2 = self.non_protein_atoms[AtomIndex2]
if not atom2.residue[-3:] in self.protein_resnames:
# so it's not a protein residue
dist = self.functions.distance(
atom1.coordinates, atom2.coordinates
)
if (
dist
< self.bond_length(atom1.element, atom2.element) * 1.2
):
atom1.add_neighbor_atom_index(AtomIndex2)
atom2.add_neighbor_atom_index(AtomIndex1)
# Returns the bond length between two elements
# Param self (PDB)
# Param element1 (string): symbol of first element
# Param element2 (string): symbol of second element
def bond_length(self, element1, element2):
"""Bond lengths taken from Handbook of Chemistry and Physics. The information provided there was very specific,
so I tried to pick representative examples and used the bond lengths from those. Sitautions could arise where these
lengths would be incorrect, probably slight errors (<0.06) in the hundreds."""