Commit 82719001 authored by jspiegel's avatar jspiegel

update gypsum-dl version 1.1.3

parent 471bb7b8
......@@ -12,6 +12,7 @@ WIP (4.0.1)
`$PATH/autogrow4/autogrow/operators/convert_files/conversion_to_3d.py` to
remove `None` objects which failed to be imported into RDKit. This prevents
the `None` objects from causing errors in `def convert_single_sdf_to_pdb`.
* Updated Gypsum-DL to version 1.1.3.
* Add `raise Exception` in
`$PATH/autogrow4/autogrow/operators/convert_files/conversion_to_3d.py` with
debugging instructions if Gypsum-Dl produced are no SDF files. `raise
......
Changes
=======
1.1.3
-----
* Gypsum-DL used to crash when provided with certain mal-formed SMILES
strings. It now just skips those SMILES and warns the user that they are
poorly formed. See Start.py:303 and MyMol.py:747.
* Durrant-lab filters now remove molecules containing metal and boron atoms.
* Some Durrant-lab filters are now applied immediately after desalting. We
discovered that certain substructures cause Gypsum-DL to delay during the
add-hydrogens step, specifically when Gypsum-DL generates the 3D structures
required to rank conformers. Removing these compounds before adding
hydrogens avoids the problem.
* Improved code formatting.
* Made minor spelling corrections to the output.
1.1.2
-----
......
File mode changed from 100755 to 100644
# Gypsum-DL 1.1.2
# Gypsum-DL 1.1.3
Gypsum-DL is a free, open-source program for preparing 3D small-molecule
models. Beyond simply assigning atomic coordinates, Gypsum-DL accounts for
......
......@@ -26,6 +26,7 @@ try:
except:
Utils.exception("You need to install rdkit and its dependencies.")
def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
"""Pick molecules with low energies. If necessary, the definition also
makes a conformer without minimization (so not too computationally
......@@ -62,7 +63,7 @@ def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
data = []
for i, mol in enumerate(mols_3d):
mol.make_first_3d_conf_no_min() # Make sure at least one conformer
# exists.
# exists.
if len(mol.conformers) > 0:
energy = mol.conformers[0].energy
data.append((energy, i))
......@@ -78,6 +79,7 @@ def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
# Return those molecules.
return new_mols_list
def remove_highly_charged_molecules(mol_lst):
"""Remove molecules that are highly charged.
......@@ -101,15 +103,21 @@ def remove_highly_charged_molecules(mol_lst):
new_mol_lst.append(mol_lst[i])
else:
Utils.log(
"\tWARNING: Discarding highly charged form: " +
mol_lst[i].smiles() + "."
"\tWARNING: Discarding highly charged form: "
+ mol_lst[i].smiles()
+ "."
)
return new_mol_lst
def bst_for_each_contnr_no_opt(contnrs, mol_lst, max_variants_per_compound,
thoroughness,
crry_ovr_frm_lst_step_if_no_fnd=True):
def bst_for_each_contnr_no_opt(
contnrs,
mol_lst,
max_variants_per_compound,
thoroughness,
crry_ovr_frm_lst_step_if_no_fnd=True,
):
"""Keep only the top few compound variants in each container, to prevent a
combinatorial explosion. This is run periodically on the growing
containers to keep them in check.
......@@ -159,9 +167,7 @@ def bst_for_each_contnr_no_opt(contnrs, mol_lst, max_variants_per_compound,
# Pick the lowest-energy molecules. Note that this creates a
# conformation if necessary, but it is not minimized and so is not
# computationally expensive.
mols = pick_lowest_enrgy_mols(
mols, max_variants_per_compound, thoroughness
)
mols = pick_lowest_enrgy_mols(mols, max_variants_per_compound, thoroughness)
if len(mols) > 0:
# Now remove all previously determined mols for this
......@@ -181,20 +187,25 @@ def bst_for_each_contnr_no_opt(contnrs, mol_lst, max_variants_per_compound,
if crry_ovr_frm_lst_step_if_no_fnd:
# Just use previous ones.
Utils.log(
"\tWARNING: Unable to find low-energy conformations: " +
contnr.orig_smi_deslt + " (" +
contnr.name + "). Keeping original " +
"conformers."
"\tWARNING: Unable to find low-energy conformations: "
+ contnr.orig_smi_deslt
+ " ("
+ contnr.name
+ "). Keeping original "
+ "conformers."
)
else:
# Discard the conformation.
Utils.log(
"\tWARNING: Unable to find low-energy conformations: " +
contnr.orig_smi_deslt + " (" +
contnr.name + "). Discarding conformer."
"\tWARNING: Unable to find low-energy conformations: "
+ contnr.orig_smi_deslt
+ " ("
+ contnr.name
+ "). Discarding conformer."
)
contnr.mols = []
def uniq_mols_in_list(mol_lst):
# You need to make new molecules to get it to work.
# new_smiles = [m.smiles() for m in self.mols]
......
......@@ -32,6 +32,7 @@ try:
except:
Utils.exception("You need to install rdkit and its dependencies.")
class MolContainer:
"""The molecucle container class. It stores all the molecules (tautomers,
etc.) associated with a single input SMILES entry."""
......@@ -54,8 +55,8 @@ class MolContainer:
# level)
self.contnr_idx = index
self.contnr_idx_orig = index # Because if some circumstances (mpi),
# might be reset. But good to have
# original for filename output.
# might be reset. But good to have
# original for filename output.
self.orig_smi = smiles
self.orig_smi_deslt = smiles # initial assumption
self.mols = []
......@@ -69,7 +70,9 @@ class MolContainer:
self.orig_smi_canonical = self.mol_orig_frm_inp_smi.smiles()
# Get the number of nonaromatic rings
self.num_nonaro_rngs = len(self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms())
self.num_nonaro_rngs = len(
self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms()
)
# Get the number of chiral centers, assigned
self.num_specif_chiral_cntrs = len(
......@@ -192,7 +195,9 @@ class MolContainer:
self.mol_orig_frm_inp_smi = MyMol.MyMol(self.orig_smi, self.name)
self.frgs = ""
self.orig_smi_canonical = self.mol_orig_frm_inp_smi.smiles()
self.num_nonaro_rngs = len(self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms())
self.num_nonaro_rngs = len(
self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms()
)
self.num_specif_chiral_cntrs = len(
self.mol_orig_frm_inp_smi.chiral_cntrs_only_asignd()
)
......@@ -289,7 +294,7 @@ class MolContainer:
:type new_idx: int
"""
if type(new_idx)!= int:
if type(new_idx) != int:
Utils.exception("New idx value must be an int.")
self.contnr_idx = new_idx
self.mol_orig_frm_inp_smi.contnr_idx = self.contnr_idx
......@@ -17,9 +17,12 @@ import __future__
import rdkit
from rdkit import Chem
#Disable the unnecessary RDKit warnings
# Disable the unnecessary RDKit warnings
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')
RDLogger.DisableLog("rdApp.*")
def check_sanitization(mol):
"""
......@@ -43,7 +46,11 @@ def check_sanitization(mol):
# easiest nearly everything should get through
try:
sanitize_string = Chem.SanitizeMol(mol, sanitizeOps = rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors = True)
sanitize_string = Chem.SanitizeMol(
mol,
sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
catchErrors=True,
)
except:
return None
......@@ -52,20 +59,35 @@ def check_sanitization(mol):
else:
# try to fix the nitrogen (common problem that 4 bonded Nitrogens improperly lose their + charges)
mol = Nitrogen_charge_adjustment(mol)
Chem.SanitizeMol(mol, sanitizeOps = rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors = True)
sanitize_string = Chem.SanitizeMol(mol, sanitizeOps = rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors = True)
Chem.SanitizeMol(
mol,
sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
catchErrors=True,
)
sanitize_string = Chem.SanitizeMol(
mol,
sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
catchErrors=True,
)
if sanitize_string.name == "SANITIZE_NONE":
return mol
# run a sanitation Filter 1 more time incase something slipped through
# ie. if there are any forms of sanition which fail ie. KEKULIZE then return None
sanitize_string = Chem.SanitizeMol(mol, sanitizeOps = rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors = True)
sanitize_string = Chem.SanitizeMol(
mol,
sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
catchErrors=True,
)
if sanitize_string.name != "SANITIZE_NONE":
return None
else:
return mol
#
def handleHs(mol, protanate_step):
"""
Given a rdkit.Chem.rdchem.Mol this script will sanitize the molecule, remove all non-explicit H's
......@@ -89,7 +111,7 @@ def handleHs(mol, protanate_step):
mol = try_deprotanation(mol)
if mol is None:
# mol failed deprotanation
# mol failed deprotanation
return None
if protanate_step is True:
......@@ -100,8 +122,11 @@ def handleHs(mol, protanate_step):
return None
return mol
#
def try_deprotanation(sanitized_mol):
"""
Given an already sanitize rdkit.Chem.rdchem.Mol object, we will try to deprotanate the mol of all non-explicit
......@@ -114,16 +139,18 @@ def try_deprotanation(sanitized_mol):
it returns None if H's can't be added or if sanitation fails
"""
try:
mol = Chem.RemoveHs(sanitized_mol, sanitize = False)
mol = Chem.RemoveHs(sanitized_mol, sanitize=False)
except:
return None
mol_sanitized = check_sanitization(mol)
return mol_sanitized
#
def try_reprotanation(sanitized_deprotanated_mol):
"""
Given an already sanitize and deprotanate rdkit.Chem.rdchem.Mol object, we will try to reprotanate the mol with
......@@ -142,13 +169,15 @@ def try_reprotanation(sanitized_deprotanated_mol):
except:
mol = None
mol_sanitized = check_sanitization(mol)
return mol_sanitized
else:
return None
#
def remove_atoms(mol, list_of_idx_to_remove):
"""
This function removes atoms from an rdkit mol based on
......@@ -170,7 +199,7 @@ def remove_atoms(mol, list_of_idx_to_remove):
try:
atomsToRemove = list_of_idx_to_remove
atomsToRemove.sort(reverse = True)
atomsToRemove.sort(reverse=True)
except:
return None
......@@ -184,8 +213,11 @@ def remove_atoms(mol, list_of_idx_to_remove):
return new_mol
except:
return None
#
def Nitrogen_charge_adjustment(mol):
"""
When importing ligands with sanitation turned off, one can successfully import
......@@ -223,13 +255,16 @@ def Nitrogen_charge_adjustment(mol):
# GetBondTypeAsDouble prints out 1 for single, 2.0 for double,
# 3.0 for triple, 1.5 for AROMATIC but if AROMATIC WE WILL SKIP THIS ATOM
num_bond_sums = sum(bonds)
# Check if the octet is filled
if num_bond_sums == 4.0:
atom.SetFormalCharge(+1)
return mol
#
def check_for_unassigned_atom(mol):
"""
Check there isn't a missing atom group ie. '*'
......@@ -244,11 +279,14 @@ def check_for_unassigned_atom(mol):
return None
for atom in atoms:
if atom.GetAtomicNum()==0:
if atom.GetAtomicNum() == 0:
return None
return mol
#
def handle_frag_check(mol):
"""
This will take a RDKit Mol object. It will check if it is fragmented.
......@@ -259,12 +297,11 @@ def handle_frag_check(mol):
return None
try:
frags = Chem.GetMolFrags(mol, asMols = True, sanitizeFrags = False)
frags = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)
except:
return None
if len(frags)==1:
if len(frags) == 1:
return mol
else:
frag_info_list = []
......@@ -283,8 +320,10 @@ def handle_frag_check(mol):
if len(frag_info_list) == 0:
return None
# Get the largest Fragment
frag_info_list.sort(key = lambda x: float(x[-1]),reverse = True)
frag_info_list.sort(key=lambda x: float(x[-1]), reverse=True)
largest_frag_idx = frag_info_list[0][0]
largest_frag = frags[largest_frag_idx]
return largest_frag
#
......@@ -744,9 +744,16 @@ class MyConformer:
# Calculate some energies, other housekeeping.
if self.mol is not False:
ff = AllChem.UFFGetMoleculeForceField(self.mol)
try:
ff = AllChem.UFFGetMoleculeForceField(self.mol)
self.energy = ff.CalcEnergy()
except:
Utils.log("Warning: Could not calculate energy for molecule " +
Chem.MolToSmiles(self.mol))
# Example of smiles that cause problem here without try...catch:
# NC1=NC2=C(N[C@@H]3[C@H](N2)O[C@@H](COP(O)(O)=O)C2=C3S[Mo](S)(=O)(=O)S2)C(=O)N1
self.energy = 9999
self.minimized = False
self.energy = ff.CalcEnergy()
self.ids_hvy_atms = [a.GetIdx() for a in self.mol.GetAtoms()
if a.GetAtomicNum() != 1]
......@@ -776,9 +783,14 @@ class MyConformer:
return
# Perform the minimization, and save the energy.
ff = AllChem.UFFGetMoleculeForceField(self.mol)
ff.Minimize()
self.energy = ff.CalcEnergy()
try:
ff = AllChem.UFFGetMoleculeForceField(self.mol)
ff.Minimize()
self.energy = ff.CalcEnergy()
except:
Utils.log("Warning: Could not calculate energy for molecule " +
Chem.MolToSmiles(self.mol))
self.energy = 9999
self.minimized = True
def align_to_me(self, other_conf):
......
......@@ -24,6 +24,7 @@ try:
except:
Utils.exception("You need to install rdkit and its dependencies.")
def load_smiles_file(filename):
"""Loads a smiles file.
......@@ -51,9 +52,13 @@ def load_smiles_file(filename):
# Handle unnamed ligands.
if name == "":
name = "untitled_line_{}".format(line_counter + 1)
Utils.log(("\tUntitled ligand on line {}. Naming that ligand " +
"{}. All associated files will be refered to with " +
"this name.").format(line_counter + 1, name))
Utils.log(
(
"\tUntitled ligand on line {}. Naming that ligand "
+ "{}. All associated files will be referred to with "
+ "this name."
).format(line_counter + 1, name)
)
# Handle duplicate ligands in same list.
if name in name_list:
......@@ -62,26 +67,43 @@ def load_smiles_file(filename):
duplicate_names[name] = duplicate_names[name] + 1
new_name = "{}_copy_{}".format(name, duplicate_names[name])
Utils.log("\nMultiple entries with the ligand name: {}".format(name))
Utils.log("\tThe veresion of the ligand on line {} will be retitled {}".format(line_counter,new_name))
Utils.log("\tAll associated files will be refered to with this name")
Utils.log(
"\nMultiple entries with the ligand name: {}".format(name)
)
Utils.log(
"\tThe version of the ligand on line {} will be retitled {}".format(
line_counter, new_name
)
)
Utils.log(
"\tAll associated files will be referred to with this name"
)
name = new_name
else:
duplicate_names[name] = 2
new_name = "{}_copy_{}".format(name, duplicate_names[name])
Utils.log("\nMultiple entries with the ligand name: {}".format(name))
Utils.log("\tThe veresion of the ligand on line {} will be retitled {}".format(line_counter,new_name))
Utils.log("\tAll associated files will be refered to with this name")
Utils.log(
"\nMultiple entries with the ligand name: {}".format(name)
)
Utils.log(
"\tThe version of the ligand on line {} will be retitled {}".format(
line_counter, new_name
)
)
Utils.log(
"\tAll associated files will be referred to with this name"
)
name = new_name
# Save the data for this line and advance.
name_list.append(name)
line_counter +=1
line_counter += 1
data.append((smiles, name, {}))
# Return the data.
return data
def load_sdf_file(filename):
"""Loads an sdf file.
......@@ -100,9 +122,7 @@ def load_sdf_file(filename):
for mol in suppl:
# Convert mols to smiles. That's what the rest of the program is
# designed to deal with.
smiles = Chem.MolToSmiles(
mol, isomericSmiles=True, canonical=True
)
smiles = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
try:
name = mol.GetProp("_Name")
......@@ -111,10 +131,14 @@ def load_sdf_file(filename):
# Handle unnamed ligands
if name == "":
Utils.log("\tUntitled ligand for the {} molecule in the input SDF".format(mol_obj_counter))
name = "untitled_{}_molnum_{}".format(missing_name_counter,mol_obj_counter)
Utils.log(
"\tUntitled ligand for the {} molecule in the input SDF".format(
mol_obj_counter
)
)
name = "untitled_{}_molnum_{}".format(missing_name_counter, mol_obj_counter)
Utils.log("\tNaming that ligand {}".format(name))
Utils.log("\tAll associated files will be refered to with this name")
Utils.log("\tAll associated files will be referred to with this name")
missing_name_counter += 1
# Handle duplicate ligands in same list.
......@@ -124,16 +148,32 @@ def load_sdf_file(filename):
duplicate_names[name] = duplicate_names[name] + 1
new_name = "{}_copy_{}".format(name, duplicate_names[name])
Utils.log("\nMultiple entries with the ligand name: {}".format(name))
Utils.log("\tThe veresion of the ligand for the {} molecule in the SDF file will be retitled {}".format(mol_obj_counter,new_name))
Utils.log("\tAll associated files will be refered to with this name")
Utils.log(
"\nMultiple entries with the ligand name: {}".format(name)
)
Utils.log(
"\tThe version of the ligand for the {} molecule in the SDF file will be retitled {}".format(
mol_obj_counter, new_name
)
)
Utils.log(
"\tAll associated files will be referred to with this name"
)
name = new_name
else:
duplicate_names[name] = 2
new_name = "{}_copy_{}".format(name, duplicate_names[name])
Utils.log("\nMultiple entries with the ligand name: {}".format(name))
Utils.log("\tThe veresion of the ligand for the {} molecule in the SDF file will be retitled {}".format(mol_obj_counter,new_name))
Utils.log("\tAll associated files will be refered to with this name")
Utils.log(
"\nMultiple entries with the ligand name: {}".format(name)
)
Utils.log(
"\tThe version of the ligand for the {} molecule in the SDF file will be retitled {}".format(
mol_obj_counter, new_name
)
)
Utils.log(
"\tAll associated files will be referred to with this name"
)
name = new_name
mol_obj_counter += 1
......
......@@ -24,6 +24,7 @@ from gypsum_dl.Steps.IO.SaveToPDB import convert_sdfs_to_PDBs
from gypsum_dl.Steps.IO.Web2DOutput import web_2d_output
from gypsum_dl import Utils
def proccess_output(contnrs, params):
"""Proccess the molecular models in preparation for writing them to the
disk."""
......
......@@ -26,10 +26,12 @@ from os.path import basename
from gypsum_dl import Utils
import rdkit
import rdkit.Chem as Chem
#Disable the unnecessary RDKit warnings
rdkit.RDLogger.DisableLog('rdApp.*')
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'gypsum_dl'))
# Disable the unnecessary RDKit warnings
rdkit.RDLogger.DisableLog("rdApp.*")
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), "gypsum_dl"))
def convert_sdfs_to_PDBs(contnrs, output_folder):
"""This will convert every conformer into a PDB file, which is saved in
......@@ -55,7 +57,7 @@ def convert_sdfs_to_PDBs(contnrs, output_folder):
output_folder + os.sep,
Utils.slug(name),
contnr.contnr_idx_orig + 1,
i + 1
i + 1,
)
# Get the conformers into the rdkit_mol object.
......@@ -65,13 +67,14 @@ def convert_sdfs_to_PDBs(contnrs, output_folder):
continue
else:
# Write conformers to a PDB file.
Chem.MolToPDBFile(mol, pdb_file, flavor = 32)
Chem.MolToPDBFile(mol, pdb_file, flavor=32)
# Add header to PDB file with original SMILES and final SMILES
printout = "REMARK Original SMILES string: {}\nREMARK Final SMILES string: {}\n".format(m.orig_smi,m.standardize_smiles())
printout = "REMARK Original SMILES string: {}\nREMARK Final SMILES string: {}\n".format(
m.orig_smi, m.standardize_smiles()
)
with open(pdb_file) as f:
printout = printout + f.read()
with open(pdb_file,'w') as f:
with open(pdb_file, "w") as f:
f.write(printout)
printout = ""
\ No newline at end of file
......@@ -26,6 +26,7 @@ try:
except:
Utils.exception("You need to install rdkit and its dependencies.")
def save_to_sdf(contnrs, params, separate_output_files, output_folder):
"""Saves the 3D models to the disk as an SDF file.
......@@ -72,7 +73,7 @@ def save_to_sdf(contnrs, params, separate_output_files, output_folder):
sdf_file = "{}{}__input{}.sdf".format(
output_folder + os.sep,
Utils.slug(contnr.name),
contnr.contnr_idx_orig + 1
contnr.contnr_idx_orig + 1,
)
w = Chem.SDWriter(sdf_file)
# w = Chem.SDWriter(output_folder + os.sep + "output." + str(i + 1) + ".sdf")
......
......@@ -30,6 +30,7 @@ try:
except:
Utils.exception("You need to install rdkit and its dependencies.")
def web_2d_output(contnrs, output_folder):
"""Saves pictures of the models to an HTML file on disk. It can be viewed in
a browser. Then opens a browser automatically to view them. This is mostly
......@@ -39,7 +40,7 @@ def web_2d_output(contnrs, output_folder):
# Let's not parallelize it for now. This will rarely be used.
html_file = output_folder + os.sep + "gypsum_dl_success.html"
f = open(html_file, 'w')
f = open(html_file, "w")
for contnr in contnrs:
Utils.log("\t" + contnr.orig_smi)