Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
dimorphite_dl
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
2
Issues
2
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
jdurrant
dimorphite_dl
Commits
44a8a100
Commit
44a8a100
authored
May 12, 2019
by
Jacob Durrant
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update to 1.1.2. Various bug fixes. See CHANGES.md.
parent
a36ea281
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
59 additions
and
22 deletions
+59
-22
CHANGES.md
CHANGES.md
+10
-0
README.md
README.md
+2
-2
dimorphite_dl.py
dimorphite_dl.py
+46
-19
site_substructures.smarts
site_substructures.smarts
+1
-1
No files found.
CHANGES.md
View file @
44a8a100
Changes
=======
1.
1.2
-----
*
Corrected a bug that led Dimorphite-DL to sometimes produce output molecules
that are non-physical.
*
Corrected a bug that gave incorrect protonation states for rare molecules
(aromatic rings with nitrogens that are protonated when electrically
neutral, e.g., pyridin-4(1H)-one).
1.
1.1
-----
*
`run_with_mol_list()`
now preserves non-string properties.
*
`run_with_mol_list()`
throws a warning if it cannot process a molecule,
rather than terminating the program with an error.
...
...
README.md
View file @
44a8a100
Dimorphite-DL 1.1.
1
Dimorphite-DL 1.1.
2
===================
What is it?
...
...
@@ -34,7 +34,7 @@ usage: dimorphite_dl.py [-h] [--min_ph MIN] [--max_ph MAX]
[--smiles_file FILE] [--output_file FILE]
[--label_states] [--test]
Dimorphite 1.1.
1
: Creates models of appropriately protonated small moleucles.
Dimorphite 1.1.
2
: Creates models of appropriately protonated small moleucles.
Apache 2.0 License. Copyright 2018 Jacob D. Durrant.
optional arguments:
...
...
dimorphite_dl.py
View file @
44a8a100
...
...
@@ -137,7 +137,7 @@ class ArgParseFuncs:
:return: A parser object.
"""
parser
=
MyParser
(
description
=
"Dimorphite 1.1.
1
: Creates models of "
+
parser
=
MyParser
(
description
=
"Dimorphite 1.1.
2
: Creates models of "
+
"appropriately protonated small moleucles. "
+
"Apache 2.0 License. Copyright 2018 Jacob D. "
+
"Durrant."
)
...
...
@@ -280,18 +280,30 @@ class UtilFuncs:
if it fails to convert to a Mol Obj
"""
# Check that there are no type errors, ie Nones or non-string
# A non-string type will cause RDKit to hard crash
if
smiles_str
is
None
or
type
(
smiles_str
)
is
not
str
:
return
None
# Check that there are no type errors, ie Nones or non-string
# A non-string type will cause RDKit to hard crash
try
:
# Try to fix azides here. They are just tricky to deal with.
smiles_str
=
smiles_str
.
replace
(
"N=N=N"
,
"N=[N+]=N"
)
smiles_str
=
smiles_str
.
replace
(
"NN#N"
,
"N=[N+]=N"
)
# Now convert to a mol object. Note the trick that is necessary to
# capture RDKit error/warning messages. See
# https://stackoverflow.com/questions/24277488/in-python-how-to-capture-the-stdout-from-a-c-shared-library-to-a-variable
stderr_fileno
=
sys
.
stderr
.
fileno
()
stderr_save
=
os
.
dup
(
stderr_fileno
)
stderr_pipe
=
os
.
pipe
()
os
.
dup2
(
stderr_pipe
[
1
],
stderr_fileno
)
os
.
close
(
stderr_pipe
[
1
])
mol
=
Chem
.
MolFromSmiles
(
smiles_str
)
except
:
return
None
os
.
close
(
stderr_fileno
)
os
.
close
(
stderr_pipe
[
0
])
os
.
dup2
(
stderr_save
,
stderr_fileno
)
os
.
close
(
stderr_save
)
# Check that there are None type errors Chem.MolFromSmiles has sanitize on
# which means if there is even a small error in the SMILES (kekulize,
...
...
@@ -513,6 +525,21 @@ class Protonate(object):
# BOTH states. Let's remove this redundancy.
new_smis
=
list
(
set
(
new_smis
))
# Deprotonating protonated aromatic nitrogen gives [nH-]. Change this
# to [n-]. This is a hack.
new_smis
=
[
s
.
replace
(
"[nH-]"
,
"[n-]"
)
for
s
in
new_smis
]
# Sometimes Dimorphite-DL generates molecules that aren't actually
# possible. Simply convert these to mol objects to eliminate the bad
# ones (that are None).
new_smis
=
[
s
for
s
in
new_smis
if
UtilFuncs
.
convert_smiles_str_to_mol
(
s
)
is
not
None
]
# If there are no smi left, return the input one at the very least.
# All generated forms have apparently been judged
# inappropriate/mal-formed.
if
len
(
new_smis
)
==
0
:
new_smis
=
[
smi
]
# If the user wants to see the target states, add those
# to the ends of each line.
if
self
.
args
[
"label_states"
]:
...
...
@@ -522,6 +549,7 @@ class Protonate(object):
new_lines
=
[
x
+
"
\t
"
+
tag
for
x
in
new_smis
]
self
.
cur_prot_SMI
=
new_lines
return
self
.
next
()
class
ProtSubstructFuncs
:
...
...
@@ -555,7 +583,7 @@ class ProtSubstructFuncs:
sub
[
"smart"
]
=
splits
[
1
]
sub
[
"mol"
]
=
Chem
.
MolFromSmarts
(
sub
[
"smart"
])
#NEED TO DIVIDE THIS BY 3s
#
NEED TO DIVIDE THIS BY 3s
pka_ranges
=
[
splits
[
i
:
i
+
3
]
for
i
in
range
(
2
,
len
(
splits
)
-
1
,
3
)]
prot
=
[]
...
...
@@ -819,8 +847,8 @@ class TestFuncs:
[
"CC(=O)[n+]1ccc(N)cc1"
,
"CC(=O)[n+]1ccc([NH3+])cc1"
,
"CC(=O)[n+]1ccc(N)cc1"
,
"Anilines_primary"
],
[
"CCNc1ccccc1"
,
"CC[NH2+]c1ccccc1"
,
"CCNc1ccccc1"
,
"Anilines_secondary"
],
[
"Cc1ccccc1N(C)C"
,
"Cc1ccccc1[NH+](C)C"
,
"Cc1ccccc1N(C)C"
,
"Anilines_tertiary"
],
[
"BrC1=CC2=C(C=C1)NC=C2"
,
"Brc1ccc2[nH]ccc2c1"
,
"Brc1ccc2[n
H-]ccc2c1"
,
"Indole_pyrrole"
],
[
"
BrC1=CNC=C(C1=O)Br"
,
"O=c1c(Br)c[nH+]cc1Br"
,
"O=c1c(Br)c[nH]cc1Br"
,
"Aromatic_nitrogen_protonated"
],
[
"BrC1=CC2=C(C=C1)NC=C2"
,
"Brc1ccc2[nH]ccc2c1"
,
"Brc1ccc2[n
-]ccc2c1"
,
"Indole_pyrrole"
],
[
"
O=c1cc[nH]cc1"
,
"O=c1cc[nH]cc1"
,
"O=c1cc[n-]cc1"
,
"Aromatic_nitrogen_protonated"
],
[
"C-N=[N+]=[N@H]"
,
"CN=[N+]=N"
,
"CN=[N+]=[N-]"
,
"Azide"
],
[
"BrC(C(O)=O)CBr"
,
"O=C(O)C(Br)CBr"
,
"O=C([O-])C(Br)CBr"
,
"Carboxyl"
],
[
"NC(NN=O)=N"
,
"NC(=[NH2+])NN=O"
,
"N=C(N)NN=O"
,
"AmidineGuanidine1"
],
...
...
@@ -954,27 +982,27 @@ class TestFuncs:
num_states
=
len
(
expected_output
)
if
(
len
(
output
)
!=
num_states
):
msg
=
args
[
"smiles"
]
[
0
]
+
" should have "
+
str
(
num_states
)
+
\
msg
=
args
[
"smiles"
]
+
" should have "
+
str
(
num_states
)
+
\
" states at at pH "
+
str
(
args
[
"min_ph"
])
+
": "
+
str
(
output
)
print
(
msg
)
raise
Exception
(
msg
)
if
(
len
(
set
([
l
[
0
]
for
l
in
output
])
-
set
(
expected_output
))
!=
0
):
msg
=
args
[
"smiles"
]
[
0
]
+
" is not "
+
" AND "
.
join
(
expected_output
)
+
\
msg
=
args
[
"smiles"
]
+
" is not "
+
" AND "
.
join
(
expected_output
)
+
\
" at pH "
+
str
(
args
[
"min_ph"
])
+
" - "
+
str
(
args
[
"max_ph"
])
+
\
"; it is "
+
" AND "
.
join
([
l
[
0
]
for
l
in
output
])
print
(
msg
)
raise
Exception
(
msg
)
if
(
len
(
set
([
l
[
1
]
for
l
in
output
])
-
set
(
labels
))
!=
0
):
msg
=
args
[
"smiles"
]
[
0
]
+
" not labeled as "
+
" AND "
.
join
(
labels
)
+
\
msg
=
args
[
"smiles"
]
+
" not labeled as "
+
" AND "
.
join
(
labels
)
+
\
"; it is "
+
" AND "
.
join
([
l
[
1
]
for
l
in
output
])
print
(
msg
)
raise
Exception
(
msg
)
ph_range
=
sorted
(
list
(
set
([
args
[
"min_ph"
],
args
[
"max_ph"
]])))
ph_range_str
=
"("
+
" - "
.
join
(
"{0:.2f}"
.
format
(
n
)
for
n
in
ph_range
)
+
")"
print
(
"(CORRECT) "
+
ph_range_str
.
ljust
(
10
)
+
" "
+
args
[
"smiles"
]
[
0
]
+
" => "
+
" AND "
.
join
([
l
[
0
]
for
l
in
output
]))
print
(
"(CORRECT) "
+
ph_range_str
.
ljust
(
10
)
+
" "
+
args
[
"smiles"
]
+
" => "
+
" AND "
.
join
([
l
[
0
]
for
l
in
output
]))
def
run
(
**
kwargs
):
"""A helpful, importable function for those who want to call Dimorphite-DL
...
...
@@ -1026,8 +1054,7 @@ def run_with_mol_list(mol_lst, **kwargs):
protonated_smiles_and_props
=
[]
for
m
in
mol_lst
:
props
=
m
.
GetPropsAsDict
()
smiles
=
Chem
.
MolToSmiles
(
m
,
isomericSmiles
=
True
)
kwargs
[
"smiles"
]
=
smiles
kwargs
[
"smiles"
]
=
Chem
.
MolToSmiles
(
m
,
isomericSmiles
=
True
)
protonated_smiles_and_props
.
extend
(
[(
s
.
split
(
"
\t
"
)[
0
],
props
)
for
s
in
main
(
kwargs
)]
)
...
...
site_substructures.smarts
View file @
44a8a100
...
...
@@ -36,4 +36,4 @@ Phosphate_diester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,
Phosphonate_ester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H] 5 2.0868 0.4503028610465036
Primary_hydroxyl_amine [C,c:1]-[O:2]-[NH2:3] 2 4.035714285714286 0.8463816543155368
*Indole_pyrrole [c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H] 4 14.52875 4.06702491591416
Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212
*
Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment