Hi Christian,
I had a look at my old code and the outcome is actually intended.
Here's some 'historic background' and explanation regarding the code:
-- RedScaff means that all linking atoms are deleted and the rings
are connected using the ring atoms where trhe linker chain was.
-- Scaff leaves the linker chains at their place so you get what you
(hopefully??) wanted to get.
Now, the snipped I had posted quick and dirty had some additional
lines that force each atom to be a carbon (which is not what you wanted,
as I guess) That code simply has to be commented out to arrive at the
molecular core as you wanted to have it.
As the script was rather a dirty hack intended to inspire RDKiters,
I now cleaned up at least the raw 'code-clups' and added some doku.
The attached script should do what you expected. If not, then feel free to
adapt it to fit your needs.
@Greg:
Now that some people are interested in the code:
May this script find a save harbour in the Contrib folder?
All the best!
Markus
Christian de Bouillé wrote:
Hi Greg
I have tested the script from
Markus Kossner
Starting string
Oc1ccccc1N1CCN(C(=S)NCc2cc3c(cc2)OCO3)CC1
Scaffold with mode Scaff
c1cc(ccc1)N1CCN(c2cc3OCOc3cc2)CC1
This version with Scaff keeps
the heteroatoms and aromaticity
But the chain between the phenyl and the piperidine
is not kept
Is someone who can help me
There is similar script PyMCS
10 years old
May be someone has a copy
Best Regards
Christian
#!/usr/bin/env python2.6
# encoding: utf-8
import os,sys
from Chem import AllChem as Chem
def flatten(x):
"""flatten(sequence) -> list
Returns a single, flat list which contains all elements retrieved
from the sequence and all nested sub-sequences (iterables).
Examples:
>>> [1, 2, [3,4], (5,6)]
[1, 2, [3, 4], (5, 6)]
>>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
[1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
result = []
for el in x:
if hasattr(el, "__iter__") and not isinstance(el, basestring):
result.extend(flatten(el))
else:
result.append(el)
return result
def GetFrame(mol,mode='RedScaff'):
'''return a ganeric molecule defining the reduced scaffold of the
molecule.
mode can be 'RedScaff' and 'Scaff'. The second mode will turn every atom
to a carbon and every bond to a single bond!'''
ring=mol.GetRingInfo()
RingAtoms= flatten(ring.AtomRings())
NonRingAtoms=[ atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx()
not in RingAtoms ]
RingNeighbors=[]
Paths=[]
for NonRingAtom in NonRingAtoms:
for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
if neighbor.GetIdx() in RingAtoms:
RingNeighbors.append(NonRingAtom)
Paths.append([neighbor.GetIdx(),NonRingAtom]) #The ring
Atoms
having a non ring Nieghbor will be the start of a walk
break
PosLinkers=[x for x in NonRingAtoms if x not in RingNeighbors] #Only
these
Atoms are Possible Linkers of two rings
Framework=[ x for x in RingAtoms ]
Linkers=[]
while len(Paths)>0:
NewPaths=[]
for P in Paths:
if P==None:
print 'ooh, there is still a bug somewere '
else:
for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
if neighbor.GetIdx() not in P:
if neighbor.GetIdx() in NonRingAtoms:
n=P[:]
n.append(neighbor.GetIdx())
NewPaths.append(n[:])
elif neighbor.GetIdx() in RingAtoms:
n=P[:]
n.append(neighbor.GetIdx())
Linkers.append(n)
Framework=Framework+P[:]
Paths=NewPaths[:]
#print 'Linkers:',Linkers
#print 'RingAtoms:',RingAtoms
if mode=='Scaff':
Framework=list(set(Framework))
todel=[]
NonRingAtoms.sort(reverse=True)
em=Chem.EditableMol(mol)
BondsToAdd=[ sorted([i[0],i[-1]]) for i in Linkers ]
mem=[]
for i in BondsToAdd:
if i not in mem:
em.AddBond(i[0],i[1],Chem.BondType.SINGLE)
mem.append(i)
for i in NonRingAtoms:
todel.append(i)
for i in todel:
em.RemoveAtom(i)
m=em.GetMol()
return m
if mode=='RedScaff':
Framework=list(set(Framework))
todel=[]
NonRingAtoms.sort(reverse=True)
for i in NonRingAtoms:
if i != None:
if i not in Framework:
todel.append(i)
em=Chem.EditableMol(mol)
for i in todel:
em.RemoveAtom(i)
m=em.GetMol()
#===================================#
#Now do the flattening of atoms and bonds!
#Any heavy atom will become a carbon and any bond will become a
single
bond!! #
#===================================#
for atom in m.GetAtoms(): #
atom.SetAtomicNum(6) #
atom.SetFormalCharge(0) #
for bond in m.GetBonds(): #
bond.SetBondType(Chem.BondType.SINGLE) #
Chem.SanitizeMol(m) #
#===================================#
return m
if len(sys.argv) < 2:
print "No input file provided: Frames.py <file-to-process.sdf>"
sys.exit(1)
suppl=Chem.SDMolSupplier(sys.argv[1])
FrameDict={}
for mol in suppl:
if mol == 'None': continue
try:
m=GetFrame(mol,mode='RedScaff')
if FrameDict.has_key(Chem.MolToSmiles(m)):
FrameDict[Chem.MolToSmiles(m)].append(mol)
else:
FrameDict[Chem.MolToSmiles(m)]=[mol,]
except:
print '--------------------------------'
print 'could not process the molecule with the following name:'
print mol.GetProp('_Name')
counter=0
w=open('frames.smi','w')
d=Chem.SDWriter('frames.sdf')
for key,item in FrameDict.items():
counter+=1
#d=Chem.SDWriter(str(counter)+'.sdf')
for i in item:
i.SetProp('Scaffold',key)
i.SetProp('Cluster',str(counter))
d.write(i)
print key,len(item)
w.write(key+'\n')
w.close
print 'number of Frames found: %d' %(counter)
------------------------------------------------------------------------------
Protect Your Site and Customers from Malware Attacks
Learn about various malware tactics and how to avoid them. Understand
malware threats, the impact they can have on your business, and how you
can protect your company and customers by using code signing.
http://p.sf.net/sfu/oracle-sfdevnl
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss
#!/usr/bin/python
# encoding: utf-8
# Jan 2011 (markus kossner) Cleaned up the code, added some documentation
# somwhere around Aug 2008 (markus kossner) created
#
# This script extracts the molecular framework for a database of molecules.
# You can use two modes (hard coded):
# - Scaff: The molecular frame is extracted
# - RedScaff: All linking chains between rings are deleted. The rings are directly connected.
#
# You can comment in/out the code snippets indicated by the comments
# to force each atom of the frame to be a Carbon.
#
# Usage: Frames.py <database.sdf>
# Output:
# - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
# - frames.smi containing the (caninical) smiles and count of occurrence
#
import os,sys
from Chem import AllChem as Chem
def flatten(x):
"""flatten(sequence) -> list
Returns a single, flat list which contains all elements retrieved
from the sequence and all nested sub-sequences (iterables).
Examples:
>>> [1, 2, [3,4], (5,6)]
[1, 2, [3, 4], (5, 6)]
>>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
[1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
result = []
for el in x:
if hasattr(el, "__iter__") and not isinstance(el, basestring):
result.extend(flatten(el))
else:
result.append(el)
return result
def GetFrame(mol, mode='Scaff'):
'''return a ganeric molecule defining the reduced scaffold of the input mol.
mode can be 'Scaff' or 'RedScaff':
Scaff -> chop off the side chains and return the scaffold
RedScaff -> remove all linking chains and connect the rings
directly at the atoms where the linker was
'''
ring = mol.GetRingInfo()
RingAtoms = flatten(ring.AtomRings())
NonRingAtoms = [ atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx() not in RingAtoms ]
RingNeighbors = []
Paths = []
for NonRingAtom in NonRingAtoms:
for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
if neighbor.GetIdx() in RingAtoms:
RingNeighbors.append(NonRingAtom)
Paths.append([neighbor.GetIdx(),NonRingAtom]) #The ring Atoms having a non ring Nieghbor will be the start of a walk
break
PosConnectors = [x for x in NonRingAtoms if x not in RingNeighbors] #Only these Atoms are potential starting points of a Linker chain
#print 'PosConnectors:'
#print PosConnectors
Framework = [ x for x in RingAtoms ]
#Start a list of pathways which we will have to walk
#print 'Path atoms:'
#print Paths
Linkers = []
while len(Paths)>0:
NewPaths = []
for P in Paths:
if P == None:
print 'ooh'
else:
for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
if neighbor.GetIdx() not in P:
if neighbor.GetIdx() in NonRingAtoms:
n = P[:]
n.append(neighbor.GetIdx())
NewPaths.append(n[:])
elif neighbor.GetIdx() in RingAtoms:
#print 'adding the following path to Framework:'
#print P
n = P[:]
n.append(neighbor.GetIdx())
Linkers.append(n)
Framework=Framework+P[:]
Paths = NewPaths[:]
#print 'Linkers:',Linkers
#print 'RingAtoms:',RingAtoms
#em.AddBond(3,4,Chem.BondType.SINGLE)
if mode == 'RedScaff':
Framework = list(set(Framework))
todel = []
NonRingAtoms.sort(reverse=True)
em = Chem.EditableMol(mol)
BondsToAdd = [ sorted([i[0],i[-1]]) for i in Linkers ]
mem = []
for i in BondsToAdd:
if i not in mem:
em.AddBond(i[0],i[1],Chem.BondType.SINGLE)
mem.append(i)
for i in NonRingAtoms:
todel.append(i)
for i in todel:
em.RemoveAtom(i)
m = em.GetMol()
#===================================#
# Now do the flattening of atoms and bonds!
# Any heavy atom will become a carbon and any bond will become a single bond! #
#===================================#
# for atom in m.GetAtoms(): #
# atom.SetAtomicNum(6) #
# atom.SetFormalCharge(0) #
# for bond in m.GetBonds(): #
# bond.SetBondType(Chem.BondType.SINGLE) #
# Chem.SanitizeMol(m) #
#===================================#
return m
if mode == 'Scaff':
Framework = list(set(Framework))
todel = []
NonRingAtoms.sort(reverse=True)
for i in NonRingAtoms:
if i != None:
if i not in Framework:
todel.append(i)
em = Chem.EditableMol(mol)
for i in todel:
em.RemoveAtom(i)
m = em.GetMol()
#===================================#
# Now do the flattening of atoms and bonds!
# Any heavy atom will become a carbon and any bond will become a single bond!! #
#===================================#
# for atom in m.GetAtoms(): #
# atom.SetAtomicNum(6) #
# atom.SetFormalCharge(0) #
# for bond in m.GetBonds(): #
# bond.SetBondType(Chem.BondType.SINGLE) #
# Chem.SanitizeMol(m) #
#===================================#
return m
if __name__=='__main__':
if len(sys.argv) < 2:
print "No input file provided: Frames.py filetosprocess.ext"
sys.exit(1)
suppl = Chem.SDMolSupplier(sys.argv[1])
FrameDict = {}
for mol in suppl:
m = GetFrame(mol)
cansmiles = Chem.MolToSmiles(m, isomericSmiles=True)
if FrameDict.has_key(cansmiles):
FrameDict[cansmiles].append(mol)
else:
FrameDict[cansmiles]=[mol,]
counter=0
w=open('frames.smi','w')
for key,item in FrameDict.items():
counter+=1
d=Chem.SDWriter(str(counter)+'.sdf')
for i in item:
i.SetProp('Scaffold',key)
i.SetProp('Cluster',str(counter))
d.write(i)
print key,len(item)
w.write(key+'\t'+str(len(item))+'\n')
w.close
print 'number of Clusters: %d' %(counter)
------------------------------------------------------------------------------
Protect Your Site and Customers from Malware Attacks
Learn about various malware tactics and how to avoid them. Understand
malware threats, the impact they can have on your business, and how you
can protect your company and customers by using code signing.
http://p.sf.net/sfu/oracle-sfdevnl
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss