Hi all, Below is a small pybel script for Murcko framework generation, with or without exocyclic double bonded heteroatoms.
Any comment on the programme structure, or how to improve it, are welcome :) I'll need to run it on a ZINC subset, so the speedier the better. I plan to generate smaller Murcko scaffolds by deleting: - in the initial molecule (if there is more than 3 cycles), all-carbon and non-substituted (i.e. terminal) cycle(s) (yes, it could lead to acyclic fragment, aborting the Murcko framework generation, e.g. for tetraphenylmethane), - in the Murcko framework, terminal cycle of 3 or 4 atoms but not in the short term, and I'm not yet sure if I'll need such framework. Regards, Pascal #!/usr/bin/env python # encoding: utf-8 import sys import os, pybel import openbabel as ob def checkIfIsOnlyOneNeighbor(mol, keepExo): isAnAtomWithOneNeighbor = 0 atomWithOneNeighborList = [] if keepExo == "n" : for atom in ob.OBMolAtomIter(mol.OBMol): if not atom.IsInRing(): # Keep cycle anyway neighborsCount = 0 for neighbor in ob.OBAtomAtomIter(atom): neighborsCount = neighborsCount + 1 if (neighborsCount <= 1): # only one neighbour : terminal atom, which belongs not to the Murcko framework isAnAtomWithOneNeighbor = 1 atomWithOneNeighborList.append(atom) # will be deleted else: for atom in ob.OBMolAtomIter(mol.OBMol): if not atom.IsInRing(): neighborsCount = 0 for neighbor in ob.OBAtomAtomIter(atom): neighborsCount = neighborsCount + 1 if (neighborsCount <= 1): # Does atom have a double bond? Is this atom a carbon? Is this atom connected to a cycle? if (atom.BOSum() < 2) | ((atom.BOSum() == 2) & (atom.GetAtomicNum() == 6)): # Delete if no double bond, or if it's a carbon with a double bond isAnAtomWithOneNeighbor = 1 atomWithOneNeighborList.append(atom) # will be deleted else: # It is a heteroatom with a double bond... for neighbor in ob.OBAtomAtomIter(atom): if not neighbor.IsInRing(): # ... not connected to a cycle isAnAtomWithOneNeighbor = 1 atomWithOneNeighborList.append(atom) # will be deleted # --> keep heteroatoms with exocyclic double bond return isAnAtomWithOneNeighbor, atomWithOneNeighborList def stripStereo(mol): smilesMurcko = mol.write("smi").split("\t")[0] smilesMurckoNoStereo = smilesMurcko.replace("/","") smilesMurckoNoStereo = smilesMurckoNoStereo.replace("\\","") # E/Z stereochemistry is now reseted molName = mol.OBMol.GetTitle() molMurckoNoStereo = smilesMurckoNoStereo + "\t" + molName + "\n" # The smiles to be written in the .smi file return molMurckoNoStereo def main(): if (len(sys.argv) < 3): print "No input file provided and/or missing argument: Murcko.py filetosprocess.ext KeepExocyclicCoubleBond:y/n" print "The script will determine which file type to read from by the extension." sys.exit(1) elif ((sys.argv[2] != "n") & (sys.argv[2] != "y")): print "Wrong argument: Murcko.py filetosprocess.ext KeepExocyclicCoubleBond:y/n" sys.exit(1) keepExo = sys.argv[2] molnum = 0 #murckoOutput = pybel.Outputfile("smi", "murckoFrameworks.smi", True) # for writing smi file with Murcko framework - keeps E/Z stereochemistry, see comment below murckoNoStereoOutput = open("murckoFrameworksNoStereo.smi","w") # for writing smi file with Murcko framework, without stereochemistry for mol in pybel.readfile(sys.argv[1].split('.')[1], sys.argv[1]): molnum += 1 #print mol.OBMol.GetTitle(), "- Molecules processed:", molnum if not (molnum % 1000): print "Molecules processed:", molnum mol.OBMol.DeleteHydrogens() haveAtomWithOneNeighbor,atomsWithOneNeighborList = checkIfIsOnlyOneNeighbor(mol, keepExo) while (haveAtomWithOneNeighbor): for atomToDel in atomsWithOneNeighborList: mol.OBMol.DeleteAtom(atomToDel) # delete atom which are not part of a cycle or a linker (terminal atoms) - Do it as long as necessary haveAtomWithOneNeighbor,atomsWithOneNeighborList = checkIfIsOnlyOneNeighbor(mol, keepExo) ## Molecule with no cycle have no Murcko framework #smilesMurcko = mol.write("smi").split("\t")[0] #if (smilesMurcko): # always true, because I always have at least one cycle #murckoOutput.write(mol) #murckoOutput.write(mol) # E/Z stereochemistry conserved noStereoMurcko = stripStereo(mol) # Need to delete "/" and "\" from smiles murckoNoStereoOutput.write(noStereoMurcko) murckoNoStereoOutput.close() #murckoOutput.close() if __name__ == '__main__': main() ------------------------------------------------------------------------------ Beautiful is writing same markup. Internet Explorer 9 supports standards for HTML5, CSS3, SVG 1.1, ECMAScript5, and DOM L2 & L3. Spend less time writing and rewriting code and more time creating great experiences on the web. Be a part of the beta today. http://p.sf.net/sfu/beautyoftheweb _______________________________________________ OpenBabel-discuss mailing list OpenBabel-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/openbabel-discuss