Hi all,

Below is a small pybel script for Murcko framework generation, with or
without exocyclic double bonded heteroatoms.

Any comment on the programme structure, or how to improve it, are
welcome :) I'll need to run it on a ZINC subset, so the speedier the
better.

I plan to generate smaller Murcko scaffolds by deleting:
- in the initial molecule (if there is more than 3 cycles), all-carbon
and non-substituted (i.e. terminal) cycle(s) (yes, it could lead to
acyclic fragment, aborting the Murcko framework generation, e.g. for
tetraphenylmethane),
- in the Murcko framework, terminal cycle of 3 or 4 atoms
but not in the short term, and I'm not yet sure if I'll need such framework.

Regards,
Pascal


#!/usr/bin/env python
# encoding: utf-8

import sys
import os, pybel
import openbabel as ob

def checkIfIsOnlyOneNeighbor(mol, keepExo):
        isAnAtomWithOneNeighbor = 0
        atomWithOneNeighborList = []
        if keepExo == "n" :
                for atom in ob.OBMolAtomIter(mol.OBMol):
                        if not atom.IsInRing(): # Keep cycle anyway
                                neighborsCount = 0
                                for neighbor in ob.OBAtomAtomIter(atom):
                                        neighborsCount = neighborsCount + 1
                                if (neighborsCount <= 1):
                                # only one neighbour : terminal atom, which 
belongs not to the
Murcko framework
                                        isAnAtomWithOneNeighbor = 1
                                        atomWithOneNeighborList.append(atom) # 
will be deleted
        else:
                for atom in ob.OBMolAtomIter(mol.OBMol):
                        if not atom.IsInRing():
                                neighborsCount = 0
                                for neighbor in ob.OBAtomAtomIter(atom):
                                        neighborsCount = neighborsCount + 1
                                if (neighborsCount <= 1):
                                # Does atom have a double bond? Is this atom a 
carbon? Is this
atom connected to a cycle?
                                        if (atom.BOSum() < 2) | ((atom.BOSum() 
== 2) & (atom.GetAtomicNum() == 6)):
                                                # Delete if no double bond, or 
if it's a carbon with a double bond
                                                isAnAtomWithOneNeighbor = 1
                                                
atomWithOneNeighborList.append(atom) # will be deleted
                                        else:
                                                # It is a heteroatom with a 
double bond...
                                                for neighbor in 
ob.OBAtomAtomIter(atom):
                                                        if not 
neighbor.IsInRing():
                                                                # ... not 
connected to a cycle
                                                                
isAnAtomWithOneNeighbor = 1
                                                                
atomWithOneNeighborList.append(atom) # will be deleted
                                        # --> keep heteroatoms with exocyclic 
double bond
        return isAnAtomWithOneNeighbor, atomWithOneNeighborList


def stripStereo(mol):
        smilesMurcko = mol.write("smi").split("\t")[0]
        smilesMurckoNoStereo = smilesMurcko.replace("/","")
        smilesMurckoNoStereo = smilesMurckoNoStereo.replace("\\","") # E/Z
stereochemistry is now reseted
        molName = mol.OBMol.GetTitle()
        molMurckoNoStereo = smilesMurckoNoStereo + "\t" + molName + "\n" #
The smiles to be written in the .smi file
        return molMurckoNoStereo



def main():
        if (len(sys.argv) < 3):
                print "No input file provided and/or missing argument: Murcko.py
filetosprocess.ext KeepExocyclicCoubleBond:y/n"
                print "The script will determine which file type to read from 
by the
extension."
                sys.exit(1)
        elif ((sys.argv[2] != "n") & (sys.argv[2] != "y")):
                print "Wrong argument: Murcko.py filetosprocess.ext
KeepExocyclicCoubleBond:y/n"
                sys.exit(1)
        keepExo = sys.argv[2]
        molnum = 0
        #murckoOutput = pybel.Outputfile("smi", "murckoFrameworks.smi", True)
# for writing smi file with Murcko framework - keeps E/Z
stereochemistry, see comment below
        murckoNoStereoOutput = open("murckoFrameworksNoStereo.smi","w") # for
writing smi file with Murcko framework, without stereochemistry
        for mol in pybel.readfile(sys.argv[1].split('.')[1], sys.argv[1]):
                molnum += 1
                #print mol.OBMol.GetTitle(), "- Molecules processed:", molnum
                if not (molnum % 1000):
                        print "Molecules processed:", molnum
                mol.OBMol.DeleteHydrogens()
                haveAtomWithOneNeighbor,atomsWithOneNeighborList =
checkIfIsOnlyOneNeighbor(mol, keepExo)
                while (haveAtomWithOneNeighbor):
                        for atomToDel in atomsWithOneNeighborList:
                                mol.OBMol.DeleteAtom(atomToDel) # delete atom 
which are not part
of a cycle or a linker (terminal atoms) - Do it as long as necessary
                        haveAtomWithOneNeighbor,atomsWithOneNeighborList =
checkIfIsOnlyOneNeighbor(mol, keepExo)
                ## Molecule with no cycle have no Murcko framework
                #smilesMurcko = mol.write("smi").split("\t")[0]
                #if (smilesMurcko): # always true, because I always have at 
least one cycle
                        #murckoOutput.write(mol)
                #murckoOutput.write(mol) # E/Z stereochemistry conserved
                noStereoMurcko = stripStereo(mol) # Need to delete "/" and "\" 
from smiles
                murckoNoStereoOutput.write(noStereoMurcko)
        murckoNoStereoOutput.close()
        #murckoOutput.close()   


if __name__ == '__main__':
        main()

------------------------------------------------------------------------------
Beautiful is writing same markup. Internet Explorer 9 supports
standards for HTML5, CSS3, SVG 1.1,  ECMAScript5, and DOM L2 & L3.
Spend less time writing and  rewriting code and more time creating great
experiences on the web. Be a part of the beta today.
http://p.sf.net/sfu/beautyoftheweb
_______________________________________________
OpenBabel-discuss mailing list
OpenBabel-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openbabel-discuss

Reply via email to