Hello again,

I'm still working on my MCS code, and following Greg's suggestion, have tried 
to use custom atom types to get rid of atoms which are the entry point to a 
ring whose other members are not contained in the MCS.
Example: two molecules being overlaid, the MCS is shown below. I'd like to get 
rid of the N bracketed.
[cid:[email protected]]

Here's the code I put together - much of it inspired by our previous discussion 
and the custom-atom-types how-to page. It seems to work in some cases, but 
doesn't in others.

# begin of ring_aware_mcs code
# see http://rdkit.org/docs/Cookbook.html#using-custom-mcs-atom-types and
# https://sourceforge.net/p/rdkit/mailman/message/35170984/
# for details on this block of code
def ring_labels(a):
   # using 300 here rather than 100 as Greg suggested in case some of the users 
thinks of using a heavy atom anywhere.
    return 300*a.GetOwningMol().GetRingInfo().NumAtomRings(a.GetIdx()) + 
a.GetAtomicNum()

def getMCSSmiles(mol, labelledMol, mcs, loggero):

    mcsp = Chem.MolFromSmarts(mcs.smartsString)

    # Trying to find a way to delete every atom belonging to a ring (that is, 
with custom atomtype above 100)
    rmcsp = get_rid_of_isolated_ring_atoms(mcs, loggero)
    # if it's not directly connected to another atom in the same atomic custom 
atomtype spectrum
    match = labelledMol.GetSubstructMatch(rmcsp, useChirality=True)
    loggero.debug("got tentative match %s"%str(match))
    if match == (): # if previous step didn't work, then we go back to the old 
simpler way (which may still cause omega to fail
        loggero.warning("No match returned from rmcsp, trying using raw 
smartsString (mcsp - may contain isolated ring-atoms)")
        loggero.warning("mcsp(%s - %d atoms) versus rmcsp(%s - %d 
atoms)"%(Chem.MolToSmarts(mcsp), mcsp.GetNumAtoms(), Chem.MolToSmarts(rmcsp), 
rmcsp.GetNumAtoms()))
        match = labelledMol.GetSubstructMatch(mcsp, useChirality=True)
    loggero.debug("got match %s"%str(match))
    if mcs.smartsString is not '' and match is not ():
        return Chem.MolFragmentToSmiles(
            mol, atomsToUse=match,
            isomericSmiles=True,
            canonical=False
        )
    else:
        return ''

def get_rid_of_isolated_ring_atoms(mcs, loggero):
    '''
    this function transforms the matching smarts into an editable molecule 
(RWmol), and iterates through its atoms
    checking, if they are cyclic (isotope number > 200) if any of their 
neighbours share the ring
    (that is, have a similar isotope number) - if that is not the case, then 
the atom under exam is excised from the molecule
    and finally a new smiles is returned. There may be a more efficient way to 
do this...
    :param mcs:
    :param loggero:
    :return:
    '''
    mcsp = Chem.rdchem.RWMol(Chem.MolFromSmiles(mcs.smartsString))
    loggero.debug("***********\nmcsp contains %d atoms"%(mcsp.GetNumAtoms()))
    loggero.debug ("%s"%Chem.MolToSmiles(mcsp, isomericSmiles=True))
    for at in mcsp.GetAtoms():
        alone_in_ring = False
        ait = at.GetIsotope() # can't get back the number. why???
        loggero.debug("AtomType (%s) = %d"%(str(at), ait))
        if ait > 200: # that is, if the atom is in a ring
            alone_in_ring = True
            loggero.debug("alone_in_ring = %s"%(str(alone_in_ring)))
            for nb in at.GetNeighbors():
                nit=nb.GetIsotope()
                loggero.debug("\tn: %s, nit %d "%(str(nb), nit))
                if abs(nit - ait) < 200:
                    alone_in_ring = False
                    loggero.debug("\t * nit %d - alone_in_ring = %s"%(nit, 
str(alone_in_ring)))
                    break
        if alone_in_ring:
            mcsp.RemoveAtom(at.GetIdx())
            loggero.debug("mcsp now contains %d atoms"%(mcsp.GetNumAtoms()))
            loggero.debug("%s"%Chem.MolToSmiles(mcsp, isomericSmiles=True))
    loggero.debug("got rid of peripheral ring-atoms: %s"%Chem.MolToSmiles(mcsp, 
isomericSmiles=True))
    return Chem.MolFromSmarts(Chem.MolToSmiles(mcsp, isomericSmiles=True))

def ring_aware_mcs(molA, molB, loggero):
    nms = [Chem.Mol(x) for x in (molA, molB)]
    for nm in nms:
        old_smi = Chem.MolToSmiles(nm, isomericSmiles=True)
        for at in nm.GetAtoms():
            at.SetIsotope(ring_labels(at))
        new_smi = Chem.MolToSmiles(nm, isomericSmiles=True)
        loggero.debug( "OLD: %s\nNEW %s"%(old_smi, new_smi))
    mcs = rdFMCS.FindMCS(
        nms,
        maximizeBonds=True,
        matchValences=True,
        matchChiralTag=True,
        ringMatchesRingOnly=True, # ring should match other rings
        completeRingsOnly=True, # a ring atom is part of an MCS only if the 
whole ring is
        atomCompare=rdFMCS.AtomCompare.CompareIsotopes
    )
    mcs.smilesString = getMCSSmiles(molA, nms[0], mcs, loggero)
    return mcs
# end of ring_aware_mcs code

and a unittest snippet (the first two cases behave as expected, the second one 
doesn't):

    def test_ring_aware_MCS(self):
        molA = Chem.MolFromSmiles("SC1CCN(C1)S(=O)(=O)c1ccccc1")
        molB = Chem.MolFromSmiles("SC1CCN(P1)S(=O)(=O)c1ccccc1")
        MCS = ring_aware_mcs(molA, molB, self.testlog)
        print "Out of ring_aware_mcs, MCS.smilesString = %s" % MCS.smilesString
        self.assertEqual(MCS.smilesString , "O=S(=O)c1ccccc1") # this works

        molA = Chem.MolFromSmiles("C(N1CCCC1)C1=CC=CC=C1")
        molB = Chem.MolFromSmiles("C(N1CCOC1)C1=CC=CC=C1")
        MCS = ring_aware_mcs(molA, molB, self.testlog)
        print "Out of ring_aware_mcs, MCS.smilesString = %s" % MCS.smilesString
        self.assertEqual(MCS.smilesString , "Cc1ccccc1") # this works too

        molC = Chem.MolFromSmiles("O=C1NC(CN2CCCC2)C(=O)N1")
        molD = Chem.MolFromSmiles("O=C1NC(CN2CCOC2)C(=O)N1")
        MCS = ring_aware_mcs(molC, molD, self.testlog)
        print "Out of ring_aware_mcs, MCS.smilesString = %s" % MCS.smilesString
        self.assertEqual(MCS.smilesString , "CC1NC(=O)NC1=O") # doesn't work as 
expected - O=C1NC(CN)C(=O)N1 comes out instead

The elimination procedure I put in place seem to run as expected, but when the 
'reduced' smarts is applied to the labelled molecule for substructure matching, 
it doesn't return a list of atom indices. If the red branch of the code (which 
uses get_rid_of_isolated_ring_atoms()) doesn't work, the blue one takes over 
(hope the colors show) - but the MCS I obtain that way causes issues 
downstream, when I try to pass it on to third party conformer generation 
software to keep that portion of the molecule fixed.
Am I hitting a bug, or am I doing something wrong while juggling smarts, smiles 
and molecule objects?
Thanks in advance,

Luca Fenu - Computational Chemist
Computational Chemist, CADD Group, Chemistry
Eli Lilly and Company Limited
Erl Wood Manor, Sunninghill Road, Windlesham, Surrey, GU20 6PH
012.764.83354 (office) | 07858.701.646 (mobile)
[email protected]<mailto:[email protected]> | 
www.lilly.co.uk<http://www.lilly.co.uk>

[cid:[email protected]]


------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to