Re: split a string with quoted parts into list

Max M Thu, 10 Mar 2005 02:30:09 -0800

oliver wrote:

hi there
i'm experimanting with imaplib and came across stringts like
    (\HasNoChildren) "." "INBOX.Sent Items"
in which the quotes are part of the string.
now i try to convert this into a list. assume the string is in the variable f, then i tried f.split() but i end up with ['(\\HasNoChildren)', '"."', '"INBOX.Sent', 'Items"'] so due to the sapce in "Sent Items" its is sepearted in two entries, what i don't want.

is there another way to convert a string with quoted sub entries into a list of strings?

In Twisteds protocols/imap4.py module there is a function called parseNestedParens() that can be ripped out of the module.

I have used it for another project and put it into this attachment.

--

hilsen/regards Max M, Denmark

http://www.mxm.dk/
IT's Mad Science

"""
This code was stolen from Twisteds protocols/imap4.py  module
"""


import types, string

class IMAP4Exception(Exception):
    def __init__(self, *args):
        Exception.__init__(self, *args)

class MismatchedNesting(IMAP4Exception):
    pass

class MismatchedQuoting(IMAP4Exception):
    pass

def wildcardToRegexp(wildcard, delim=None):
    wildcard = wildcard.replace('*', '(?:.*?)')
    if delim is None:
        wildcard = wildcard.replace('%', '(?:.*?)')
    else:
        wildcard = wildcard.replace('%', '(?:(?:[^%s])*?)' % re.escape(delim))
    return re.compile(wildcard, re.I)

def splitQuoted(s):
    """Split a string into whitespace delimited tokens

    Tokens that would otherwise be separated but are surrounded by \"
    remain as a single token.  Any token that is not quoted and is
    equal to \"NIL\" is tokenized as C{None}.

    @type s: C{str}
    @param s: The string to be split

    @rtype: C{list} of C{str}
    @return: A list of the resulting tokens

    @raise MismatchedQuoting: Raised if an odd number of quotes are present
    """
    s = s.strip()
    result = []
    inQuote = inWord = start = 0
    for (i, c) in zip(range(len(s)), s):
        if c == '"' and not inQuote:
            inQuote = 1
            start = i + 1
        elif c == '"' and inQuote:
            inQuote = 0
            result.append(s[start:i])
            start = i + 1
        elif not inWord and not inQuote and c not in ('"' + string.whitespace):
            inWord = 1
            start = i
        elif inWord and not inQuote and c in string.whitespace:
            if s[start:i] == 'NIL':
                result.append(None)
            else:
                result.append(s[start:i])
            start = i
            inWord = 0
    if inQuote:
        raise MismatchedQuoting(s)
    if inWord:
        if s[start:] == 'NIL':
            result.append(None)
        else:
            result.append(s[start:])
    return result


def splitOn(sequence, predicate, transformers):
    result = []
    mode = predicate(sequence[0])
    tmp = [sequence[0]]
    for e in sequence[1:]:
        p = predicate(e)
        if p != mode:
            result.extend(transformers[mode](tmp))
            tmp = [e]
            mode = p
        else:
            tmp.append(e)
    result.extend(transformers[mode](tmp))
    return result


def collapseStrings(results):
    """
    Turns a list of length-one strings and lists into a list of longer
    strings and lists.  For example,

    ['a', 'b', ['c', 'd']] is returned as ['ab', ['cd']]

    @type results: C{list} of C{str} and C{list}
    @param results: The list to be collapsed

    @rtype: C{list} of C{str} and C{list}
    @return: A new list which is the collapsed form of C{results}
    """
    copy = []
    begun = None
    listsList = [isinstance(s, types.ListType) for s in results]

    pred = lambda e: isinstance(e, types.TupleType)
    tran = {
        0: lambda e: splitQuoted(''.join(e)),
        1: lambda e: [''.join([i[0] for i in e])]
    }
    for (i, c, isList) in zip(range(len(results)), results, listsList):
        if isList:
            if begun is not None:
                copy.extend(splitOn(results[begun:i], pred, tran))
                begun = None
            copy.append(collapseStrings(c))
        elif begun is None:
            begun = i
    if begun is not None:
        copy.extend(splitOn(results[begun:], pred, tran))
    return copy




def parseNestedParens(s, handleLiteral = 1):
    """Parse an s-exp-like string into a more useful data structure.

    @type s: C{str}
    @param s: The s-exp-like string to parse

    @rtype: C{list} of C{str} and C{list}
    @return: A list containing the tokens present in the input.

    @raise MismatchedNesting: Raised if the number or placement
    of opening or closing parenthesis is invalid.
    """
    s = s.strip()
    inQuote = 0
    contentStack = [[]]
    try:
        i = 0
        L = len(s)
        while i < L:
            c = s[i]
            if inQuote:
                if c == '\\':
                    contentStack[-1].append(s[i+1])
                    i += 2
                    continue
                elif c == '"':
                    inQuote = not inQuote
                contentStack[-1].append(c)
                i += 1
            else:
                if c == '"':
                    contentStack[-1].append(c)
                    inQuote = not inQuote
                    i += 1
                elif handleLiteral and c == '{':
                    end = s.find('}', i)
                    if end == -1:
                        raise ValueError, "Malformed literal"
                    literalSize = int(s[i+1:end])
                    contentStack[-1].append((s[end+3:end+3+literalSize],))
                    i = end + 3 + literalSize
                elif c == '(' or c == '[':
                    contentStack.append([])
                    i += 1
                elif c == ')' or c == ']':
                    contentStack[-2].append(contentStack.pop())
                    i += 1
                else:
                    contentStack[-1].append(c)
                    i += 1
    except IndexError:
        raise MismatchedNesting(s)
    if len(contentStack) != 1:
        raise MismatchedNesting(s)
    return collapseStrings(contentStack[0])


if __name__=='__main__':

    r = '(\Noinferiors \Unmarked) "/" "INBOX"(\Unmarked) "/" 
"test"(\Noinferiors \Unmarked) "/" "Sent Items"(\Noinferiors \Unmarked) "/" 
"Calendar"(\Noinferiors \Unmarked) "/" "Checklist"(\Unmarked) "/" 
"Cabinet"(\Noinferiors \Marked) "/" "Trash"(\Unmarked) "/" 
"INBOX.Sent"(\Unmarked) "/" "Sent"'
    
    parsedParens = parseNestedParens(r)
    print parsedParens
    for i in range(0, len(parsedParens), 3):
        (flags, seperator, folderName) = parsedParens[i:i+3]
        print flags
        print seperator
        print folderName

-- 
http://mail.python.org/mailman/listinfo/python-list

Re: split a string with quoted parts into list

Reply via email to