hi there
I'm experimenting with imaplib and came across strings like (\HasNoChildren) "." "INBOX.Sent Items" in which the quotes are part of the string.
now i try to convert this into a list. assume the string is in the variable f, then i tried
f.split()
but i end up with
['(\\HasNoChildren)', '"."', '"INBOX.Sent', 'Items"']
So due to the space in "Sent Items" it is separated into two entries, which I don't want.
is there another way to convert a string with quoted sub entries into a list of strings?
In Twisted's protocols/imap4.py module there is a function called parseNestedParens() that can be ripped out of the module.
I have used it for another project and put it into this attachment.
--
hilsen/regards Max M, Denmark
http://www.mxm.dk/ IT's Mad Science
""" This code was stolen from Twisteds protocols/imap4.py module """
import re
import string
import types  # no longer used below; kept so existing importers are unaffected


class IMAP4Exception(Exception):
    """Base class for the parsing errors raised by this module."""

    def __init__(self, *args):
        Exception.__init__(self, *args)


class MismatchedNesting(IMAP4Exception):
    """Raised when parentheses/brackets are not properly balanced."""
    pass


class MismatchedQuoting(IMAP4Exception):
    """Raised when a double-quoted token is never closed."""
    pass


def wildcardToRegexp(wildcard, delim=None):
    """Compile a mailbox wildcard into a case-insensitive regular expression.

    Every C{*} is replaced by a non-greedy "match anything" group.  Every
    C{%} is replaced by the same group when C{delim} is C{None}, otherwise
    by a non-greedy group that matches anything except C{delim}.

    All other characters of C{wildcard} are passed to the regular
    expression compiler unchanged (they are NOT escaped).

    @type wildcard: C{str}
    @param wildcard: The wildcard pattern to convert

    @type delim: C{str} or C{None}
    @param delim: The character(s) that C{%} must not match

    @return: The compiled pattern object (compiled with C{re.I})
    """
    wildcard = wildcard.replace('*', '(?:.*?)')
    if delim is None:
        wildcard = wildcard.replace('%', '(?:.*?)')
    else:
        wildcard = wildcard.replace(
            '%', '(?:(?:[^%s])*?)' % re.escape(delim))
    return re.compile(wildcard, re.I)


def _nilToNone(token):
    """Return C{None} for the unquoted token C{NIL}, else the token itself."""
    if token == 'NIL':
        return None
    return token


def splitQuoted(s):
    """Split a string into whitespace delimited tokens

    Tokens that would otherwise be separated but are surrounded by
    double quotes remain as a single token.  Any token that is not quoted
    and is equal to "NIL" is tokenized as C{None}.

    @type s: C{str}
    @param s: The string to be split

    @rtype: C{list} of C{str}
    @return: A list of the resulting tokens

    @raise MismatchedQuoting: Raised if an odd number of quotes are present
    """
    s = s.strip()
    result = []
    inQuote = inWord = start = 0
    wordBreakers = '"' + string.whitespace
    for i, c in enumerate(s):
        if c == '"' and not inQuote:
            # Opening quote: the token starts right after it.
            inQuote = 1
            start = i + 1
        elif c == '"' and inQuote:
            # Closing quote: emit the quoted text verbatim (never as None).
            inQuote = 0
            result.append(s[start:i])
            start = i + 1
        elif not inWord and not inQuote and c not in wordBreakers:
            # First character of an unquoted word.
            inWord = 1
            start = i
        elif inWord and not inQuote and c in string.whitespace:
            # Whitespace terminates an unquoted word.
            result.append(_nilToNone(s[start:i]))
            start = i
            inWord = 0
    if inQuote:
        raise MismatchedQuoting(s)
    if inWord:
        # The string ended while still inside an unquoted word.
        result.append(_nilToNone(s[start:]))
    return result


def splitOn(sequence, predicate, transformers):
    """Group consecutive elements by C{predicate} and transform each group.

    C{sequence} is cut into maximal runs of adjacent elements for which
    C{predicate} returns the same value.  Each run is passed, as a list, to
    C{transformers[value]} and the returned iterables are concatenated.

    @param sequence: A non-empty, sliceable sequence (its first element is
        read unconditionally)
    @param predicate: A one-argument callable classifying each element
    @param transformers: A mapping from predicate results to callables that
        take a list and return an iterable

    @rtype: C{list}
    @return: The concatenation of all transformed runs
    """
    result = []
    mode = predicate(sequence[0])
    tmp = [sequence[0]]
    for e in sequence[1:]:
        p = predicate(e)
        if p != mode:
            # The run ended: flush it and start a new one with this element.
            result.extend(transformers[mode](tmp))
            tmp = [e]
            mode = p
        else:
            tmp.append(e)
    result.extend(transformers[mode](tmp))
    return result


def collapseStrings(results):
    """
    Turns a list of length-one strings and lists into a list of longer
    strings and lists.

    For example, ['a', 'b', ['c', 'd']] is returned as ['ab', ['cd']]

    One-element tuples in C{results} (as produced for literals by
    L{parseNestedParens}) are joined on their first item and are not
    re-tokenized; runs of plain characters are joined and passed through
    L{splitQuoted}.

    @type results: C{list} of C{str} and C{list}
    @param results: The list to be collapsed

    @rtype: C{list} of C{str} and C{list}
    @return: A new list which is the collapsed form of C{results}
    """
    copy = []
    begun = None  # index where the current run of non-list items started

    pred = lambda e: isinstance(e, tuple)
    # Keyed by the predicate's result; False/True hash and compare as 0/1.
    tran = {
        0: lambda e: splitQuoted(''.join(e)),
        1: lambda e: [''.join([i[0] for i in e])],
    }
    for i, c in enumerate(results):
        if isinstance(c, list):
            if begun is not None:
                copy.extend(splitOn(results[begun:i], pred, tran))
                begun = None
            copy.append(collapseStrings(c))
        elif begun is None:
            begun = i
    if begun is not None:
        copy.extend(splitOn(results[begun:], pred, tran))
    return copy


def parseNestedParens(s, handleLiteral=1):
    """Parse an s-exp-like string into a more useful data structure.

    @type s: C{str}
    @param s: The s-exp-like string to parse

    @type handleLiteral: C{int} or C{bool}
    @param handleLiteral: If true, an unquoted C{{N}} introduces a literal
        of C{N} characters that is taken verbatim as a single token

    @rtype: C{list} of C{str} and C{list}
    @return: A list containing the tokens present in the input.

    @raise MismatchedNesting: Raised if the number or placement
    of opening or closing parenthesis is invalid.

    @raise MismatchedQuoting: Raised (by L{splitQuoted}) if a quoted
    token is left unterminated.

    @raise ValueError: Raised if a literal has no closing brace or a
    non-numeric size.
    """
    s = s.strip()
    inQuote = 0
    contentStack = [[]]  # innermost open list is always contentStack[-1]
    try:
        i = 0
        L = len(s)
        while i < L:
            c = s[i]
            if inQuote:
                if c == '\\':
                    # Backslash escape: keep only the escaped character.
                    contentStack[-1].append(s[i + 1])
                    i += 2
                    continue
                elif c == '"':
                    inQuote = not inQuote
                contentStack[-1].append(c)
                i += 1
            else:
                if c == '"':
                    contentStack[-1].append(c)
                    inQuote = not inQuote
                    i += 1
                elif handleLiteral and c == '{':
                    end = s.find('}', i)
                    if end == -1:
                        raise ValueError("Malformed literal")
                    literalSize = int(s[i + 1:end])
                    # end + 3 skips the closing brace and the two characters
                    # after it -- presumably the CRLF that follows an IMAP
                    # literal size; TODO confirm against callers.
                    # The 1-tuple marks the text as a literal so that
                    # collapseStrings does not re-tokenize it.
                    contentStack[-1].append(
                        (s[end + 3:end + 3 + literalSize],))
                    i = end + 3 + literalSize
                elif c == '(' or c == '[':
                    contentStack.append([])
                    i += 1
                elif c == ')' or c == ']':
                    # With nothing open, contentStack[-2] raises IndexError
                    # (before the pop runs), reported as MismatchedNesting.
                    contentStack[-2].append(contentStack.pop())
                    i += 1
                else:
                    contentStack[-1].append(c)
                    i += 1
    except IndexError:
        raise MismatchedNesting(s)
    if len(contentStack) != 1:
        # At least one opening parenthesis/bracket was never closed.
        raise MismatchedNesting(s)
    return collapseStrings(contentStack[0])


if __name__ == '__main__':
    # Raw strings keep the backslashes literal; "\N" and "\U" are otherwise
    # treated as (invalid) escape sequences on Python 3.
    r = (r'(\Noinferiors \Unmarked) "/" "INBOX"'
         r'(\Unmarked) "/" "test"'
         r'(\Noinferiors \Unmarked) "/" "Sent Items"'
         r'(\Noinferiors \Unmarked) "/" "Calendar"'
         r'(\Noinferiors \Unmarked) "/" "Checklist"'
         r'(\Unmarked) "/" "Cabinet"'
         r'(\Noinferiors \Marked) "/" "Trash"'
         r'(\Unmarked) "/" "INBOX.Sent"'
         r'(\Unmarked) "/" "Sent"')
    parsedParens = parseNestedParens(r)
    print(parsedParens)
    # The parsed result is a flat list of (flags, separator, name) triples.
    for i in range(0, len(parsedParens), 3):
        (flags, seperator, folderName) = parsedParens[i:i + 3]
        print(flags)
        print(seperator)
        print(folderName)
-- http://mail.python.org/mailman/listinfo/python-list