Steven D'Aprano, 30.12.2009 07:01:
def _re_match_items(s):
    # Setup some regular expressions.
    COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)'
    FLOAT_RE = COMMON_RE + '$'
    BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$'
    DATE_RE = r'\d{1,2}-\w+-\d{1,2}$'
    mo = re.match(FLOAT_RE, s)  # "mo" short for "match object"
    if mo:
        return float(mo.group(1).replace(',', ''))
    # Otherwise mo will be None and we go on to the next test.
    mo = re.match(BRACKETED_FLOAT_RE, s)
    if mo:
        return -float(mo.group(1).replace(',', ''))
    if re.match(DATE_RE, s):
        return dateutil.parser.parse(s, dayfirst=True)
    raise ValueError("bad string can't be matched")

Given that this is meant for converting single data items, which may happen quite frequently in a program (depending on the size of the input), you might want to use pre-compiled regexps here.

Also, you can convert the above into a single regexp with multiple alternative groups and then just run the matcher once, e.g. (untested):

    # Building blocks for the combined pattern.  COMMON_RE captures an
    # optionally signed number (commas allowed) after an optional "$".
    COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)'
    FLOAT_RE = COMMON_RE + '$'
    BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$'
    DATE_RE = r'(\d{1,2}-\w+-\d{1,2})$'  # note the surrounding () I added

    # Compiled once and reused for every item.  The alternatives are joined
    # in the order bracketed / plain / date, so the match has exactly three
    # groups in that order and at most one of them is not None.
    match_data_items = re.compile('|'.join(
        [BRACKETED_FLOAT_RE, FLOAT_RE, DATE_RE])).match

    def convert_data_item(s):
        """Convert one textual data item into a float or a parsed date.

        Runs the single pre-compiled matcher and dispatches on whichever
        alternative captured text:

        * bracketed number, e.g. ``(1,000)`` -> negated ``float``;
        * plain number, e.g. ``$1,234.5`` -> ``float``;
        * date such as ``<1-2 digits>-<word>-<1-2 digits>`` -> result of
          ``dateutil.parser.parse(..., dayfirst=True)``.

        Raises:
            ValueError: if ``s`` matches none of the alternatives (or the
                captured digits cannot be converted by ``float``).
        """
        match = match_data_items(s)
        if match:
            bfloat_value, float_value, date_value = match.groups()
            if bfloat_value:
                return -float(bfloat_value.replace(',', ''))
            if float_value:
                # Use the plain-number group here; bfloat_value is None
                # whenever this branch is reached.
                return float(float_value.replace(',', ''))
            if date_value:
                return dateutil.parser.parse(date_value, dayfirst=True)
        raise ValueError("bad string can't be matched")

Stefan
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to