Steven D'Aprano, 30.12.2009 07:01:
def _re_match_items(s): # Setup some regular expressions. COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)' FLOAT_RE = COMMON_RE + '$' BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$' DATE_RE = r'\d{1,2}-\w+-\d{1,2}$' mo = re.match(FLOAT_RE, s) # "mo" short for "match object" if mo: return float(mo.group(1).replace(',', '')) # Otherwise mo will be None and we go on to the next test. mo = re.match(BRACKETED_FLOAT_RE, s) if mo: return -float(mo.group(1).replace(',', '')) if re.match(DATE_RE, s): return dateutil.parser.parse(s, dayfirst=True) raise ValueError("bad string can't be matched")
Given that this is meant for converting single data items, which may happen quite frequently in a program (depending on the size of the input), you might want to use pre-compiled regexps here.
Also, you can convert the above into a single regexp with multiple alternative groups and then just run the matcher once, e.g. (untested):
COMMON_RE = r'\$?([-+]?[0-9,]*\.?[0-9,]+)'
FLOAT_RE = COMMON_RE + '$'
BRACKETED_FLOAT_RE = r'\(' + COMMON_RE + r'\)$'
DATE_RE = r'(\d{1,2}-\w+-\d{1,2})$'  # note the surrounding () I added

# A single pre-compiled pattern with three alternatives. Each alternative
# contributes exactly one capture group, so after a successful match
# exactly one of the three groups is not None and identifies the format.
match_data_items = re.compile('|'.join(
    [BRACKETED_FLOAT_RE, FLOAT_RE, DATE_RE])).match


def convert_data_item(s):
    """Convert one data-item string into a float or a parsed date.

    The string is matched once against the combined pattern:

    - a bracketed number such as "($1,234.50)" yields the negated float;
    - a plain number such as "$1,234.50" yields the float;
    - a date-like string (digits-word-digits) is passed to
      dateutil.parser.parse with dayfirst=True.

    Commas are stripped before conversion; their position is not
    validated.

    Raises ValueError if the string matches none of the formats.
    """
    match = match_data_items(s)
    if match:
        # Group order follows the order of the alternatives joined above.
        bfloat_value, float_value, date_value = match.groups()
        if bfloat_value is not None:
            return -float(bfloat_value.replace(',', ''))
        if float_value is not None:
            # Must use float_value here: bfloat_value is None in this
            # branch, so calling .replace() on it would fail.
            return float(float_value.replace(',', ''))
        if date_value is not None:
            return dateutil.parser.parse(date_value, dayfirst=True)
    raise ValueError("bad string can't be matched")


# Stefan
# --
# http://mail.python.org/mailman/listinfo/python-list