On Feb 5, 8:31 am, [EMAIL PROTECTED] wrote: > Hello again - > > I do not seem to be able to get a handle on non-greedy pattern > matching. >
Regexps wont cut it when you have to parse nested ()'s in a logical expression. Here is a pyparsing solution. For your specific application, you will need to sift through WHERE clauses too, you should be able to use this as a starting point for creating a WHERE clause parser. -- Paul from pyparsing import * # define terminal items identifier = Word(alphas,alphanums+'_') column_name = Combine( identifier + ZeroOrMore('.' + identifier) ) integer = Word(nums) value = integer | sglQuotedString relationalOperator = oneOf("< = > >= <= != <>") simple_comparison = Group(column_name + relationalOperator + (column_name | value)) AND_cl = CaselessLiteral("AND") OR_cl = CaselessLiteral("OR") NOT_cl = CaselessLiteral("NOT") logical_expression = operatorPrecedence( simple_comparison, [ (NOT_cl, 1, opAssoc.RIGHT), (OR_cl, 2, opAssoc.LEFT), (AND_cl, 2, opAssoc.LEFT), ]) # syntax for FROM ... join expression FROM_cl = CaselessLiteral("FROM") ON_cl = CaselessLiteral("ON") INNER_cl = CaselessLiteral("INNER") JOIN_cl = CaselessLiteral("JOIN") join_expr = identifier('table1') + INNER_cl + JOIN_cl + \ identifier('table2') + ON_cl + \ logical_expression('join_condition') from_clause = FROM_cl + '(' + join_expr + ')' test = """ FROM (qry_Scores_Lookup1 INNER JOIN CSS_Rpt1 ON (qry_Scores_Lookup1.desc = CSS_Rpt1.desc) AND (qry_Scores_Lookup1.lastcdu = CSS_Rpt1.lastcdu))""" # parsing the FROM clause normally join_fields = from_clause.parseString(test) print "\nParsed tokens, plus any named fields" print join_fields.dump() print "\nAccessing some named fields directly" print "table1 =", join_fields.table1 print "join_condition =", join_fields.join_condition # create parse action to tally up column names as they are found # during the parsing process def tally_names(tokens): column_names.append(tokens[0]) column_name.setParseAction(tally_names) # parse the FROM clause, but just let the parse action do the work print "\nExtracting the column names" column_names = [] from_clause.parseString(test) print column_names Prints: Parsed tokens, plus any named fields ['FROM', '(', 'qry_Scores_Lookup1', 'INNER', 'JOIN', 'CSS_Rpt1', 'ON', [['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'], 'AND', ['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']], ')'] - join_condition: [['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'], 'AND', ['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']] - table1: qry_Scores_Lookup1 - table2: CSS_Rpt1 Accessing some named fields directly table1 = qry_Scores_Lookup1 join_condition = [['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'], 'AND', ['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']] Extracting the column names ['qry_Scores_Lookup1.desc', 'CSS_Rpt1.desc', 'qry_Scores_Lookup1.lastcdu', 'CSS_Rpt1.lastcdu'] -- http://mail.python.org/mailman/listinfo/python-list