I am not an expert on REs yet; this is my first possible solution:

import re
# Sample input: each tag1 carries a name, each tag2 carries an adjective
# wrapped as "adj__<word>__".  Other tags (e.g. <br/>) are ignored.
txt = """
<tag1 name="john"/>
<br/>
<tag2 value="adj__tall__"/>
<tag1 name="joe"/>
<tag1 name="jack"/>
<tag2 value="adj__short__"/>"""

# Verbose (re.X) pattern: whitespace and "#" comments inside it are ignored
# by the regex engine.  The closing ">" is mandatory: a lone "?" after it
# would act as the quantifier ">?" and let unterminated tags match.
tfinder = r"""<    # The opening < the tag to find
\s*                # Possible space or newline
(tag[12])          # First subgroup, the identifier, tag1 or tag2
\s+                # There must be a space or newline or more
(?:name|value)     # Name or value, non-grouping
\s*                # Possible space or newline
=                  # The =
\s*                # Possible space or newline
"                  # Opening "
([^"]*)            # Second subgroup, the tag string, it can't contain "
"                  # Closing " of the string
\s*                # Possible space or newline
/?                 # One optional ending /
\s*                # Possible space or newline
>                  # The closing > of the tag
"""

# Flags are bit masks, so combine them with "|" rather than "+".
patt = re.compile(tfinder, flags=re.I | re.X)


def find_pairs(text):
    """Yield (name, adjective) for every tag2 directly preceded by a tag1.

    "Directly preceded" refers to the sequence of tags matched by ``patt``;
    anything the pattern does not match (such as ``<br/>``) is skipped and
    does not break a pair.

    Args:
        text: The markup to scan.

    Yields:
        Tuples ``(name, adjective)`` where ``name`` is the quoted string of
        the tag1 and ``adjective`` is the quoted string of the following
        tag2 with every "adj__" removed and leading/trailing underscores
        stripped.
    """
    prev_type = ""
    prev_string = ""
    for mobj in patt.finditer(text):
        curr_type, curr_string = mobj.groups()
        # The pattern is case-insensitive, so normalise the tag name before
        # comparing; otherwise "<TAG1 ...>" would match but never pair up.
        curr_type = curr_type.lower()
        if curr_type == "tag2" and prev_type == "tag1":
            yield prev_string, curr_string.replace("adj__", "").strip("_")
        prev_type = curr_type
        prev_string = curr_string


for name, adjective in find_pairs(txt):
    # A single %-formatted argument prints identically on Python 2 and 3.
    print("%s %s" % (name, adjective))

# Bye,
# bearophile
# --
# http://mail.python.org/mailman/listinfo/python-list