Paul McGuire wrote: >>>I have to differentiate between: >>> (NP -x-y) >>>and: >>> (NP-x -y) >>>I'm doing this now using Combine. Does that seem right? > > If your word char set is just alphanums+"-", then this will work > without doing anything unnatural with leaveWhitespace: > > from pyparsing import * > > thing = Word(alphanums+"-") > LPAREN = Literal("(").suppress() > RPAREN = Literal(")").suppress() > node = LPAREN + OneOrMore(thing) + RPAREN > > print node.parseString("(NP -x-y)") > print node.parseString("(NP-x -y)") > > will print: > > ['NP', '-x-y'] > ['NP-x', '-y']
I actually need to break these into: ['NP', '-x-y'] {'tag':'NP', 'word':'-x-y'} ['NP', 'x', 'y'] {'tag':'NP', 'functions':['x'], 'word':'y'} I know the dict syntax afterwards isn't quite what pyparsing would output, but hopefully my intent is clear. I need to use the dict-style results from setResultsName() calls because in the full grammar, I have a lot of optional elements. For example: (NP-1 -a) --> {'tag':'NP', 'id':'1', 'word':'-a'} (NP-x-2 -B) --> {'tag':'NP', 'functions':['x'], 'id':'2', 'word':'-B'} (NP-x-y=2-3 -4) --> {'tag':'NP', 'functions':['x', 'y'], 'coord':'2', 'id':'3', 'word':'-4'} (-NONE- x) --> {'tag':None, 'word':'x'} STeVe P.S. In case you're curious, here's my current draft of the code: # some character classes printables_trans = _pp.printables.translate word_chars = printables_trans(_id_trans, '()') word_elem = _pp.Word(word_chars) syn_chars = printables_trans(_id_trans, '()-=') syn_word = _pp.Word(syn_chars) func_chars = printables_trans(_id_trans, '()-=0123456789') func_word = _pp.Word(func_chars) num_word = _pp.Word(_pp.nums) # tag separators dash = _pp.Literal('-') tag_sep = dash.suppress() coord_sep = _pp.Literal('=').suppress() # tag types (use Combine to guarantee no spaces) special_tag = _pp.Combine(dash + syn_word + dash) syn_tag = syn_word func_tags = _pp.ZeroOrMore(_pp.Combine(tag_sep + func_word)) coord_tag = _pp.Optional(_pp.Combine(coord_sep + num_word)) id_tag = _pp.Optional(_pp.Combine(tag_sep + num_word)) # give tag types result names special_tag = special_tag.setResultsName('tag') syn_tag = syn_tag.setResultsName('tag') func_tags = func_tags.setResultsName('funcs') coord_tag = coord_tag.setResultsName('coord') id_tag = id_tag.setResultsName('id') # combine tag types into a tags element normal_tags = syn_tag + func_tags + coord_tag + id_tag tags = special_tag | _pp.Combine(normal_tags) def get_tag(orig_string, tokens_start, tokens): tokens = dict(tokens) tag = tokens.pop('tag') if tag == '-NONE-': tag = None functions = 
list(tokens.pop('funcs', [])) coord = tokens.pop('coord', None) id = tokens.pop('id', None) return [dict(tag=tag, functions=functions, coord=coord, id=id)] tags.setParseAction(get_tag) # node parentheses start = _pp.Literal('(').suppress() end = _pp.Literal(')').suppress() # words word = word_elem.setResultsName('word') # leaf nodes leaf_node = tags + _pp.Optional(word) def get_leaf_node(orig_string, tokens_start, tokens): try: tag_dict, word = tokens word = cls._unescape(word) except ValueError: tag_dict, = tokens word = None return cls(word=word, **tag_dict) leaf_node.setParseAction(get_leaf_node) # node, recursive node = _pp.Forward() # branch nodes branch_node = tags + _pp.OneOrMore(node) def get_branch_node(orig_string, tokens_start, tokens): return cls(children=tokens[1:], **tokens[0]) branch_node.setParseAction(get_branch_node) # node, recursive node << start + (branch_node | leaf_node) + end # root node may have additional parentheses root_node = node | start + node + end root_nodes = _pp.OneOrMore(root_node) # make sure nodes start and end string str_start = _pp.StringStart() str_end = _pp.StringEnd() cls._root_node = str_start + root_node + str_end cls._root_nodes = str_start + root_nodes + str_end -- http://mail.python.org/mailman/listinfo/python-list