Question regarding lists and regex
Here is a simple program, which queries /var/log/daemon on my OpenBSD box and gets the list of valid ntp peers. Questions: what is the easiest way for me to create lists on the fly, by that I mean like perl push my @foo, something_from_say_stderr. The reason is, as you can see from the ip = [""] statement before the for loop, I want to avoid that and use list within the second ip loop, where I extract the ip address. Am I confusing? regex: I presume this is rather a dumb question, anyways here it comes! as you can see from my program, pattIp = r\d{1,3}\ etc, is there any other easy way to group the repetitions, instead of typing the same regex 4 times. TIA Prabhu - amazon: [~/working/programs/python/regex] ttyp4: [109]$ cat syslog.py #!/usr/bin/env python # $Id: syslog.py,v 1.6 2006/11/09 06:24:03 pgurumur Exp $ import getopt, re, os, string, sys, time (dirname, program) = os.path.split(sys.argv[0]) argc = len(sys.argv) def usage(): print program + ": options" print "options: " print " --filename | -f [ name of the file ]" print " --help | -h [ prints this help ]" sys.exit(1) if __name__ == "__main__": if (argc <= 1): usage() else: try: opts, args = getopt.getopt(sys.argv[1:], "f:h", ["help", "filename="]) except getopt.GetoptError: usage() else: filename = "" for optind, optarg in opts: if optind in ("-f", "--filename"): filename = optarg elif optind in ("-h", "--help"): usage() if len(filename): fh = 0 try: fh = open(filename, "r") except IOError, (error, message): print program + ": cannot open " + filename + ": " + message sys.exit(1) pattNtp = r'.*ntpd(?=.*now\s+valid)' count = 0 ip = [""] pid = 0 for line in fh.readlines(): if re.match(pattNtp, line.strip(), re.IGNORECASE): string = line.strip() pattPid = r'\[\d{1,5}\]' pidMatch = re.search(pattPid, string, re.IGNORECASE) if pidMatch is not None: pid = int(re.sub(r'\[|\]', "", pidMatch.group())) pattIp = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}' match = re.search(pattIp, string, re.IGNORECASE) if match is not None: 
ip.append(match.group()) count += 1 print "NTP program started with pid:", pid print "Number of valid peers:", count for x in ip: if len(x): print x fh.close() -- http://mail.python.org/mailman/listinfo/python-list
Question on regex
Hello all - I have a file which has IP address and subnet number and I use regex to extract the IP separately from subnet. pattern used for IP: \d{1,3}(\.\d{1,3}){3} pattern used for subnet:((\d{1,3})|(\d{1,3}(\.\d{1,3}){1,3}))/(\d{1,2}) so I have list of ip/subnets strewn around like this 10.200.0.34 10.200.4.5 10.178.9.45 10.200/22 10.178/16 10.100.4.64/26, 10.150.100.0/28 10/8 with that above examples: ip regex pattern works for all IP address subnet regex pattern works for all subnets problem now is ip pattern also matches the last 2 subnet numbers, because it falls under ip regex. to fix this problem, i used negative lookahead with ip pattern: so the ip pattern now changes to: \d{1,3}(\.\d{1,3}){3}(?!/\d+) now the problem is 10.150.100.0 works fine, 10.100.4.64 subnet gets matched with ip pattern with the following result: 10.100.4.6 Is there a workaround for this or what should change in ip regex pattern. python script: #!/usr/bin/env python import re, sys fh = 0 try: fh = open(sys.argv[1], "r") except IOError, message: print "cannot open file: %s" %message else: for lines in fh.readlines(): lines = lines.strip() pattIp = re.compile("(\d{1,3}(\.\d{1,3}){3})(?!/\d+)") pattNet = re.compile("((\d{1,3})|(\d{1,3}(\.\d{1,3}){1,3}))/(\d{1,2})") match = pattIp.search(lines) if match is not None: print "ipmatch: %s" %match.groups()[0] match = pattNet.search(lines) if match is not None: print "subnet: %s" %match.groups()[0] fh.close() output with that above ip/subnet in a file ipmatch: 10.200.0.34 ipmatch: 10.200.4.5 ipmatch: 10.178.9.45 subnet: 10.200 subnet: 10.178 ipmatch: 10.100.4.6 subnet: 10.100.4.64 subnet: 10.150.100.0 subnet: 10 TIA Prabhu begin:vcard fn:Prabhu Gurumurthy n:Gurumurthy;Prabhu org:Silver Spring Networks;IT adr:Suite 205;;2755 Campus Drive;San Mateo;CA;94403;USA email;internet:[EMAIL PROTECTED] title:Network Engineer tel;work:(650) 357 8770 x134 tel;home:(650) 585 6527 tel;cell:(831) 224 0894 x-mozilla-html:FALSE 
url:http://www.silverspringnet.com version:2.1 end:vcard -- http://mail.python.org/mailman/listinfo/python-list
help parsing ipv6 addresses and subnets
Hello list, I would like to parse IPv6 addresses and subnet using re module in python. I am able to either parse the ipv6 address or ipv6 network but not both using single line. any help appreciated. BTW is there a metacharacter for hex digits. Thanks Prabhu - - #!/usr/bin/env python2.5 # $Id: $ import re, os, Queue, sys from optparse import OptionParser # for debug purposes only import pdb argc = len(sys.argv) (dirname, program) = os.path.split(sys.argv[0]) def error(Message): if Message: global program print "%s: %s" %(program, Message) sys.exit(1) def cisco_parse(FileName): if FileName: fh = None if os.path.exists(FileName): try: fh = open(FileName, "r") except IOError, message: error(message) else: count = 0 flag = True while flag: try: lines = fh.next() except StopIteration: flag = False else: line = lines.strip() rehex = "[A-Fa-f0-9]" # to parse ipv6 address format = \ "((%s{1,4}:?:){1,7}%s{1,4})" %(rehex, rehex) # to parse ipv6 subnet # format = \ # "((%s{1,4}:?:){1,7}%s{1,4}(?=(::/\d{1,3})))" %(rehex, rehex) reip6 = re.compile(format) match = reip6.search(line) if match is not None: tupleLen = len(match.groups()) if tupleLen == 2: print count, match.groups()[0] elif tupleLen == 3: print count, match.groups()[0] + match.groups()[2] count += 1 fh.close() fh = None def ParseCmdLine(): parser = OptionParser(usage="%prog [options]", version="%prog 1.0") parser.add_option("-f", "--filename", help="cisco config to read", dest="file") (options, args) = parser.parse_args() fileName = None if options.file: fileName = options.file if fileName: cisco_parse(fileName) if __name__ == "__main__": if argc <= 1: error("too few arguments, use -h or --help to view all options") else: ParseCmdLine() -- http://mail.python.org/mailman/listinfo/python-list
help with pyparsing
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 All, I have the following lines that I would like to parse in python using pyparsing, but have some problems forming the grammar. Line in file: table const { 207.135.103.128/26, 207.135.112.64/29 } table persist { ! 10.200.2/24, 10.200/22 } table const { 192.168/16, ! 172.24.1/29, 172.16/12, 169.254/16 } table persist { 10.202/22 } table const { 10.206/22 } table const { \ 10.205.1/24, \ 169.136.241.68, \ 169.136.241.70, \ 169.136.241.71, \ 169.136.241.72, \ 169.136.241.75, \ 169.136.241.76, \ 169.136.241.77, \ 169.136.241.78, \ 169.136.241.79, \ 169.136.241.81, \ 169.136.241.82, \ 169.136.241.85 } I have the following grammar defn. tableName = Word(alphanums + "-" + "_") leftClose = Suppress("<") rightClose = Suppress(">") key = Suppress("table") tableType = Regex("persist|const") ip4Address = OneOrMore(Word(nums + ".")) ip4Network = Group(ip4Address + Optional(Word("/") + OneOrMore(Word(nums)))) temp = ZeroOrMore("\\" + "\n") tableList = OneOrMore(Optional("\\") | ip4Network | ip4Address | Suppress(",") | Literal("!")) leftParen = Suppress("{") rightParen = Suppress("}") table = key + leftClose + tableName + rightClose + tableType + \ leftParen + tableList + rightParen I cannot seem to match sixth line in the file above, i.e. the table name with KS, how do I form the grammar for it, BTW, I still cannot seem to ignore comments using table.ignore(Literal("#") + restOfLine), I get a parse error. Any help appreciated. Thanks Prabhu - - -BEGIN PGP SIGNATURE- Version: GnuPG v2.0.4-svn0 (GNU/Linux) Comment: Using GnuPG with SUSE - http://enigmail.mozdev.org iD8DBQFHXMhFTkjpaeKzB9YRAmZYAJ9Lyys6+xCrGEsyy33AoRWVdUOXQwCfTG9Q /f7JZ2pAW6WDSzs79jbDFQE= =CGb0 -END PGP SIGNATURE- -- http://mail.python.org/mailman/listinfo/python-list
Re: help with pyparsing
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 Paul McGuire wrote: > On Dec 9, 11:01 pm, Prabhu Gurumurthy <[EMAIL PROTECTED]> wrote: >> -BEGIN PGP SIGNED MESSAGE- >> Hash: SHA1 >> >> All, >> >> I have the following lines that I would like to parse in python using >> pyparsing, but have some problems forming the grammar. >> >> Line in file: >> table const { 207.135.103.128/26, 207.135.112.64/29 } >> table persist { ! 10.200.2/24, 10.200/22 } >> table const { 192.168/16, ! 172.24.1/29, 172.16/12, 169.254/16 } >> table persist { 10.202/22 } >> table const { 10.206/22 } >> table const { \ >>10.205.1/24, \ >>169.136.241.68, \ >>169.136.241.70, \ >>169.136.241.71, \ >>169.136.241.72, \ >>169.136.241.75, \ >>169.136.241.76, \ >>169.136.241.77, \ >>169.136.241.78, \ >>169.136.241.79, \ >>169.136.241.81, \ >>169.136.241.82, \ >>169.136.241.85 } >> >> I have the following grammar defn. >> >> tableName = Word(alphanums + "-" + "_") >> leftClose = Suppress("<") >> rightClose = Suppress(">") >> key = Suppress("table") >> tableType = Regex("persist|const") >> ip4Address = OneOrMore(Word(nums + ".")) >> ip4Network = Group(ip4Address + Optional(Word("/") + >> OneOrMore(Word(nums >> temp = ZeroOrMore("\\" + "\n") >> tableList = OneOrMore(Optional("\\") | >>ip4Network | ip4Address | Suppress(",") | Literal("!")) >> leftParen = Suppress("{") >> rightParen = Suppress("}") >> >> table = key + leftClose + tableName + rightClose + tableType + \ >> leftParen + tableList + rightParen >> >> I cannot seem to match sixth line in the file above, i.e table name with >> KS, how do I form the grammar for it, BTW, I still cannot seem to ignore >> comments using table.ignore(Literal("#") + restOfLine), I get a parse error. >> >> Any help appreciated. >> Thanks >> Prabhu > > Prabhu - > > This is a good start, but here are some suggestions: > > 1. 
ip4Address = OneOrMore(Word(nums + ".")) > > Word(nums+".") will read any contiguous set of characters in the > string nums+".", so OneOrMore is not necessary for reading in an > ip4Address. Just use: > > ip4Address = Word(nums + ".") > > > 2. ip4Network = Group(ip4Address + Optional(Word("/") + > OneOrMore(Word(nums)))) > > Same comment, OneOrMore is not needed for the added value to the > ip4Address: > > ip4Network = Group(ip4Address + Optional(Word("/") + Word(nums))) > > > 3. tableList = OneOrMore(Optional("\\") | >ip4Network | ip4Address | Suppress(",") | > Literal("!")) > > The list of ip4Networks is just a comma-delimited list, with some > entries preceded with a '!' character. It is simpler to use > pyparsing's built-in helper, delimitedList, as in: > > tableList = Group( delimitedList(Group("!"+ip4Network)|ip4Network) ) > > > Yes, I know, you are saying, "but what about all those backslashes?" > The backslashes look like they are just there as line continuations. > We can define an ignore expression, so that the table expression, and > all of its contained expressions, will ignore '\' characters as line > continuations: > > table.ignore( Literal("\\") + LineEnd() ) > > And I'm not sure why you had trouble with ignoring '#' + restOfLine, > it works fine in the program below. > > If you make these changes, your program will look something like this: > > tableName = Word(alphanums + "-" + "_") > leftClose = Suppress("<") > rightClose = Suppress(">") > key = Suppress("table") > tableType = Regex("persist|const") > ip4Address = Word(nums + ".") > ip4Network = Group(ip4Address + Optional(Word("/") + Word(nums))) > tableList = Group(delimitedList(Group("!"+ip4Network)|ip4Network)) > leftParen = Suppress("{") > rightParen = Suppress("}") > > table = key + leftClose + tableName + rightClose + tableType + \ > leftParen + tableList + rightParen > table.ignore(Literal("\\") + LineEnd()) > table.ignore(Literal("#") + restOfLine) > > #