On Mar 25, 12:30 am, "Paul McGuire" <[EMAIL PROTECTED]> wrote: > On Mar 24, 6:52 pm, "mkppk" <[EMAIL PROTECTED]> wrote: > > > Its just that I would rather not reinvent the wheel (or read old C > > code).. > > Wouldn't we all! > > Here is the basic structure of a pyparsing solution. The parsing part > isn't so bad - the real problem is the awful ParseONUS routine in C. > Plus things are awkward since the C program parses right-to-left and > then reverses all of the found fields, and the parser I wrote works > left-to-right. Still, this grammar does most of the job. I've left > out my port of ParseONUS since it is *so* ugly, and not really part of > the pyparsing example. > > -- Paul > > from pyparsing import * > > # define values for optional fields > NoAmountGiven = "" > NoEPCGiven = "" > NoAuxOnusGiven = "" > > # define delimiters > DOLLAR = Suppress("$") > T_ = Suppress("T") > A_ = Suppress("A") > > # field definitions > amt = DOLLAR + Word(nums,exact=10) + DOLLAR > onus = Word("0123456789A- ") > transit = T_ + Word("0123456789-") + T_ > epc = oneOf( list(nums) ) > aux_onus = A_ + Word("0123456789- ") + A_ > > # validation parse action > def validateTransitNumber(t): > transit = t[0] > flds = transit.split("-") > if len(flds) > 2: > raise ParseException(0, "too many dashes in transit number", > 0) > if len(flds) == 2: > if len(flds[0]) not in (3,4): > raise ParseException(0, "invalid dash position in transit > number", 0) > else: > # compute checksum > ti = map(int,transit) > ti.reverse() # original algorithm worked with reversed data > cksum = 3*(ti[8]+ti[5]+ti[2]) + 7*(ti[7]+ti[4]+ti[1]) + > ti[6]+ti[3]+ti[0] > if cksum%10 != 0: > raise ParseException(0, "transit number failed checksum", > 0) > return transit > > # define overallMICRformat, with results names > micrdata = > Optional(aux_onus,default=NoAuxOnusGiven).setResultsName("aux_onus") + > \ > Optional(epc,default=NoEPCGiven).setResultsName("epc") +\ > > 
transit.setParseAction(validateTransitNumber).setResultsName("transit") > + \ > onus.setResultsName("onus") + \ > Optional(amt,default=NoAmountGiven).setResultsName("amt") > + \ > stringEnd > > import re > > def parseONUS(tokens): > tokens["csn"] = "" > tokens["tpc"] = "" > tokens["account"] = "" > tokens["amt"] = tokens["amt"][0] > onus = tokens.onus > # remainder omitted out of respect for newsreaders... > # suffice to say that unspeakable acts are performed on > # onus and aux_onus fields to extract account and > # check numbers > > micrdata.setParseAction(parseONUS) > > testdata = file("checks.csv").readlines()[1:] > tests = [(flds[1],flds) for flds in map(lambda > l:l.split(","),testdata)] > def verifyResults(res,csv): > def match(x,y): > print (x==y and "_" or "X"),x,"=",y > > Ex,MICR,Bank,Stat,Amt,AS,TPC,TS,CSN,CS,ACCT,AS,EPC,ES,ONUS,OS,AUX,AS,Tran,TS > = csv > match(res.amt,Amt) > match(res.account,ACCT) > match(res.csn,CSN) > match(res.onus,ONUS) > match(res.tpc,TPC) > match(res.epc,EPC) > match(res.transit,Tran) > > for t,data in tests: > print t > try: > res = micrdata.parseString(t) > print res.dump() > if not(data[0] == "No"): > print "Passed expression that should have failed" > verifyResults(res,data) > except ParseException,pe: > print "<parse failed> %s" % pe.msg > if not(data[0] == "Yes"): > print "Failed expression that should have passed" > print
Great, thanks for taking a look, Paul. I had never tried to use pyparsing before. Yeah, the ONUS field is crazy; I don't know why there is no standard for it. -- http://mail.python.org/mailman/listinfo/python-list