> [...] > the program works great except for one thing. It's significantly > slower through the later files in the search than through the early > ones... Before anyone criticizes, I recognize that that middle section > could be simplified with a for loop... I just haven't cleaned it > up... > > The problem is that the first 300 files take about 10-15 seconds and > the last 300 take about 2 minutes... If we do more than about 1500 > files in one run, it just hangs up and never finishes... > > Is there a solution here that I'm missing? What am I doing that is so > inefficient?
The only thing I see is that you compile all of the RE's every time you call replacecycle(). They really only need to be compiled once, but I don't know why that would cause the progressive slowing. FWIW, it seems to me like a shell+sed script would be the obvious solution to the problem. > # File: masseditor.py > > import re > import os > import time > > def massreplace(): > editfile = open("pathname\editfile.txt") > filestring = editfile.read() > filelist = filestring.splitlines() > ## errorcheck = re.compile('(a name=)+(.*)(-)+(.*)(></a>)+') > for i in range(len(filelist)): > source = open(filelist[i]) > starttext = source.read() > interimtext = replacecycle(starttext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > interimtext = replacecycle(interimtext) > finaltext = replacecycle(interimtext) > source.close() > source = open(filelist[i],"w") > source.write(finaltext) > source.close() > ## if errorcheck.findall(finaltext)!=[]: > ## print errorcheck.findall(finaltext) > ## print filelist[i] > if i == 100: > print "done 100" > print time.clock() > elif i == 300: > print "done 300" > print time.clock() > elif i == 600: > print "done 600" > print time.clock() > elif i == 1000: > print "done 1000" > print time.clock() > print "done" > print i > print time.clock() > > def replacecycle(starttext): > p1= re.compile('(href=|HREF=)+(.*)(#)+(.*)( )+(.*)(">)+') > p2= re.compile('(name=")+(.*)( )+(.*)(">)+') > p3= re.compile('(href=|HREF=)+(.*)(#)+(.*)(\')+(.*)(">)+') > p4= re.compile('(name=")+(.*)(\')+(.*)(">)+') > p5= re.compile('(href=|HREF=)+(.*)(#)+(.*)(-)+(.*)(">)+') > p6= 
re.compile('(name=")+(.*)(-)+(.*)(">)+') > p7= re.compile('(href=|HREF=)+(.*)(#)+(.*)(<)+(.*)(">)+') > p8= re.compile('(name=")+(.*)(<)+(.*)(">)+') > p7= re.compile('(href=|HREF=")+(.*)(#)+(.*)(:)+(.*)(">)+') > p8= re.compile('(name=")+(.*)(:)+(.*)(">)+') > p9= re.compile('(href=|HREF=")+(.*)(#)+(.*)(\?)+(.*)(">)+') > p10= re.compile('(name=")+(.*)(\?)+(.*)(">)+') > p100= re.compile('(a name=)+(.*)(-)+(.*)(></a>)+') > q1= r"\1\2\3\4_\6\7" > q2= r"\1\2_\4\5" > interimtext = p1.sub(q1, starttext) > interimtext = p2.sub(q2, interimtext) > interimtext = p3.sub(q1, interimtext) > interimtext = p4.sub(q2, interimtext) > interimtext = p5.sub(q1, interimtext) > interimtext = p6.sub(q2, interimtext) > interimtext = p7.sub(q1, interimtext) > interimtext = p8.sub(q2, interimtext) > interimtext = p9.sub(q1, interimtext) > interimtext = p10.sub(q2, interimtext) > interimtext = p100.sub(q2, interimtext) > > return interimtext > > massreplace() > -- Grant Edwards (grante at visi.com) Yow! Are you still SEXUALLY ACTIVE? Did you BRING th' REINFORCEMENTS? -- http://mail.python.org/mailman/listinfo/python-list