I wrote the following simple program to loop through our help files and fix some errors (in case you can't see the subtle RE search that's happening, we're replacing spaces in bookmarks with _'s)
the program works great except for one thing. It's significantly slower through the later files in the search then through the early ones... Before anyone criticizes, I recognize that that middle section could be simplified with a for loop... I just haven't cleaned it up... The problem is that the first 300 files take about 10-15 seconds and the last 300 take about 2 minutes... If we do more than about 1500 files in one run, it just hangs up and never finishes... Is there a solution here that I'm missing? What am I doing that is so inefficient? # File: masseditor.py import re import os import time def massreplace(): editfile = open("pathname\editfile.txt") filestring = editfile.read() filelist = filestring.splitlines() ## errorcheck = re.compile('(a name=)+(.*)(-)+(.*)(></a>)+') for i in range(len(filelist)): source = open(filelist[i]) starttext = source.read() interimtext = replacecycle(starttext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) interimtext = replacecycle(interimtext) finaltext = replacecycle(interimtext) source.close() source = open(filelist[i],"w") source.write(finaltext) source.close() ## if errorcheck.findall(finaltext)!=[]: ## print errorcheck.findall(finaltext) ## print filelist[i] if i == 100: print "done 100" print time.clock() elif i == 300: print "done 300" print time.clock() elif i == 600: print "done 600" print time.clock() elif i == 1000: print "done 1000" print time.clock() print "done" print i print time.clock() def replacecycle(starttext): p1= re.compile('(href=|HREF=)+(.*)(#)+(.*)( )+(.*)(">)+') p2= re.compile('(name=")+(.*)( )+(.*)(">)+') p3= re.compile('(href=|HREF=)+(.*)(#)+(.*)(\')+(.*)(">)+') p4= re.compile('(name=")+(.*)(\')+(.*)(">)+') p5= re.compile('(href=|HREF=)+(.*)(#)+(.*)(-)+(.*)(">)+') p6= re.compile('(name=")+(.*)(-)+(.*)(">)+') p7= re.compile('(href=|HREF=)+(.*)(#)+(.*)(<)+(.*)(">)+') p8= re.compile('(name=")+(.*)(<)+(.*)(">)+') p7= re.compile('(href=|HREF=")+(.*)(#)+(.*)(:)+(.*)(">)+') p8= re.compile('(name=")+(.*)(:)+(.*)(">)+') p9= re.compile('(href=|HREF=")+(.*)(#)+(.*)(\?)+(.*)(">)+') p10= re.compile('(name=")+(.*)(\?)+(.*)(">)+') p100= re.compile('(a name=)+(.*)(-)+(.*)(></a>)+') q1= r"\1\2\3\4_\6\7" q2= r"\1\2_\4\5" interimtext = p1.sub(q1, starttext) interimtext = p2.sub(q2, interimtext) interimtext = p3.sub(q1, interimtext) interimtext = p4.sub(q2, interimtext) interimtext = p5.sub(q1, interimtext) interimtext = p6.sub(q2, interimtext) interimtext = p7.sub(q1, interimtext) interimtext = p8.sub(q2, interimtext) interimtext = p9.sub(q1, interimtext) interimtext = p10.sub(q2, interimtext) interimtext = p100.sub(q2, interimtext) return interimtext massreplace() -- http://mail.python.org/mailman/listinfo/python-list