I have programs that do lots of string-to-string replacements, so I'm trying to create a speedy implementation (tons of .replace statements have become unwieldy). My MultiReplace object performs as well as the function-based regexp approach, and both do better than the for-loop function. Any other suggestions?
import re
import string  # kept from the original post; the joins below no longer need it


def multi_repl(text, subs):
    """Apply every (original, replacement) pair to ``text``, one after another.

    Each pair triggers a separate ``str.replace`` pass, so a later pair also
    sees text produced by an earlier replacement.

    :param text: the string to transform.
    :param subs: iterable of ``(original, replacement)`` string pairs.
    :return: the transformed string.
    """
    for ori, sub in subs:
        text = text.replace(ori, sub)
    return text


# NOTE(review): ``latex_esc`` is defined outside this snippet; it is used here
# as a sequence of (original, replacement) pairs -- confirm against its
# definition.
latex_esc_dic = dict(latex_esc)
latex_esc_ori, latex_esc_rep = zip(*latex_esc)


def symbol_replace(match, get=latex_esc_dic.get):
    """Return the replacement for the text captured by ``symbol_pattern``.

    ``get`` is bound as a default argument so the dict lookup method is
    resolved once, at definition time, rather than on every call.

    :param match: a match object whose group 1 is one of the originals.
    :return: the mapped replacement, or ``""`` if the text is not in the table.
    """
    return get(match.group(1), "")


# One alternation of all escaped originals.  ``str.join`` replaces the
# Python 2-only ``string.join`` helper and works on Python 2 and 3 alike.
symbol_pattern = re.compile(
    "(" + "|".join(map(re.escape, latex_esc_ori)) + ")"
)


class MultiReplace(object):
    """Replace multiple substrings in a single regex pass.

    An object is used for performance: the compiled regex persists between
    calls.  The table is a list of ``(original, replacement)`` pairs rather
    than a dict because dicts are not ordered; the pairs are copied into a
    dict only for the lookup done by the regex replacement callback.  The
    alternation keeps the table order, so when several originals could match
    at the same position the one listed first is tried first.
    """

    def __init__(self, table):
        """Compile the search pattern and build the lookup dict.

        :param table: iterable of ``(original, replacement)`` string pairs.
        """
        print("initing object")
        # Materialize once: the table is consumed twice below (zip and dict),
        # which would silently lose data for a one-shot iterable.
        table = list(table)
        if table:
            self.originals, self.replacements = zip(*table)
        else:
            # zip(*[]) yields nothing and cannot be unpacked into two names.
            self.originals, self.replacements = (), ()
        self.pattern = re.compile(
            "(" + "|".join(map(re.escape, self.originals)) + ")"
        )
        self.table_dic = dict(table)

    def _get_replacement(self, match):
        """Map the matched original (group 1) to its replacement string."""
        return self.table_dic.get(match.group(1), "")

    def replace(self, line):
        """Return ``line`` with every table original replaced.

        :param line: the string to transform.
        :return: the transformed string; unchanged if the table is empty.
        """
        if not self.originals:
            # Nothing to substitute; skip matching the empty pattern.
            return line
        # The bound method is passed as the replacement callback.
        return self.pattern.sub(self._get_replacement, line)


mr = MultiReplace(latex_esc)

# ... (remainder of the program elided in the original post)

# Alternatives, with the timings reported by the author (units not stated):
#line = multi_repl(line, latex_esc)  # 0.406
#line = symbol_pattern.sub(symbol_replace, line)  # 0.385
line = mr.replace(line)  # 0.385
# -- http://mail.python.org/mailman/listinfo/python-list