Claudio Grondi wrote: > Note: code below is intended to help to clarify things only, > so that a bunch of examples can be tested. > If you need bugfree production quality code, maybe > someone else can provide it.
Still not tested enough to ensure that it's bug free, but more concise. Here's one the implements the algorithm directly and another that uses a regexp. The latter should be the preferred approach. My intent was that the algorithm implements the given pattern so they should given identical results. # Doing the work ourselves def find_spread_substring(query, target, limit = None): stack = [] ti = qi = 0 Nq = len(query) Nt = len(target) delta = 0 while ti < Nt: # We have a match if query[qi] == target[ti]: stack.append( (qi, ti, delta) ) qi = qi + 1 if qi == Nq: return [ti for (qi, ti, delta) in stack] ti = ti + 1 delta = 0 else: # No match while 1: # If we have a partial match, check if we've # gone over the limit. if stack: delta = delta + 1 if limit is not None and delta > limit: # backtrack, treating it as an invalid match # (so retry this 'else:' block) qi, ti, delta = stack.pop() continue # No backtracking needed break # Advance to check the next character in the target ti = ti + 1 # Failure return None # Using regular expressions import re def find_spread_substring2(query, target, limit = None): if limit is None: template = "(%s).*?" else: template = "(%%s).{,%d}?" % (limit,) terms = [template % c for c in query] pattern = "".join(terms) pat = re.compile(pattern) m = pat.search(target) if not m: return None return [m.start(i) for i in range(1, len(query)+1)] def test(): for (q, t, limit, is_valid) in ( ("1010", "10001001", None, True), ("1010", "100011", None, False), ("1010", "100010", 3, True), ("1010", "100010", 1, True), ("1010", "1000010", 1, False), ("1010", "01000010", 2, True), ("1010", "01000010", 1, False), ("1010", "0100000", None, False), ): result = find_spread_substring(q, t, limit) result2 = find_spread_substring2(q, t, limit) if result != result2: raise AssertionError( (result, result2) ) if result is not None: if limit is not None: # check that it's a proper subset for (x, y) in zip(result[:-1], result[1:]): # +1 because 'limit' is the maximum gap size if (y-x) > limit+1: raise AssertionError((q, t, limit, result, x, y)) s = "".join([t[i] for i in result]) if s != q: raise AssertionError((q, t, limit, result, s)) if result is None and not is_valid: pass elif result is not None and is_valid: pass else: raise AssertionError( (q, t, limit, is_valid, result) ) if __name__ == "__main__": test() print "All tests passed." Andrew [EMAIL PROTECTED] -- http://mail.python.org/mailman/listinfo/python-list