Mag Gam wrote: > I am in the process of reading a zipped file which is about 6 GB. > > I would like to know if there is a command similar to grep in Python, > because I would like to emulate the -A and -B options of GNU grep. > > Let's say I have this: > > 083828.441,AA > 093828.441,AA > 094028.441,AA > 094058.441,CC > 094828.441,AA > 103828.441,AA > 123828.441,AA > > > If I do grep -A2 -B2 "CC" > > I get 2 lines before and 2 lines after "CC". > > Is there an easy way to do this in Python?
import sys
from itertools import islice, groupby
from collections import deque


def grep(instream, ismatch, before, after):
    """Yield matching items together with surrounding context items.

    Emulates the -B/-A options of GNU grep over an arbitrary iterable.

    Args:
        instream: Iterable of items (lines, csv rows, ...). It is consumed
            lazily, one run of consecutive (non-)matching items at a time.
        ismatch: Callable taking one item; a truthy result marks a match.
        before: Maximum number of items to report ahead of a run of matches.
        after: Maximum number of items to report following a run of matches.

    Yields:
        ``(state, item)`` tuples in input order, where ``state`` is one of
        ``"before"``, ``"match"`` or ``"after"``. An item is never yielded
        twice: items already reported as "after" context are not reported
        again as "before" context of the next match.
    """
    # Normalise the predicate result to a bool so that consecutive matches
    # always end up in ONE group.  Grouping on the raw result would split
    # adjacent matches whenever the predicate returns varying truthy values
    # (e.g. regex match objects), repeating the "before" context.
    def matches(item):
        return bool(ismatch(item))

    pending_before = ()   # context collected ahead of the next match
    seen_match = False    # "after" context only exists once a match was seen
    for matched, group in groupby(instream, key=matches):
        if matched:
            for item in pending_before:
                yield "before", item
            pending_before = ()
            seen_match = True
            for item in group:
                yield "match", item
        else:
            if seen_match:
                for item in islice(group, after):
                    yield "after", item
            # Whatever islice() left unconsumed is a candidate for the next
            # match's leading context; the deque keeps only the last
            # `before` of those items.
            pending_before = deque(group, maxlen=before)


def demo1():
    """Grep this script's own source for "item" with two lines of context."""
    with open(__file__) as instream:
        numbered = enumerate(instream, 1)
        hits = grep(numbered,
                    ismatch=lambda pair: "item" in pair[1],
                    before=2, after=2)
        for state, (index, line) in hits:
            # Written verbatim: no separator or newline is added here.
            sys.stdout.write("%3d %-6s %s" % (index, state + ":", line))


def demo2():
    """Grep parsed csv rows whose last column equals "CC"."""
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    import csv

    lines = StringIO("""\
083828.441,AA
093828.441,AA
094028.441,AA
094058.441,CC
094828.441,AA
103828.441,AA
123828.441,AA
""")
    rows = csv.reader(lines)
    for state, row in grep(rows, lambda r: r[-1] == "CC", 1, 2):
        print(row)


if __name__ == "__main__":
    demo1()
    demo2()

# Probably too slow; badly needs testing.
#
# Peter
# --
# http://mail.python.org/mailman/listinfo/python-list