> def category_iterator(source): > source = iter(source) > try: > while True: > item = source.next()
# This gave me a lot of inspiration. After a couple of days of banging my head
# against the wall, I finally figured out a code that could attach headers,
# titles, numbers, and categories in their appropriate combinations--basically
# one BIG logic puzzle. It's not the prettiest thing in the world, but it
# works. If anyone has a better way to do it, then I'll be all ears. Anyways,
# thank you all for your input, it helped me think outside the box.

import re

data = [
    'RULES',
    'Approval and Promulgation of Air Quality Implementation Plans:',
    'Illinois; Revisions to Emission Reduction Market System, ',
    '11042 [E8-3800]',
    'E8-3800.pdf',
    'Ohio; Oxides of Nitrogen Budget Trading Program; Correction, ',
    '11192 [Z8-2506]',
    'Z8-2506.pdf',
    'NOTICES',
    'Agency Information Collection Activities; Proposals, Submissions, and Approvals, ',
    '11108-11110 [E8-3934]',
    'E8-3934.pdf',
    'Data Availability for Lead National Ambient Air Quality Standard Review, ',
    '11110-11111 [E8-3935]',
    'E8-3935.pdf',
    'Environmental Impacts Statements; Notice of Availability, ',
    '11112 [E8-3917]',
    'E8-3917.pdf',
]

# All patterns are used with .match(), which anchors at the start of the
# string only, so each one is effectively a "starts with" test.
NOTICES = re.compile(r'NOTICES')     # section marker
RULES = re.compile(r'RULES')         # section marker
TITLE = re.compile(r'[A-Z][a-z].*')  # capital followed by a lowercase letter
NUM = re.compile(r'\d.*')            # begins with a digit (page/document number)
PDF = re.compile(r'.*\.pdf')         # contains ".pdf" (file name; ignored)


def _match_entry(items, start):
    """Try to read one entry beginning at ``items[start]``.

    Returns ``(new_header, title, number)`` on success, where ``new_header``
    is ``None`` when the entry does not introduce a header, or ``None`` when
    the items at ``start`` do not form an entry (including when the list ends
    before the entry is complete).
    """
    window = items[start:start + 3]
    # Header, title, number: two title-like items followed by a number.
    if (len(window) == 3 and TITLE.match(window[0])
            and TITLE.match(window[1]) and NUM.match(window[2])):
        return window[0], window[1], window[2]
    # Title, number: reuses whatever header is currently in effect.
    if len(window) >= 2 and TITLE.match(window[0]) and NUM.match(window[1]):
        return None, window[0], window[1]
    return None


def group_entries(items):
    """Group a flat list of strings into ``(text, module)`` records.

    ``items`` is a sequence of strings made of section markers (RULES or
    NOTICES), optional headers, titles, numbers and PDF file names.  Each
    record's ``text`` is header + title + number concatenated without a
    separator; ``module`` is the most recent section marker, or ``None`` if
    no marker has been seen yet.  A header stays in effect for following
    title/number pairs until the next header or section marker.

    Returns ``(records, problem)``.  ``problem`` is ``None`` when every item
    was understood.  Otherwise processing stops at the first unrecognised
    spot and ``problem`` is ``(item, tag)``: tag ``"strange1"`` with the
    section marker when no entry directly follows it, or ``"strange2"`` with
    the offending item elsewhere.  An entry cut short by the end of the list
    is reported the same way instead of being dropped silently.
    """
    records = []
    module = None
    header = ''
    i = 0
    total = len(items)
    while i < total:
        item = items[i]
        if NOTICES.match(item) or RULES.match(item):
            # A new section resets the header; an entry must follow directly.
            module = item
            header = ''
            start = i + 1
            if start >= total:
                break  # marker is the last item: nothing left to group
            tag = 'strange1'
        elif PDF.match(item):
            i += 1
            continue
        else:
            start = i
            tag = 'strange2'

        entry = _match_entry(items, start)
        if entry is None:
            return records, (item, tag)

        new_header, title, number = entry
        if new_header is not None:
            header = new_header
        records.append((header + title + number, module))
        # Advance by position rather than remembering consumed *values*, so a
        # title whose text repeats later in the list is still processed.
        i = start + (3 if new_header is not None else 2)
    return records, None


def main():
    """Print each grouped record followed by a blank line."""
    records, problem = group_entries(data)
    for text, module in records:
        # A single %-formatted argument prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % ([text], module))
        print("")
    if problem is not None:
        print("%s %s" % problem)


if __name__ == "__main__":
    main()

# --
# http://mail.python.org/mailman/listinfo/python-list