I extracted a table from a PDF, so the data is quite messy, and the data that should be in 1 row is in 3 columns, like so:

       year    color         location
    1  1997    blue,         MD
    2          green,
    3          and yellow
# So far my code is below, but I know I am missing data; I am just not sure
# what to put in it:

# Simply read and split an example Table 4
import sys


def clean_data(s):
    """Return the cells in *s* with trailing whitespace stripped from each."""
    return [x.rstrip() for x in s]


def write_pieces(pieces):
    """Print one merged record as a single tab-separated line.

    An empty record (nothing accumulated yet) prints nothing, so callers can
    flush unconditionally.
    """
    if pieces:
        print("\t".join(pieces))


def _merge_row(pieces, data):
    """Return *pieces* with each non-empty cell of *data* appended column-wise.

    Fragments in the same column are joined with a single space.  The result
    is padded when *data* has more cells than *pieces*, so a continuation row
    that appears before any record (or is wider than it) cannot raise
    IndexError.
    """
    merged = list(pieces)
    merged.extend([""] * (len(data) - len(merged)))
    for i, cell in enumerate(data):
        if cell:
            merged[i] = merged[i] + " " + cell if merged[i] else cell
    return merged


def main(path=None, max_lines=130, key_col=1):
    """Merge table rows that were split over several lines and print them.

    Each input line is stripped of double quotes and trailing whitespace and
    split on tabs.  A line whose *key_col* cell is non-empty starts a new
    record; any other non-blank line is a continuation whose cells are
    appended to the matching columns of the current record.  Every column is
    merged the same way, so a per-line index column ends up as "1 2 3".

    Args:
        path: File to read.  Defaults to the module-level ``infile`` that the
            script entry point sets.
        max_lines: Number of leading lines to process; ``None`` reads the
            whole file.
        key_col: Index of the column (the year, in the sample table) that is
            only filled in on the first line of a record.
    """
    if path is None:
        path = infile  # set under the ``__main__`` guard below

    pieces = []
    with open(path) as handle:
        for count, line in enumerate(handle):
            if max_lines is not None and count >= max_lines:
                break
            data = clean_data(line.replace('"', '').rstrip().split("\t"))
            if not any(data):
                continue  # blank line: nothing to start or to merge
            if len(data) > key_col and data[key_col] != "":
                # New record: flush the one collected so far.
                write_pieces(pieces)
                pieces = data
            else:
                pieces = _merge_row(pieces, data)
    # The last record has no following row to trigger its flush.
    write_pieces(pieces)


if __name__ == '__main__':
    infile = "file.txt"
    main()

# -- https://mail.python.org/mailman/listinfo/python-list