On Friday, 4 July 2014 14:12:15 UTC+10, flebber wrote: > I have taken the code and gone a little further, but I need to be able to > protect myself against commas and single quotes in names. > > > > What is the best way to do this? > > > > So in my file I had this trainer name on line 44: > > > > "Michael, Wayne & John Hawkes" > > > > and this horse name on line 95: > > Inz'n'out > > > > This throws off my capturing of the correct item 9. How do I protect against this? > > > > Here is the current code. > > > > import re > > from sys import argv > > SCRIPT, FILENAME = argv > > > > > > def out_file_name(file_name): > > """take an input file and keep the name with appended _clean""" > > file_parts = file_name.split(".",) > > output_file = file_parts[0] + '_clean.' + file_parts[1] > > return output_file > > > > > > def race_table(text_file): > > """utility to reorganise poorly made csv entry""" > > input_table = [[item.strip(' "') for item in record.split(',')] > > for record in text_file.splitlines()] > > # At this point look at input_table to find the record indices > > output_table = [] > > for record in input_table: > > if record[0] == 'Meeting': > > meeting = record[3] > > elif record[0] == 'Race': > > date = record[13] > > race = record[1] > > elif record[0] == 'Horse': > > number = record[1] > > name = record[2] > > results = record[9] > > res_split = re.split('[- ]', results) > > starts = res_split[0] > > wins = res_split[1] > > seconds = res_split[2] > > thirds = res_split[3] > > prizemoney = res_split[4] > > trainer = record[4] > > location = record[5] > > print(name, wins, seconds) > > output_table.append((meeting, date, race, number, name, > > starts, wins, seconds, thirds, prizemoney, > > trainer, location)) > > return output_table > > > > MY_FILE = out_file_name(FILENAME) > > > > # with open(FILENAME, 'r') as f_in, open(MY_FILE, 'w') as f_out: > > # for line in race_table(f_in.readline()): > > # new_row = line > > with open(FILENAME, 'r') as f_in, open(MY_FILE, 'w') as 
f_out: > > CONTENT = f_in.read() > > # print(content) > > FILE_CONTENTS = race_table(CONTENT) > > # print new_name > > f_out.write(str(FILE_CONTENTS)) > > > > > > if __name__ == '__main__': > > pass
So I found this on Stack Overflow: In [2]: import string In [3]: identity = string.maketrans("", "") In [4]: x = ['+5556', '-1539', '-99', '+1500'] In [5]: x = [s.translate(identity, "+-") for s in x] In [6]: x Out[6]: ['5556', '1539', '99', '1500'] But it fails on my file because, I believe, mine is a list of lists rather than a flat list of strings. Is there an easy way to iterate over the sublists without flattening? Current code: input_table = [[item.strip(' "') for item in record.split(',')] for record in text_file.splitlines()] # At this point look at input_table to find the record indices identity = string.maketrans("", "") print(input_table) input_table = [s.translate(identity, ",'") for s in input_table] Sayth -- https://mail.python.org/mailman/listinfo/python-list