On Sunday, 5 July 2015 10:23:17 UTC+10, Sayth Renshaw wrote: > I was playing with odo(blaze http://blaze.pydata.org/en/latest/) and wanted > to use it with a current script I have been using on the command line. > > So my 2 scripts are below, I will explain here hopefully to keep question > clearer what I have done. Script 2 works for me from the command line as > > python clean.py some.csv > > i wanted to use my script to fix up a bad csv and then use odo to a > dataframe and hopefully build upon this later. > > When I run script 1 I get the error I need more than 1 value to unpack, which > makes sense in that I have Script and Filename. > > ##Error### > C:\Users\sayth\Repos\Notebooks>python odo_test.py > Traceback (most recent call last): > File "odo_test.py", line 3, in <module> > import clean > File "C:\Users\sayth\Repos\Notebooks\clean.py", line 9, in <module> > SCRIPT, FILENAME = argv > ValueError: need more than 1 value to unpack > > But if I change script2 to have just FILENAME = argv I get this error and I > am not sure what to do. > > ##Error### > C:\Users\sayth\Repos\Notebooks>python odo_test.py > Traceback (most recent call last): > File "odo_test.py", line 3, in <module> > import clean > File "C:\Users\sayth\Repos\Notebooks\clean.py", line 62, in <module> > MY_FILE = out_file_name(FILENAME) > File "C:\Users\sayth\Repos\Notebooks\clean.py", line 15, in out_file_name > file_parts = file_name.split(".",) > AttributeError: 'list' object has no attribute 'split' > > What can i do? > > ######Scripts ####### > > # Script 1 > > from odo import odo > import pandas as pd > import clean > > print(argv) > myFile = race_table('20150704RHIL0.csv') > > > odo(myFile, pd.DataFrame) > > # Script 2 > > import csv > import re > from sys import argv > SCRIPT, FILENAME = argv > #FILENAME = argv > > > def out_file_name(file_name): > """take an input file and keep the name with appended _clean""" > file_parts = file_name.split(".",) > output_file = file_parts[0] + '_clean.' 
+ file_parts[1] > return output_file > > > def race_table(text_file): > """utility to reorganise poorly made csv entry""" > output_table = [] > for record in text_file: > if record[0] == 'Meeting': > meeting = record[3] > rail = record[6] > weather = record[7] > track = record[8] > elif record[0] == 'Race': > date = record[13] > race = record[1] > benchmark = record[4] > distance = record[5] > elif record[0] == 'Horse': > number = record[1] > name = record[2] > jockey = record[6] > barrier = record[7] > weight = record[8] > results = record[9] > res_split = re.split('[- ]', results) > starts = res_split[0] > wins = res_split[1] > seconds = res_split[2] > thirds = res_split[3] > try: > prizemoney = res_split[4] > except IndexError: > prizemoney = 0 > trainer = record[4] > location = record[5] > b_rating = record[15] > sex = record[16] > print(name, wins, seconds) > output_table.append((meeting, date, rail, weather, track, > distance, > benchmark, race, number, name, sex, b_rating, > weight, barrier, starts, wins, seconds, > thirds, prizemoney, trainer, location, jockey > )) > return output_table > > MY_FILE = out_file_name(FILENAME) > > # with open(FILENAME, 'r') as f_in, open(MY_FILE, 'w') as f_out: > # for line in race_table(f_in.readline()): > # new_row = line > with open(FILENAME, 'r') as f_in, open(MY_FILE, 'w') as f_out: > CONTENT = csv.reader(f_in) > # print(content) > FILE_CONTENTS = race_table(CONTENT) > # print new_name > # f_out.write(str(FILE_CONTENTS)) > headers = ['MEETING', 'DATE', 'RAIL', 'WEATHER', 'TRACK', 'DISTANCE', > 'BENCHMARK', 'RACE', 'NUMBER', 'NAME', 'SEX', 'B_RATING', > 'WEIGHT', 'BARRIER', 'STARTS', 'WINS', 'SECONDS', 'THIRDS', > 'PRIZEMONEY', 'TRAINER', 'LOCATION', 'JOCKEY'] > > f_csv = csv.writer(f_out) > f_csv.writerow(headers) > f_csv.writerows(FILE_CONTENTS) > > > # Old implementation for reference > # input_table = [[item.strip(' "') for item in record.split(',')] > # for record in text_file.splitlines()] > # At this point look at 
input_table to find the record indices > # identity = string.maketrans("", "") > # print(input_table) > # input_table = [s.translate(identity, ",'") for s > # in input_table] > > if __name__ == '__main__': > main() > > > many thanks for your time. > > Sayth
Solved it, well, to some extent, by putting the whole of clean.py into a function that calls the others, and then just calling that function. Sayth -- https://mail.python.org/mailman/listinfo/python-list