Code is below. The files are about 5 MB and 230,000 rows each, and I have 43 of them. When I get to the 35th (reading it in), my system gets so slow that it is nearly functionless. I am on a Mac, and Activity Monitor shows that Python is using 2.99 GB of memory (of 4 GB); this is Python 2.6, 64-bit. Yet getsizeof() returns 6424 bytes for alldata, so I am not sure what is happening.

Any ideas?
Thanks
import csv, os, glob
import sys

def read_data_file(filename):
    reader = csv.reader(open(filename, "U"), delimiter='\t')
    data = []
    mask = []
    outliers = []
    modified = []
    data_append = data.append
    mask_append = mask.append
    outliers_append = outliers.append
    modified_append = modified.append
    maskcount = 0
    outliercount = 0
    modifiedcount = 0
    for row in reader:
        # The file has four sections, separated by [MASKS],
        # [OUTLIERS] and [MODIFIED] marker rows.
        if '[MASKS]' in row:
            maskcount += 1
        if '[OUTLIERS]' in row:
            outliercount += 1
        if '[MODIFIED]' in row:
            modifiedcount += 1
        if not any((maskcount, outliercount, modifiedcount, not row)):
            data_append(row)
        elif not any((outliercount, modifiedcount, not row)):
            mask_append(row)
        elif not any((modifiedcount, not row)):
            outliers_append(row)
        else:
            if row:
                modified_append(row)
    data = data[1:]          # drop the header row
    mask = mask[3:]          # drop the section-marker rows
    outliers = outliers[3:]
    modified = modified[3:]
    return [data, mask, outliers, modified]

def ImportDataFrom(folder):
    # Load every .txt file in the folder into one dict,
    # keyed by filename + section name.
    print 'Importing files from: ', folder
    alldata = dict()
    infolder = glob.glob(os.path.join(folder, '*.txt'))
    numfiles = len(infolder)
    print 'Importing ' + str(numfiles) + ' files from: ', folder
    for infile in infolder:
        fname = os.path.split(infile)[1]
        print "Loading into memory: " + fname
        filedata = dict(zip([fname + '_data', fname + '_mask',
                             fname + '_outliers', fname + '_modified'],
                            read_data_file(infile)))
        print fname + ' has ' + str(len(filedata[fname + '_data'])) + ' rows of data'
        print fname + ' has ' + str(len(filedata[fname + '_mask'])) + ' rows of masked data'
        print fname + ' has ' + str(len(filedata[fname + '_outliers'])) + ' rows of outliers'
        print fname + ' has ' + str(len(filedata[fname + '_modified'])) + ' modified rows of data'
        print str(sys.getsizeof(filedata)) + ' bytes of memory used for ' + fname
        print ' '
        alldata.update(filedata)
    print str(len(alldata) / 4) + ' files of ' + str(numfiles) + \
          ' using ' + str(sys.getsizeof(alldata)) + ' bytes of memory'
    return alldata

ImportDataFrom("/Users/vmd/Dropbox/dna/data/rawdata")
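One thing worth knowing here: sys.getsizeof() is shallow. On a dict it reports only the size of the hash table itself, not the row lists and strings the dict refers to, so 6424 bytes for alldata measures just the container. A minimal sketch of a recursive estimator that walks into containers (total_size is a hypothetical helper, not part of the post above):

import sys

def total_size(obj, seen=None):
    # Rough recursive size estimate: getsizeof() alone is shallow,
    # so descend into containers and add their contents too.
    if seen is None:
        seen = set()
    if id(obj) in seen:          # don't count shared objects twice
        return 0
    seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        for key, value in obj.iteritems():
            size += total_size(key, seen)
            size += total_size(value, seen)
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for item in obj:
            size += total_size(item, seen)
    return size

Rough arithmetic points the same way: 43 files of ~230,000 rows is close to ten million row lists, and in 64-bit CPython a small list plus its string fields easily costs a few hundred bytes, so a footprint approaching 3 GB is plausible even though each file is only ~5 MB on disk.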