Hello All, I am still having trouble with memory errors when I try to process many netcdf files. Originally I would get the memory error as mentioned in the previous post but when I added gc.collect() after each for loop I receive the error: GEOS_ERROR: bad allocation with no additional information! The error use to occur at the point when a new netcdf file was to be opened and plotted but with the things I have 'fixed' thanks to suggestions from this list it seems to happen while processing the second file. I am just trying to plot 3hourly data for each file and each file contains hourly data for a month and I am trying to do this for many months. It seems like I cannot close down the last file properly so the computer has a clean memory to start the next one. Any feedback will be greatly appreciated. My latest version of the code:
###################### from netCDF4 import Dataset import numpy as N import matplotlib.pyplot as plt from numpy import ma as MA from mpl_toolkits.basemap import Basemap from netcdftime import utime from datetime import datetime import os shapefile1="E:/DSE_BushfireClimatologyProject/griddeddatasamples/test_GIS/DSE_REGIONS" OutputFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/OutputsforValidation" def plotrawdata(variable): if variable=='TSFC': ncvariablename='T_SFC' MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/" ticks=[-5,0,5,10,15,20,25,30,35,40,45,50] Title='Surface Temperature' cmap=plt.cm.jet elif variable=='RHSFC': ncvariablename='RH_SFC' MainFolder=r"E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/RH_SFC/" ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] Title='Surface RH' cmap=plt.cm.jet_r fileforlatlon=Dataset("E:/DSE_BushfireClimatologyProject/griddeddatasamples/GriddedData/InputsforValidation/T_SFC/TSFC_1974_01/IDZ00026_VIC_ADFD_T_SFC.nc", 'r+', 'NETCDF4') LAT=fileforlatlon.variables['latitude'][:] LON=fileforlatlon.variables['longitude'][:] startperiod=raw_input("Start slice (e.g. 1 ): ") endperiod=raw_input("End slice (e.g. 2): ") skipperiod=raw_input("skip slice (e.g. 1): ") if startperiod == "": startperiod = None else: startperiod = int(startperiod) if endperiod == "": endperiod = None else: endperiod = int(endperiod) if skipperiod == "": skipperiod = None else: skipperiod= int(skipperiod) for (path, dirs, files) in os.walk(MainFolder): for dir in dirs: print dir path=path+'/' for ncfile in files: if ncfile[-3:]=='.nc': print "dealing with ncfiles:", path+ncfile ncfile=os.path.join(path,ncfile) ncfile=Dataset(ncfile, 'r+', 'NETCDF4') #global TSFC variable=ncfile.variables[ncvariablename][startperiod:endperiod:skipperiod] TIME=ncfile.variables['time'][startperiod:endperiod:skipperiod] fillvalue=ncfile.variables[ncvariablename]._FillValue ncfile.close() for variable, TIME in zip((variable[:]),(TIME[:])): #for variable, TIME in zip((variable[sliceperiod]),(TIME[sliceperiod])): cdftime=utime('seconds since 1970-01-01 00:00:00') ncfiletime=cdftime.num2date(TIME) print ncfiletime timestr=str(ncfiletime) d = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') date_string = d.strftime('%Y%m%d_%H%M') #Set up basemap using mercator projection http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html map = Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33, llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i') x,y=map(*N.meshgrid(LON,LAT)) map.drawcoastlines(linewidth=0.5) map.readshapefile(shapefile1, 'DSE_REGIONS') map.drawstates() plt.title(Title+' %s UTC'%ncfiletime) CS = map.contourf(x,y,variable, ticks, cmap=cmap) l,b,w,h =0.1,0.1,0.8,0.8 cax = plt.axes([l+w+0.025, b, 0.025, h], ) cbar=plt.colorbar(CS, cax=cax, drawedges=True) #save map as *.png and plot netcdf file plt.savefig((os.path.join(OutputFolder, ncvariablename+date_string+'UTC.png'))) #plt.show() plt.close() ###################### On Wed, Sep 14, 2011 at 4:08 PM, questions anon <questions.a...@gmail.com>wrote: > Hello All, > I keep coming across a memory error when processing many netcdf files. I > assume it has something to do with how I loop things and maybe need to close > things off properly. > In the code below I am looping through a bunch of netcdf files (each file > is hourly data for one month) and within each netcdf file I am outputting a > *png file every three hours. > This works for one netcdf file but when it begins to process the next > netcdf file I receive this memory error: > > *Traceback (most recent call last): > File > "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py", > line 44, in <module> > TSFC=ncfile.variables['T_SFC'][:] > File "netCDF4.pyx", line 2473, in netCDF4.Variable.__getitem__ > (netCDF4.c:23094) > MemoryError* > > To reduce processing requirements I have tried making the LAT and LON to > only use [0] but I also receive an error: > > *Traceback (most recent call last): > File > "d:/plot_netcdf_merc_multiplot_across_multifolders_mkdirs_memoryerror.py", > line 75, in <module> > x,y=map(*N.meshgrid(LON,LAT)) > File "C:\Python27\lib\site-packages\numpy\lib\function_base.py", line > 3256, in meshgrid > numRows, numCols = len(y), len(x) # yes, reversed > TypeError: len() of unsized object* > > finally I have added gc.collect() in a couple of places but that doesn't > seem to do anything to help. > I am using :*Python 2.7.2 |EPD 7.1-2 (32-bit)| (default, Jul 3 2011, > 15:13:59) [MSC v.1500 32 bit (Intel)] on win32* > Any feedback will be greatly appreciated! > > > from netCDF4 import Dataset > import numpy > import numpy as N > import matplotlib.pyplot as plt > from numpy import ma as MA > from mpl_toolkits.basemap import Basemap > from netcdftime import utime > from datetime import datetime > import os > import gc > > print "start processing...." > > inputpath=r'E:/GriddedData/Input/' > outputpath=r'E:/GriddedData/Validation/' > shapefile1="E:/test_GIS/DSE_REGIONS" > for (path, dirs, files) in os.walk(inputpath): > for dir in dirs: > print dir > sourcepath=os.path.join(path,dir) > relativepath=os.path.relpath(sourcepath,inputpath) > newdir=os.path.join(outputpath,relativepath) > if not os.path.exists(newdir): > os.makedirs(newdir) > > for ncfile in files: > if ncfile[-3:]=='.nc': > print "dealing with ncfiles:", ncfile > ncfile=os.path.join(sourcepath,ncfile) > #print ncfile > ncfile=Dataset(ncfile, 'r+', 'NETCDF4') > TSFC=ncfile.variables['T_SFC'][:,:,:] > TIME=ncfile.variables['time'][:] > LAT=ncfile.variables['latitude'][:] > LON=ncfile.variables['longitude'][:] > fillvalue=ncfile.variables['T_SFC']._FillValue > TSFC=MA.masked_values(TSFC, fillvalue) > ncfile.close() > gc.collect() > print "garbage collected" > > > for TSFC, TIME in zip((TSFC[1::3]),(TIME[1::3])): > print TSFC, TIME > #convert time from numbers to date and prepare it to have no > symbols for saving to filename > cdftime=utime('seconds since 1970-01-01 00:00:00') > ncfiletime=cdftime.num2date(TIME) > print ncfiletime > timestr=str(ncfiletime) > d = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') > date_string = d.strftime('%Y%m%d_%H%M') > > #Set up basemap using mercator projection > http://matplotlib.sourceforge.net/basemap/doc/html/users/merc.html > map = > Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=-33, > > llcrnrlon=139.0,urcrnrlon=151.0,lat_ts=0,resolution='i') > > # compute map projection coordinates for lat/lon grid. > x,y=map(*N.meshgrid(LON,LAT)) > map.drawcoastlines(linewidth=0.5) > map.readshapefile(shapefile1, 'DSE_REGIONS') > map.drawstates() > > plt.title('Surface temperature at %s UTC'%ncfiletime) > ticks=[-5,0,5,10,15,20,25,30,35,40,45,50] > CS = map.contourf(x,y,TSFC, ticks, cmap=plt.cm.jet) > l,b,w,h =0.1,0.1,0.8,0.8 > cax = plt.axes([l+w+0.025, b, 0.025, h], ) > cbar=plt.colorbar(CS, cax=cax, drawedges=True) > > #save map as *.png and plot netcdf file > > plt.savefig((os.path.join(newdir,'TSFC'+date_string+'UTC.png'))) > plt.close() > gc.collect() > print "garbage collected again" > print "end of processing" > > >
-- http://mail.python.org/mailman/listinfo/python-list