Roy Mendelssohn Tue, 28 Aug 2012 09:07:32 -0700 > here is the relevant section from your code:
> > netcdf.file <- nc_create( > > filename=netcdf.fn, > > # vars=list(emis.var), > > # verbose=TRUE) > > vars=list(emis.var)) > > # Write data to data variable: gotta have file first. > > # Gotta convert 2d global.emis.mx[lat,lon] to 3d > > global.emis.arr[time,lat,lon] > > # Do this before adding _FillValue to prevent: > > # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or = > _FillValue type mismatch > > ## global.emis.arr <- global.emis.mx > > ## dim(global.emis.arr) <- c(1, dim(global.emis.mx)) > > ## global.emis.arr[1,,] <- global.emis.mx > > # Note > > # * global.emis.mx[lat,lon] > > # * datavar needs [lon, lat, time] (with time *last*) > > ncvar_put( > > nc=netcdf.file, > > varid=emis.var, > > # vals=global.emis.arr, > > vals=t(global.emis.mx), > > # start=rep.int(1, length(dim(global.emis.arr))), > > start=c(1, 1, 1), > > # count=dim(global.emis.arr)) > > count=c(-1,-1, 1)) # -1 -> all data > You can't write until all dimensions have been defined, and all > variables defined. But in fact, that's only a part of the code ... which omits the prior dimension and variable definitions :-( See the current version @ https://github.com/TomRoche/GEIA_to_netCDF/blob/c380c0a28dc8c71dbf0c2ba18130a2439a4fe089/GEIA.to.netCDF.r I've also attached that (quoted) following my .sig, with the top-most constant and function declarations removed for brevity. The dimension definitions are prefixed with '1', the variable definition is prefixed with '2'. HTH, Tom Roche <tom_ro...@pobox.com> current GEIA.to.netCDF.r code block follows to end of post------------ # code---------------------------------------------------------------- > # process input > library(maps) # on tlrPanP5 as well as clusters > # input file path > GEIA.emis.txt.fp <- sprintf('%s/%s', GEIA.emis.txt.dir, GEIA.emis.txt.fn) > # columns are grid#, mass > GEIA.emis.mx <- > as.matrix(read.table(GEIA.emis.txt.fp, skip=GEIA.emis.txt.n.header)) > # mask zeros? no, use NA for non-ocean areas > # GEIA.emis.mx[GEIA.emis.mx == 0] <- NA > # <simple input check> > dim(GEIA.emis.mx) ## [1] 36143 2 > # start debug > GEIA.emis.mx.rows <- dim(GEIA.emis.mx)[1] > if (GEIA.emis.mx.rows > GEIA.emis.grids.dim) { > cat(sprintf('ERROR: %s: GEIA.emis.mx.rows=%.0d > > GEIA.emis.grids.dim=%.0d\n', > this.fn, GEIA.emis.mx.rows, GEIA.emis.grids.dim)) > } else { > cat(sprintf('debug: %s: GEIA.emis.mx.rows < GEIA.emis.grids.dim\n', > this.fn)) > } > # end debug > # </simple input check> > global.emis.vec <- > create.global.emissions.vector( > GEIA.emis.grids.dim, GEIA.emis.mx.rows, GEIA.emis.mx) > # <visual input check> > # Need sorted lat and lon vectors: we know what those are a priori > # Add 0.5 since grid centers > lon.vec <- 0.5 + > seq(from=grid.lon.degree.start, by=grid.lon.degree.per.cell, > length.out=GEIA.emis.lon.dim) > lat.vec <- 0.5 + > seq(from=grid.lat.degree.start, by=grid.lat.degree.per.cell, > length.out=GEIA.emis.lat.dim) > # Create emissions matrix corresponding to those dimensional vectors > # (i.e., global.emis.mx is the "projection" of global.emis.vec) > # First, create empty global.emis.mx? No, fill from global.emis.vec. > # Fill using byrow=T? or "bycol" == byrow=FALSE? (row=lat) > # I assigned (using lon.lat.vec.to.grid.index) > # "grid indices" (global.emis.vec.index values) > # "lon-majorly" (i.e., iterate over lats before incrementing lon), > # so we want to fill byrow=FALSE ... BUT, > # that will "fill from top" (i.e., starting @ 90N) and > # we want to "fill from bottom" (i.e., starting @ 90S) ... > # global.emis.mx <- matrix( > # global.emis.vec, nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim, > # # so flip/reverse rows/latitudes when done > # byrow=FALSE)[GEIA.emis.lat.dim:1,] > # NO: I cannot just fill global.emis.mx from global.emis.vec: > # latter's/GEIA's grid numbering system ensures 1000 lons per lat! > # Which overflows the "real-spatial" global.emis.mx :-( > # So I need to fill global.emis.mx using a for loop to decode the grid > indices :-( > # (but at least I can fill in whatever direction I want :-) > global.emis.mx <- matrix( > rep(NA, GEIA.emis.grids.dim), nrow=GEIA.emis.lat.dim, > ncol=GEIA.emis.lon.dim) > # 1: works if subsequently transposed: TODO: FIXME > for (i.lon in 1:GEIA.emis.lon.dim) { > for (i.lat in 1:GEIA.emis.lat.dim) { > # 2: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)' > # for (i.lat in 1:GEIA.emis.lat.dim) { > # for (i.lon in 1:GEIA.emis.lon.dim) { > # 3: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)' > # for (i.lon in GEIA.emis.lon.dim:1) { > # for (i.lat in GEIA.emis.lat.dim:1) { > # 4: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)' > # for (i.lat in GEIA.emis.lat.dim:1) { > # for (i.lon in GEIA.emis.lon.dim:1) { > lon <- lon.vec[i.lon] > lat <- lat.vec[i.lat] > GEIA.emis.grid.val <- > global.emis.vec[ > lon.lat.vec.to.grid.index(c(lon, lat), > n.lon=GEIA.emis.lon.dim, n.lat=GEIA.emis.lat.dim)] > if (!is.na(GEIA.emis.grid.val)) { > if (is.na(global.emis.mx[i.lat, i.lon])) { > global.emis.mx[i.lat, i.lon] <- GEIA.emis.grid.val > # start debug > # cat(sprintf( > # 'debug: %s: writing val=%f to global.emis.mx[%.0f,%.0f] for grid > center=[%f,%f]\n', > # this.fn, GEIA.emis.grid.val, i.lon, i.lat, lon, lat)) > # end debug > } else { > # error if overwriting presumably-previously-written non-NA! > cat(sprintf( > 'ERROR: %s: overwriting val=%f with val=%f at > global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n', > this.fn, global.emis.mx[i.lat, i.lon], GEIA.emis.grid.val, > i.lon, i.lat, lon, lat)) > return # TODO: abend > } # end testing target != NA (thus not previously written) > } # end testing source != NA (don't write if is.na(lookup) > } # end for loop over lats > } # end for loop over lons > # Now draw the damn thing > # 1: TODO: FIXME: why do I need to transpose global.emis.mx? > image(lon.vec, lat.vec, t(global.emis.mx)) > # 2,3,4: how it should work ?!? > # image(lon.vec, lat.vec, global.emis.mx) > map(add=TRUE) > # </visual input check> > # write output to netCDF > library(ncdf4) > # output file path (not currently used by package=ncdf4) > netcdf.fp <- sprintf('%s/%s', netcdf.dir, netcdf.fn) 1> # create dimensions and dimensional variables 1> time.vec <- c(0) # annual value, corresponding to no specific year 1> time.dim <- ncdim_def( 1> name=time.var.name, 1> units=time.var.units, 1> vals=time.vec, 1> unlim=TRUE, 1> create_dimvar=TRUE, 1> calendar=time.var.calendar, 1> longname=time.var.long_name) 1> lon.dim <- ncdim_def( 1> name=lon.var.name, 1> units=lon.var.units, 1> vals=lon.vec, 1> unlim=FALSE, 1> create_dimvar=TRUE, 1> longname=lon.var.long_name) 1> lat.dim <- ncdim_def( 1> name=lat.var.name, 1> units=lat.var.units, 1> vals=lat.vec, 1> unlim=FALSE, 1> create_dimvar=TRUE, 1> longname=lat.var.long_name) 2> # create data variable (as container--can't put data until we have a file) 2> emis.var <- ncvar_def( 2> name=emis.var.name, 2> units=emis.var.units, 2> # dim=c(time.dim, lat.dim, lon.dim), 2> # dim=list(time.dim, lat.dim, lon.dim), 2> # note dim order desired for result=var(time, lat, lon) 2> dim=list(lon.dim, lat.dim, time.dim), 2> missval=as.double(emis.var._FillValue), 2> longname=emis.var.long_name, 2> prec="double") > # get current time for creation_date > # system(intern=TRUE) -> return char vector, one member per output line) > netcdf.timestamp <- system('date', intern=TRUE) > # create netCDF file > netcdf.file <- nc_create( > filename=netcdf.fn, > # vars=list(emis.var), > # verbose=TRUE) > vars=list(emis.var)) > # Write data to data variable: gotta have file first. > # Gotta convert 2d global.emis.mx[lat,lon] to 3d > global.emis.arr[time,lat,lon] > # Do this before adding _FillValue to prevent: > # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or > _FillValue type mismatch > ## global.emis.arr <- global.emis.mx > ## dim(global.emis.arr) <- c(1, dim(global.emis.mx)) > ## global.emis.arr[1,,] <- global.emis.mx > # Note > # * global.emis.mx[lat,lon] > # * datavar needs [lon, lat, time] (with time *last*) > ncvar_put( > nc=netcdf.file, > varid=emis.var, > # vals=global.emis.arr, > vals=t(global.emis.mx), > # start=rep.int(1, length(dim(global.emis.arr))), > start=c(1, 1, 1), > # count=dim(global.emis.arr)) > count=c(-1,-1, 1)) # -1 -> all data > # Write netCDF attributes > # Note: can't pass *.dim as varid, even though these are coordinate vars :-( > # add datavar attributes > ncatt_put( > nc=netcdf.file, > # varid=lon.var, > varid=lon.var.name, > attname="comment", > attval=lon.var.comment, > prec="text") > ncatt_put( > nc=netcdf.file, > # varid=lat.var, > varid=lat.var.name, > attname="comment", > attval=lat.var.comment, > prec="text") > # put _FillValue after putting data! > ncatt_put( > nc=netcdf.file, > varid=emis.var, > attname="_FillValue", > attval=emis.var._FillValue, > prec="float") # why is "emi_n2o:missing_value = -999."? > # add global attributes (varid=0) > ncatt_put( > nc=netcdf.file, > varid=0, > attname="creation_date", > attval=netcdf.timestamp, > prec="text") > ncatt_put( > nc=netcdf.file, > varid=0, > attname="source_file", > attval=netcdf.source_file, > prec="text") > ncatt_put( > nc=netcdf.file, > varid=0, > attname="Conventions", > attval=netcdf.Conventions, > prec="text") > # flush to file (there may not be data on disk before this point) > # nc_sync(netcdf.file) # so we don't hafta reopen the file, below > # Nope: per David W. Pierce Mon, 27 Aug 2012 21:35:35 -0700, ncsync is not > enough > nc_close(netcdf.file) > nc_open(netcdf.fn, > write=FALSE, # will only read below > readunlim=TRUE) # it's a small file > # <simple output check> > system(sprintf('ls -alth %s', netcdf.fp)) > system(sprintf('ncdump -h %s', netcdf.fp)) > # </simple output check> > # <visual output check> > # TODO: do plot-related refactoring! allow to work with projects={ioapi, > this} > # <copied from plotLayersForTimestep.r> > library(fields) > # double-sprintf-ing to set precision by constant: cool or brittle? > stats.precision <- 3 # sigdigs to use for min, median, max of obs > stat.str <- sprintf('%%.%ig', stats.precision) > # use these in function=subtitle.stats as sprintf inputs > max.str <- sprintf('max=%s', stat.str) > med.str <- sprintf('med=%s', stat.str) > min.str <- sprintf('min=%s', stat.str) > # </copied from plotLayersForTimestep.r> > # Get the data out of the datavar, to test reusability > # target.data <- emis.var[,,1] # fails, with > # > Error in emis.var[, , 1] : incorrect number of dimensions > target.data <- ncvar_get( > nc=netcdf.file, > # varid=emis.var, > varid=emis.var.name, > # read all the data > # start=rep(1, emis.var$ndims), > start=c(1, 1, 1), > # count=rep(-1, emis.var$ndims)) > count=c(-1, -1, 1)) > # MAJOR: all of the above fail with > # > Error in if (nc$var[[li]]$hasAddOffset) addOffset = > nc$var[[li]]$addOffset else addOffset = 0 : > # > argument is of length zero > # Note that, if just using the raw data, the following plot code works. > target.data <- t(global.emis.mx) > # <simple output check/> > dim(target.data) # n.lon, n.lat > # <copied from windowEmissions.r> > palette.vec <- > c("grey","purple","deepskyblue2","green","yellow","orange","red","brown") > colors <- colorRampPalette(palette.vec) > probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1)) > # </copied from windowEmissions.r> > # <copy/mod from plotLayersForTimestep.r> > plot.layer(target.data, > title=netcdf.title, > subtitle=subtitle.stats(target.data), > x.centers=lon.vec, > y.centers=lat.vec, > q.vec=probabilities.vec, > colors=colors) > # </copy/mod from plotLayersForTimestep.r> > map(add=TRUE) > # </visual output check> > # teardown > dev.off() > nc_close(netcdf.file) ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.