Roy Mendelssohn Tue, 28 Aug 2012 09:07:32 -0700
here is the relevant section from your code:
netcdf.file <- nc_create(
filename=netcdf.fn,
# vars=list(emis.var),
# verbose=TRUE)
vars=list(emis.var))
# Write data to data variable: gotta have file first.
# Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
# Do this before adding _FillValue to prevent:
# > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or =
## global.emis.arr <- global.emis.mx
## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
## global.emis.arr[1,,] <- global.emis.mx
# Note
# * global.emis.mx[lat,lon]
# * datavar needs [lon, lat, time] (with time *last*)
ncvar_put(
nc=netcdf.file,
varid=emis.var,
# vals=global.emis.arr,
vals=t(global.emis.mx),
# start=rep.int(1, length(dim(global.emis.arr))),
start=c(1, 1, 1),
# count=dim(global.emis.arr))
count=c(-1,-1, 1)) # -1 -> all data
You can't write until all dimensions have been defined, and all
variables defined.
But in fact, that's only a part of the code ... which omits the prior
dimension and variable definitions :-( See the current version @
https://github.com/TomRoche/GEIA_to_netCDF/blob/c380c0a28dc8c71dbf0c2ba18130a2439a4fe089/GEIA.to.netCDF.r
I've also attached that (quoted) following my .sig, with the top-most
constant and function declarations removed for brevity. The dimension
definitions are prefixed with '1', the variable definition is prefixed
with '2'.
HTH, Tom Roche <Tom_Roche at pobox.com>
current GEIA.to.netCDF.r code block follows to end of post------------
# code----------------------------------------------------------------
# process input
library(maps) # on tlrPanP5 as well as clusters
# input file path
GEIA.emis.txt.fp <- sprintf('%s/%s', GEIA.emis.txt.dir, GEIA.emis.txt.fn)
# columns are grid#, mass
GEIA.emis.mx <-
as.matrix(read.table(GEIA.emis.txt.fp, skip=GEIA.emis.txt.n.header))
# mask zeros? no, use NA for non-ocean areas
# GEIA.emis.mx[GEIA.emis.mx == 0] <- NA
# <simple input check>
dim(GEIA.emis.mx) ## [1] 36143 2
# start debug
GEIA.emis.mx.rows <- dim(GEIA.emis.mx)[1]
if (GEIA.emis.mx.rows > GEIA.emis.grids.dim) {
cat(sprintf('ERROR: %s: GEIA.emis.mx.rows=%.0d > GEIA.emis.grids.dim=%.0d\n',
this.fn, GEIA.emis.mx.rows, GEIA.emis.grids.dim))
} else {
cat(sprintf('debug: %s: GEIA.emis.mx.rows < GEIA.emis.grids.dim\n',
this.fn))
}
# end debug
# </simple input check>
global.emis.vec <-
create.global.emissions.vector(
GEIA.emis.grids.dim, GEIA.emis.mx.rows, GEIA.emis.mx)
# <visual input check>
# Need sorted lat and lon vectors: we know what those are a priori
# Add 0.5 since grid centers
lon.vec <- 0.5 +
seq(from=grid.lon.degree.start, by=grid.lon.degree.per.cell, length.out=GEIA.emis.lon.dim)
lat.vec <- 0.5 +
seq(from=grid.lat.degree.start, by=grid.lat.degree.per.cell, length.out=GEIA.emis.lat.dim)
# Create emissions matrix corresponding to those dimensional vectors
# (i.e., global.emis.mx is the "projection" of global.emis.vec)
# First, create empty global.emis.mx? No, fill from global.emis.vec.
# Fill using byrow=T? or "bycol" == byrow=FALSE? (row=lat)
# I assigned (using lon.lat.vec.to.grid.index)
# "grid indices" (global.emis.vec.index values)
# "lon-majorly" (i.e., iterate over lats before incrementing lon),
# so we want to fill byrow=FALSE ... BUT,
# that will "fill from top" (i.e., starting @ 90N) and
# we want to "fill from bottom" (i.e., starting @ 90S) ...
# global.emis.mx <- matrix(
# global.emis.vec, nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim,
# # so flip/reverse rows/latitudes when done
# byrow=FALSE)[GEIA.emis.lat.dim:1,]
# NO: I cannot just fill global.emis.mx from global.emis.vec:
# latter's/GEIA's grid numbering system ensures 1000 lons per lat!
# Which overflows the "real-spatial" global.emis.mx :-(
# So I need to fill global.emis.mx using a for loop to decode the grid indices :-(
# (but at least I can fill in whatever direction I want :-)
global.emis.mx <- matrix(
rep(NA, GEIA.emis.grids.dim), nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim)
# 1: works if subsequently transposed: TODO: FIXME
for (i.lon in 1:GEIA.emis.lon.dim) {
for (i.lat in 1:GEIA.emis.lat.dim) {
# 2: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
# for (i.lat in 1:GEIA.emis.lat.dim) {
# for (i.lon in 1:GEIA.emis.lon.dim) {
# 3: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
# for (i.lon in GEIA.emis.lon.dim:1) {
# for (i.lat in GEIA.emis.lat.dim:1) {
# 4: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
# for (i.lat in GEIA.emis.lat.dim:1) {
# for (i.lon in GEIA.emis.lon.dim:1) {
lon <- lon.vec[i.lon]
lat <- lat.vec[i.lat]
GEIA.emis.grid.val <-
global.emis.vec[
lon.lat.vec.to.grid.index(c(lon, lat),
n.lon=GEIA.emis.lon.dim, n.lat=GEIA.emis.lat.dim)]
if (!is.na(GEIA.emis.grid.val)) {
if (is.na(global.emis.mx[i.lat, i.lon])) {
global.emis.mx[i.lat, i.lon] <- GEIA.emis.grid.val
# start debug
# cat(sprintf(
# 'debug: %s: writing val=%f to global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
# this.fn, GEIA.emis.grid.val, i.lon, i.lat, lon, lat))
# end debug
} else {
# error if overwriting presumably-previously-written non-NA!
cat(sprintf(
'ERROR: %s: overwriting val=%f with val=%f at global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
this.fn, global.emis.mx[i.lat, i.lon], GEIA.emis.grid.val,
i.lon, i.lat, lon, lat))
return # TODO: abend
} # end testing target != NA (thus not previously written)
} # end testing source != NA (don't write if is.na(lookup)
} # end for loop over lats
} # end for loop over lons
# Now draw the damn thing
# 1: TODO: FIXME: why do I need to transpose global.emis.mx?
image(lon.vec, lat.vec, t(global.emis.mx))
# 2,3,4: how it should work ?!?
# image(lon.vec, lat.vec, global.emis.mx)
map(add=TRUE)
# </visual input check>
# write output to netCDF
library(ncdf4)
# output file path (not currently used by package=ncdf4)
netcdf.fp <- sprintf('%s/%s', netcdf.dir, netcdf.fn)
1> # create dimensions and dimensional variables
1> time.vec <- c(0) # annual value, corresponding to no specific year
1> time.dim <- ncdim_def(
1> name=time.var.name,
1> units=time.var.units,
1> vals=time.vec,
1> unlim=TRUE,
1> create_dimvar=TRUE,
1> calendar=time.var.calendar,
1> longname=time.var.long_name)
1> lon.dim <- ncdim_def(
1> name=lon.var.name,
1> units=lon.var.units,
1> vals=lon.vec,
1> unlim=FALSE,
1> create_dimvar=TRUE,
1> longname=lon.var.long_name)
1> lat.dim <- ncdim_def(
1> name=lat.var.name,
1> units=lat.var.units,
1> vals=lat.vec,
1> unlim=FALSE,
1> create_dimvar=TRUE,
1> longname=lat.var.long_name)
2> # create data variable (as container--can't put data until we have a file)
2> emis.var <- ncvar_def(
2> name=emis.var.name,
2> units=emis.var.units,
2> # dim=c(time.dim, lat.dim, lon.dim),
2> # dim=list(time.dim, lat.dim, lon.dim),
2> # note dim order desired for result=var(time, lat, lon)
2> dim=list(lon.dim, lat.dim, time.dim),
2> missval=as.double(emis.var._FillValue),
2> longname=emis.var.long_name,
2> prec="double")
# get current time for creation_date
# system(intern=TRUE) -> return char vector, one member per output line)
netcdf.timestamp <- system('date', intern=TRUE)
# create netCDF file
netcdf.file <- nc_create(
filename=netcdf.fn,
# vars=list(emis.var),
# verbose=TRUE)
vars=list(emis.var))
# Write data to data variable: gotta have file first.
# Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
# Do this before adding _FillValue to prevent:
# > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or _FillValue type mismatch
## global.emis.arr <- global.emis.mx
## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
## global.emis.arr[1,,] <- global.emis.mx
# Note
# * global.emis.mx[lat,lon]
# * datavar needs [lon, lat, time] (with time *last*)
ncvar_put(
nc=netcdf.file,
varid=emis.var,
# vals=global.emis.arr,
vals=t(global.emis.mx),
# start=rep.int(1, length(dim(global.emis.arr))),
start=c(1, 1, 1),
# count=dim(global.emis.arr))
count=c(-1,-1, 1)) # -1 -> all data
# Write netCDF attributes
# Note: can't pass *.dim as varid, even though these are coordinate vars :-(
# add datavar attributes
ncatt_put(
nc=netcdf.file,
# varid=lon.var,
varid=lon.var.name,
attname="comment",
attval=lon.var.comment,
prec="text")
ncatt_put(
nc=netcdf.file,
# varid=lat.var,
varid=lat.var.name,
attname="comment",
attval=lat.var.comment,
prec="text")
# put _FillValue after putting data!
ncatt_put(
nc=netcdf.file,
varid=emis.var,
attname="_FillValue",
attval=emis.var._FillValue,
prec="float") # why is "emi_n2o:missing_value = -999."?
# add global attributes (varid=0)
ncatt_put(
nc=netcdf.file,
varid=0,
attname="creation_date",
attval=netcdf.timestamp,
prec="text")
ncatt_put(
nc=netcdf.file,
varid=0,
attname="source_file",
attval=netcdf.source_file,
prec="text")
ncatt_put(
nc=netcdf.file,
varid=0,
attname="Conventions",
attval=netcdf.Conventions,
prec="text")
# flush to file (there may not be data on disk before this point)
# nc_sync(netcdf.file) # so we don't hafta reopen the file, below
# Nope: per David W. Pierce Mon, 27 Aug 2012 21:35:35 -0700, ncsync is not enough
nc_close(netcdf.file)
nc_open(netcdf.fn,
write=FALSE, # will only read below
readunlim=TRUE) # it's a small file
# <simple output check>
system(sprintf('ls -alth %s', netcdf.fp))
system(sprintf('ncdump -h %s', netcdf.fp))
# </simple output check>
# <visual output check>
# TODO: do plot-related refactoring! allow to work with projects={ioapi, this}
# <copied from plotLayersForTimestep.r>
library(fields)
# double-sprintf-ing to set precision by constant: cool or brittle?
stats.precision <- 3 # sigdigs to use for min, median, max of obs
stat.str <- sprintf('%%.%ig', stats.precision)
# use these in function=subtitle.stats as sprintf inputs
max.str <- sprintf('max=%s', stat.str)
med.str <- sprintf('med=%s', stat.str)
min.str <- sprintf('min=%s', stat.str)
# </copied from plotLayersForTimestep.r>
# Get the data out of the datavar, to test reusability
# target.data <- emis.var[,,1] # fails, with
# > Error in emis.var[, , 1] : incorrect number of dimensions
target.data <- ncvar_get(
nc=netcdf.file,
# varid=emis.var,
varid=emis.var.name,
# read all the data
# start=rep(1, emis.var$ndims),
start=c(1, 1, 1),
# count=rep(-1, emis.var$ndims))
count=c(-1, -1, 1))
# MAJOR: all of the above fail with
# > Error in if (nc$var[[li]]$hasAddOffset) addOffset = nc$var[[li]]$addOffset else addOffset = 0 :
# > argument is of length zero
# Note that, if just using the raw data, the following plot code works.
target.data <- t(global.emis.mx)
# <simple output check/>
dim(target.data) # n.lon, n.lat
# <copied from windowEmissions.r>
palette.vec <- c("grey","purple","deepskyblue2","green","yellow","orange","red","brown")
colors <- colorRampPalette(palette.vec)
probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1))
# </copied from windowEmissions.r>
# <copy/mod from plotLayersForTimestep.r>
plot.layer(target.data,
title=netcdf.title,
subtitle=subtitle.stats(target.data),
x.centers=lon.vec,
y.centers=lat.vec,
q.vec=probabilities.vec,
colors=colors)
# </copy/mod from plotLayersForTimestep.r>
map(add=TRUE)
# </visual output check>
# teardown
dev.off()
nc_close(netcdf.file)