Skip to contents

rnz R NetCDF Zarr

rnz wraps the pizzarr and RNetCDF package providing very similar set of functions to the RNetCDF package. The intent is to allow access to NetCDF and zarr in essentially the same way as is done with NetCDF such that packages with a NetCDF backend can easily implement an equivalent ZARR back end using base R.

This vignette shows the most basic operation of the package using a core demo dataset derived from: https://gdo-dcp.ucllnl.org/downscaled_cmip_projections/

A NetCDF and Zarr copy of the dataset are included.


# these files are in `z_demo()` as a file store
# and via "https://raw.githubusercontent.com/DOI-USGS/rnz/main/inst/extdata/bcsd.zarr/" as an http store.
gsub(normalizePath(dirname(rnz::z_demo())), "",
     list.files(rnz::z_demo(), recursive = TRUE, all.files = TRUE), 
     fixed = TRUE)
#>  [1] ".zattrs"           ".zgroup"           ".zmetadata"       
#>  [4] "latitude/.zarray"  "latitude/.zattrs"  "latitude/0"       
#>  [7] "longitude/.zarray" "longitude/.zattrs" "longitude/0"      
#> [10] "pr/.zarray"        "pr/.zattrs"        "pr/0.0.0"         
#> [13] "tas/.zarray"       "tas/.zattrs"       "tas/0.0.0"        
#> [16] "time/.zarray"      "time/.zattrs"      "time/0"

(z <- rnz::open_nz(rnz::z_demo()))
#> <ZarrGroup>
#>   Public:
#>     clone: function (deep = FALSE) 
#>     contains_item: function (item) 
#>     create_dataset: function (name, data = NA, ...) 
#>     create_group: function (name, overwrite = FALSE) 
#>     get_attrs: function () 
#>     get_chunk_store: function () 
#>     get_item: function (item) 
#>     get_meta: function () 
#>     get_name: function () 
#>     get_path: function () 
#>     get_read_only: function () 
#>     get_store: function () 
#>     get_synchronizer: function () 
#>     initialize: function (store, path = NA, read_only = FALSE, chunk_store = NA, 
#>   Private:
#>     attrs: Attributes, R6
#>     cache_attrs: NULL
#>     chunk_store: NA
#>     create_dataset_nosync: function (name, data = NA, ...) 
#>     create_group_nosync: function (name, overwrite = FALSE) 
#>     item_path: function (item) 
#>     key_prefix: 
#>     meta: list
#>     path: 
#>     read_only: TRUE
#>     store: DirectoryStore, Store, R6
#>     synchronizer: NULL

cat(c(capture.output(rnz::nzdump(rnz::z_demo()))[1:25], "..."), sep = "\n")
#> zarr {
#> dimensions:
#> latitude = 33 ;
#> longitude = 81 ;
#> time = 12 ;
#> variables:
#>  <f4 latitude(latitude) ;
#>      latitude:_CoordinateAxisType = Lat ;
#>      latitude:axis = Y ;
#>      latitude:bounds = latitude_bnds ;
#>      latitude:long_name = Latitude ;
#>      latitude:standard_name = latitude ;
#>      latitude:units = degrees_north ;
#>  <f4 longitude(longitude) ;
#>      longitude:_CoordinateAxisType = Lon ;
#>      longitude:axis = X ;
#>      longitude:bounds = longitude_bnds ;
#>      longitude:long_name = Longitude ;
#>      longitude:standard_name = longitude ;
#>      longitude:units = degrees_east ;
#>  <f4 pr(time, latitude, longitude) ;
#>      pr:coordinates = time latitude longitude  ;
#>      pr:long_name = monthly_sum_pr ;
#>      pr:name = pr ;
#>      pr:units = mm/m ;
#> ...

nc_file <- rnz::z_demo(format = "netcdf")

(nc <- RNetCDF::open.nc(nc_file))
#> [1] 65536
#> attr(,"handle_ptr")
#> <pointer: 0x0000013bd98cc5e0>
#> attr(,"class")
#> [1] "NetCDF"

cat(c(capture.output(RNetCDF::print.nc(nc))[1:25], "..."), sep = "\n")
#> netcdf classic {
#> dimensions:
#>  latitude = 33 ;
#>  longitude = 81 ;
#>  time = UNLIMITED ; // (12 currently)
#> variables:
#>  NC_FLOAT latitude(latitude) ;
#>      NC_CHAR latitude:standard_name = "latitude" ;
#>      NC_CHAR latitude:long_name = "Latitude" ;
#>      NC_CHAR latitude:units = "degrees_north" ;
#>      NC_CHAR latitude:axis = "Y" ;
#>      NC_CHAR latitude:bounds = "latitude_bnds" ;
#>      NC_CHAR latitude:_CoordinateAxisType = "Lat" ;
#>  NC_FLOAT longitude(longitude) ;
#>      NC_CHAR longitude:standard_name = "longitude" ;
#>      NC_CHAR longitude:long_name = "Longitude" ;
#>      NC_CHAR longitude:units = "degrees_east" ;
#>      NC_CHAR longitude:axis = "X" ;
#>      NC_CHAR longitude:bounds = "longitude_bnds" ;
#>      NC_CHAR longitude:_CoordinateAxisType = "Lon" ;
#>  NC_FLOAT pr(longitude, latitude, time) ;
#>      NC_CHAR pr:long_name = "monthly_sum_pr" ;
#>      NC_CHAR pr:units = "mm/m" ;
#>      NC_FLOAT pr:_FillValue = 100000002004087734272 ;
#>      NC_CHAR pr:name = "pr" ;
#> ...

“Inquire” about elements of a Zarr store

We can do all the normal inquire stuff using 0-indexed ids or names.

Inquire about a store

rnz::inq_nz_source(nc)
#> $ndims
#> [1] 3
#> 
#> $nvars
#> [1] 5
#> 
#> $ngatts
#> [1] 30
#> 
#> $unlimdimid
#> [1] 2
#> 
#> $format
#> [1] "classic"
#> 
#> $libvers
#> [1] "4.9.2 of Mar  7 2024 22:39:28 $"

rnz::inq_nz_source(z)
#> $ndims
#> [1] 3
#> 
#> $nvars
#> [1] 5
#> 
#> $ngatts
#> [1] 30
#> 
#> $format
#> [1] "DirectoryStore"

Inquire about a group

TODO: get rnz working with more than root groups?

rnz::inq_grp(nc)
#> $self
#> [1] 65536
#> attr(,"handle_ptr")
#> <pointer: 0x0000013bd98cc5e0>
#> attr(,"class")
#> [1] "NetCDF"
#> 
#> $grps
#> list()
#> 
#> $name
#> [1] "/"
#> 
#> $fullname
#> [1] "/"
#> 
#> $dimids
#> [1] 0 1 2
#> 
#> $unlimids
#> [1] 2
#> 
#> $varids
#> [1] 0 1 2 3 4
#> 
#> $typeids
#> integer(0)
#> 
#> $ngatts
#> [1] 30

rnz::inq_grp(z)
#> $grps
#> list()
#> 
#> $name
#> [1] "/"
#> 
#> $fullname
#> [1] "/"
#> 
#> $dimids
#> [1] 0 1 2
#> 
#> $varids
#> [1] 0 1 2 3 4
#> 
#> $ngatts
#> [1] 30

Inquire about a dimension


rnz::inq_dim(nc, 2)
#> $id
#> [1] 2
#> 
#> $name
#> [1] "time"
#> 
#> $length
#> [1] 12
#> 
#> $unlim
#> [1] TRUE

rnz::inq_dim(z, 2)
#> $id
#> [1] 2
#> 
#> $name
#> [1] "time"
#> 
#> $length
#> [1] 12

rnz::inq_dim(nc, "time")
#> $id
#> [1] 2
#> 
#> $name
#> [1] "time"
#> 
#> $length
#> [1] 12
#> 
#> $unlim
#> [1] TRUE

rnz::inq_dim(z, "time")
#> $id
#> [1] 2
#> 
#> $name
#> [1] "time"
#> 
#> $length
#> [1] 12

Inquire about a variable


rnz::inq_var(nc, 4)
#> $id
#> [1] 4
#> 
#> $name
#> [1] "time"
#> 
#> $type
#> [1] "NC_DOUBLE"
#> 
#> $ndims
#> [1] 1
#> 
#> $dimids
#> [1] 2
#> 
#> $natts
#> [1] 4

rnz::inq_var(z, 4)
#> $id
#> [1] 4
#> 
#> $name
#> [1] "time"
#> 
#> $type
#> [1] "<f8"
#> 
#> $ndims
#> [1] 1
#> 
#> $dimids
#> [1] 2
#> 
#> $natts
#> [1] 4

rnz::inq_var(nc, "time")
#> $id
#> [1] 4
#> 
#> $name
#> [1] "time"
#> 
#> $type
#> [1] "NC_DOUBLE"
#> 
#> $ndims
#> [1] 1
#> 
#> $dimids
#> [1] 2
#> 
#> $natts
#> [1] 4

rnz::inq_var(z, "time")
#> $id
#> [1] 4
#> 
#> $name
#> [1] "time"
#> 
#> $type
#> [1] "<f8"
#> 
#> $ndims
#> [1] 1
#> 
#> $dimids
#> [1] 2
#> 
#> $natts
#> [1] 4

Inquire about an attribute

rnz::inq_att(nc, "time", "units")
#> $id
#> [1] 1
#> 
#> $name
#> [1] "units"
#> 
#> $type
#> [1] "NC_CHAR"
#> 
#> $length
#> [1] 30

rnz::inq_att(z, "time", "units")
#> $id
#> [1] 3
#> 
#> $name
#> [1] "units"
#> 
#> $type
#> [1] "character"
#> 
#> $length
#> [1] 1

rnz::inq_att(nc, 4, 3)
#> $id
#> [1] 3
#> 
#> $name
#> [1] "_CoordinateAxisType"
#> 
#> $type
#> [1] "NC_CHAR"
#> 
#> $length
#> [1] 4

rnz::inq_att(z, 4, 3)
#> $id
#> [1] 3
#> 
#> $name
#> [1] "units"
#> 
#> $type
#> [1] "character"
#> 
#> $length
#> [1] 1

Get a variable


rnz::get_var(nc, "time")
#>  [1] 17927 17955 17986 18016 18047 18077 18108 18139 18169 18200 18230 18261

rnz::get_var(z, "time")
#>  [1] 17927 17955 17986 18016 18047 18077 18108 18139 18169 18200 18230 18261

rnz::get_var(nc, var = "pr", 
             start = c(1,1,5), count = c(3,3,1))
#> , , 1
#> 
#>       [,1]  [,2]  [,3]
#> [1,] 69.27 69.59 69.51
#> [2,] 60.19 65.76 68.04
#> [3,] 49.68 55.63 55.45

rnz::get_var(z, var = "pr", 
             start = c(1,1,5), count = c(3,3,1))
#> , , 1
#> 
#>        [,1]   [,2]   [,3]
#> [1,] 134.90 137.65 149.01
#> [2,]  59.46  73.97  79.23
#> [3,] 101.86 101.06  96.12

# TODO: figure out why aperm #3
pr <- rnz::get_var(z, "pr") |>
  aperm(c(3,2,1))

# TODO: figure out NA handling
pr[pr > 1000] <- NA

image(pr[,,1], col = hcl.colors(12, "PuBuGn", rev = TRUE),
      useRaster = TRUE, axes = FALSE)

pr <- rnz::get_var(nc, "pr")

image(pr[,,1], col = hcl.colors(12, "PuBuGn", rev = TRUE),
      useRaster = TRUE, axes = FALSE)

Get an attribute

rnz::get_att(nc, "time", "units")
#> [1] "days since 1950-01-01 00:00:00"

rnz::get_att(z, "time", "units")
#> [1] "days since 1950-01-01"

rnz::get_att(nc, 4, 1)
#> [1] "days since 1950-01-01 00:00:00"

rnz::get_att(z, 4, 3)
#> [1] "days since 1950-01-01"

That’s all for now.