A local storage with flexible file format (default rds). The data format defines the data chunks per file.
Usage
storage_local_rds(name, format, path = NULL, read.only = TRUE)
storage_local_tsv(
name,
format,
path = NULL,
read.only = TRUE,
tz = "Etc/GMT-1"
)
Arguments
- name
name of the store
- format
data format of the store
- path
optional path to create the store under. Defaults to rappdirs::user_data_dir(appname = name, appauthor = "rOstluft")
- read.only
read only store. If TRUE, put is disabled. If FALSE and the store doesn't exist, the store will be initialized
- tz
time zone for POSIXct columns. Data is stored in UTC and converted while reading. It is important that the input data has the same time zone. Default "Etc/GMT-1"
Fields
name
name of the store
format
data format of the store
path
root of the store
data_path
root of all chunks
content_path
path to the rds file containing statistics of store content
columns_path
path to the rds file containing the exact column types of the store content
meta_path
root of all meta files
read.only
flag for read.only usage of store. Default TRUE
ext
file extension for chunks. Default "rds"
read_function
function(file) for reading chunks from disk. Default
base::readRDS()
write_function
function(object, file) for writing chunks to disk. Default
base::saveRDS()
Methods
$get(filter=NULL, ...)
get data from the store. The names of the arguments depend on the format. The filter
argument is applied to each chunk.
$put(data)
puts the data into the store. Stops if store is read only
$get_content()
returns a tibble with the amount of data points per chunk per series
$list_chunks()
get list of all chunks
$get_meta(key=NULL)
get meta data. If key is omitted, returns the content of all files in a named list of
tibbles, with the file name without extension as name. If key is supplied, the list contains only the
specified key.
$put_meta(...)
puts meta data into the store. The name of each argument is used as the file name and its value as the data.
$fix_content()
generates the content file from the data files
$destroy(confirmation)
removes all files under path from the file system if "DELETE" is supplied as
confirmation
Column Types
The first $put()
saves the column types of the data in a file. All subsequent $put()
calls must have the exact
same column types: same order and classes of columns.
storage_local_tsv
This storage is mainly for debugging purposes or for sharing data with another scripting/programming language. Warning: slow and doesn't support the logical data type.
Examples
## init store, creates directory if necessary
format <- rOstluft::format_rolf()
store <- rOstluft::storage_local_rds("example_rOstluft", format, read.only = FALSE)
#> Local store example_rOstluft initialized under 'C:\Users\tom\AppData\Local/rOstluft/example_rOstluft'
## read data from airmo export and put into the store
fn <- system.file("extdata", "Zch_Stampfenbachstrasse_2010-2014.csv",
package = "rOstluft.data", mustWork = TRUE)
df <- rOstluft::read_airmo_csv(fn)
store$put(df)
#> First put to storage. Save columns types to C:/Users/tom/AppData/Local/rOstluft/example_rOstluft/columns.rds
#> # A tibble: 70 × 6
#> year interval site parameter unit n
#> <dbl> <fct> <fct> <fct> <fct> <int>
#> 1 2010 min30 Zch_Stampfenbachstrasse CO mg/m3 17290
#> 2 2010 min30 Zch_Stampfenbachstrasse NO µg/m3 17322
#> 3 2010 min30 Zch_Stampfenbachstrasse NO2 µg/m3 17322
#> 4 2010 min30 Zch_Stampfenbachstrasse NOx ppb 17322
#> 5 2010 min30 Zch_Stampfenbachstrasse O3 µg/m3 17227
#> 6 2010 min30 Zch_Stampfenbachstrasse PM10 µg/m3 16840
#> 7 2010 min30 Zch_Stampfenbachstrasse SO2 µg/m3 17188
#> 8 2010 min30 Zch_Stampfenbachstrasse Hr %Hr 17492
#> 9 2010 min30 Zch_Stampfenbachstrasse p hPa 17461
#> 10 2010 min30 Zch_Stampfenbachstrasse RainDur min 17502
#> # ℹ 60 more rows
fn <- system.file("extdata", "Zch_Rosengartenstrasse_2010-2014.csv",
package = "rOstluft.data", mustWork = TRUE)
df <- rOstluft::read_airmo_csv(fn)
store$put(df)
#> # A tibble: 18 × 6
#> year interval site parameter unit n
#> <dbl> <fct> <fct> <fct> <fct> <int>
#> 1 2013 min30 Zch_Rosengartenstrasse NO µg/m3 4269
#> 2 2013 min30 Zch_Rosengartenstrasse NO2 µg/m3 4269
#> 3 2013 min30 Zch_Rosengartenstrasse NOx ppb 4269
#> 4 2013 min30 Zch_Rosengartenstrasse O3 µg/m3 4277
#> 5 2013 min30 Zch_Rosengartenstrasse PM10 µg/m3 4025
#> 6 2013 min30 Zch_Rosengartenstrasse Hr %Hr 4010
#> 7 2013 min30 Zch_Rosengartenstrasse p hPa 4291
#> 8 2013 min30 Zch_Rosengartenstrasse RainDur min 4291
#> 9 2013 min30 Zch_Rosengartenstrasse T °C 4010
#> 10 2014 min30 Zch_Rosengartenstrasse NO µg/m3 17415
#> 11 2014 min30 Zch_Rosengartenstrasse NO2 µg/m3 17415
#> 12 2014 min30 Zch_Rosengartenstrasse NOx ppb 17415
#> 13 2014 min30 Zch_Rosengartenstrasse O3 µg/m3 17366
#> 14 2014 min30 Zch_Rosengartenstrasse PM10 µg/m3 16788
#> 15 2014 min30 Zch_Rosengartenstrasse Hr %Hr 17517
#> 16 2014 min30 Zch_Rosengartenstrasse p hPa 17519
#> 17 2014 min30 Zch_Rosengartenstrasse RainDur min 17519
#> 18 2014 min30 Zch_Rosengartenstrasse T °C 17517
## get all data min30 for 2011 and 2012
store$get(site = "Zch_Stampfenbachstrasse", interval = "min30", year = 2011:2012)
#> # A tibble: 488,322 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2011-01-01 00:00:00 Zch_Stampfenbachstrasse CO min30 mg/m3 0.611
#> 2 2011-01-01 00:00:00 Zch_Stampfenbachstrasse NO min30 µg/m3 26.2
#> 3 2011-01-01 00:00:00 Zch_Stampfenbachstrasse NO2 min30 µg/m3 42.1
#> 4 2011-01-01 00:00:00 Zch_Stampfenbachstrasse NOx min30 ppb 43.0
#> 5 2011-01-01 00:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.19
#> 6 2011-01-01 00:00:00 Zch_Stampfenbachstrasse PM10 min30 µg/m3 84.0
#> 7 2011-01-01 00:00:00 Zch_Stampfenbachstrasse SO2 min30 µg/m3 5.07
#> 8 2011-01-01 00:00:00 Zch_Stampfenbachstrasse Hr min30 %Hr 91.9
#> 9 2011-01-01 00:00:00 Zch_Stampfenbachstrasse p min30 hPa 971.
#> 10 2011-01-01 00:00:00 Zch_Stampfenbachstrasse RainDur min30 min 0
#> # ℹ 488,312 more rows
## get only data for O3
store$get(year = 2011:2012, site = "Zch_Stampfenbachstrasse", interval = "min30",
filter = parameter == "O3")
#> # A tibble: 34,996 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2011-01-01 00:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.19
#> 2 2011-01-01 00:30:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.10
#> 3 2011-01-01 01:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.10
#> 4 2011-01-01 01:30:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.00
#> 5 2011-01-01 02:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.04
#> 6 2011-01-01 02:30:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.09
#> 7 2011-01-01 03:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 1.94
#> 8 2011-01-01 03:30:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 2.04
#> 9 2011-01-01 04:00:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 6.19
#> 10 2011-01-01 04:30:00 Zch_Stampfenbachstrasse O3 min30 µg/m3 37.3
#> # ℹ 34,986 more rows
## get NOx data from multiple stations
store$get(site = c("Zch_Stampfenbachstrasse", "Zch_Rosengartenstrasse"), interval = "min30",
year = 2014, filter = parameter %in% c("NOx", "NO", "NO2"))
#> # A tibble: 104,547 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2014-01-01 00:00:00 Zch_Rosengartenstrasse NO min30 µg/m3 53.3
#> 2 2014-01-01 00:00:00 Zch_Rosengartenstrasse NO2 min30 µg/m3 35.6
#> 3 2014-01-01 00:00:00 Zch_Rosengartenstrasse NOx min30 ppb 61.4
#> 4 2014-01-01 00:30:00 Zch_Rosengartenstrasse NO min30 µg/m3 69.7
#> 5 2014-01-01 00:30:00 Zch_Rosengartenstrasse NO2 min30 µg/m3 40.7
#> 6 2014-01-01 00:30:00 Zch_Rosengartenstrasse NOx min30 ppb 77.2
#> 7 2014-01-01 01:00:00 Zch_Rosengartenstrasse NO min30 µg/m3 110.
#> 8 2014-01-01 01:00:00 Zch_Rosengartenstrasse NO2 min30 µg/m3 54.0
#> 9 2014-01-01 01:00:00 Zch_Rosengartenstrasse NOx min30 ppb 116.
#> 10 2014-01-01 01:30:00 Zch_Rosengartenstrasse NO min30 µg/m3 131.
#> # ℹ 104,537 more rows
## get n data points grouped by interval, station, parameter, year in the store
store$get_content()
#> # A tibble: 88 × 6
#> year interval site parameter unit n
#> <dbl> <fct> <fct> <fct> <fct> <int>
#> 1 2014 min30 Zch_Rosengartenstrasse NO µg/m3 17415
#> 2 2014 min30 Zch_Rosengartenstrasse NO2 µg/m3 17415
#> 3 2014 min30 Zch_Rosengartenstrasse NOx ppb 17415
#> 4 2014 min30 Zch_Rosengartenstrasse O3 µg/m3 17366
#> 5 2014 min30 Zch_Rosengartenstrasse PM10 µg/m3 16788
#> 6 2014 min30 Zch_Rosengartenstrasse Hr %Hr 17517
#> 7 2014 min30 Zch_Rosengartenstrasse p hPa 17519
#> 8 2014 min30 Zch_Rosengartenstrasse RainDur min 17519
#> 9 2014 min30 Zch_Rosengartenstrasse T °C 17517
#> 10 2013 min30 Zch_Rosengartenstrasse NO µg/m3 4269
#> # ℹ 78 more rows
## get list of all chunks
store$list_chunks()
#> # A tibble: 7 × 7
#> chunk_name interval site year local.path local.modification_t…¹ local.size
#> <fs::path> <chr> <chr> <chr> <fs::path> <dttm> <fs::byte>
#> 1 …c2XCuzIwMTM min30 Zch_… 2013 …IwMTM.rds 2023-12-25 16:07:51 214.18K
#> 2 …c2XCuzIwMTQ min30 Zch_… 2014 …IwMTQ.rds 2023-12-25 16:07:52 862.36K
#> 3 …3NlwrsyMDE0 min30 Zch_… 2014 …yMDE0.rds 2023-12-25 16:07:51 1.36M
#> 4 …3NlwrsyMDEw min30 Zch_… 2010 …yMDEw.rds 2023-12-25 16:07:49 1.35M
#> 5 …3NlwrsyMDEx min30 Zch_… 2011 …yMDEx.rds 2023-12-25 16:07:50 1.35M
#> 6 …3NlwrsyMDEy min30 Zch_… 2012 …yMDEy.rds 2023-12-25 16:07:50 1.36M
#> 7 …3NlwrsyMDEz min30 Zch_… 2013 …yMDEz.rds 2023-12-25 16:07:50 1.36M
#> # ℹ abbreviated name: ¹local.modification_time
## destroy store (careful: removes all files of the store from the disk)
store$destroy("DELETE")
#> Store example_rOstluft destroyed
## missing examples for meta functions