To work with data from different sources, a normalization of naming is necessary. This function allows flexible renaming/recoding of the input data based on a second table. In short, the following operation is performed:
data$data_dest = meta[meta$meta_key == data$data_src]$meta_val
For handling missing values for meta_key in meta there are different modes:
strict: stop execution
drop: drop rows from data
keep: keep the values in data
replace: use the mapping provided in the argument replacements (named vector/list)
The function is quite chatty and reports which values are dropped, kept or replaced.
Usage
meta_apply(
data,
meta,
data_src,
data_dest,
meta_key,
meta_val,
mode = "strict",
replacements = NULL
)
Arguments
- data
Input data as tibble
- meta
Lookup table as tibble
- data_src
Name of the column in data used to lookup in meta
- data_dest
Name of the column to save the result in data
- meta_key
Name of the column in meta to match against data$data_src
- meta_val
Name of the column containing the replacement value in meta
- mode
One of "strict", "drop", "keep", "replace". Default "strict"
- replacements
Named vector/list (names are keys, values are the mapped values) supplying mappings for keys missing in meta$meta_key, or overwriting specific existing mappings
See also
Under the hood, the heavy lifting is done by dplyr::recode().
Examples
meta_fn <- system.file("extdata", "meta_smn.rds",
package = "rOstluft.data", mustWork = TRUE)
meta <- readRDS(meta_fn)
tibble::glimpse(meta)
#> Rows: 214
#> Columns: 15
#> $ site_short <chr> "TAE", "TAE", "TAE", "TAE", "TAE", "TAE", "TAE", "T…
#> $ site <chr> "Aadorf/Tänikon", "Aadorf/Tänikon", "Aadorf/Tänikon…
#> $ Länge <chr> "8°54'", "8°54'", "8°54'", "8°54'", "8°54'", "8°54'…
#> $ Breite <chr> "47°29'", "47°29'", "47°29'", "47°29'", "47°29'", "…
#> $ x <dbl> 2710517, 2710517, 2710517, 2710517, 2710517, 271051…
#> $ y <dbl> 1259824, 1259824, 1259824, 1259824, 1259824, 125982…
#> $ masl <dbl> 539, 539, 539, 539, 539, 539, 539, 539, 539, 539, 5…
#> $ parameter_original <chr> "tre200s0", "ure200s0", "prestas0", "prestas0", "fk…
#> $ source <chr> "MeteoSchweiz", "MeteoSchweiz", "MeteoSchweiz", "Me…
#> $ unit <chr> "°C", "%", "hPa", "hPa", "m/s", "m/s", "°", "min", …
#> $ Beschreibung <chr> "Lufttemperatur", "Relative", "Luftdruck", "Luftdru…
#> $ timezone_original <chr> "UTC", "UTC", "UTC", "UTC", "UTC", "UTC", "UTC", "U…
#> $ site_long <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ interval <chr> "min10", "min10", "min10", "min10", "min10", "min10…
#> $ parameter <chr> "T", "Hr", "p", "p", "WVs_max", "WVs", "WD", "SunDu…
fn <- system.file("extdata", "smn.txt", package = "rOstluft.data", mustWork = TRUE)
data <- read_smn(fn, na.rm = FALSE)
data <- dplyr::arrange(data, .data$starttime)
data
#> # A tibble: 60 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2018-01-01 00:50:00 CHU dkl010z0 min10 NA 216
#> 2 2018-01-01 00:50:00 CHU fkl010z0 min10 NA 1.7
#> 3 2018-01-01 00:50:00 CHU fkl010z1 min10 NA 3
#> 4 2018-01-01 00:50:00 CHU gre000z0 min10 NA 5
#> 5 2018-01-01 00:50:00 CHU hto000s0 min10 NA NA
#> 6 2018-01-01 00:50:00 CHU prestas0 min10 NA 946.
#> 7 2018-01-01 00:50:00 CHU rre150z0 min10 NA 0
#> 8 2018-01-01 00:50:00 CHU sre000z0 min10 NA 0
#> 9 2018-01-01 00:50:00 CHU tre200s0 min10 NA 2.3
#> 10 2018-01-01 00:50:00 CHU ure200s0 min10 NA 82.7
#> # ℹ 50 more rows
# data contains no units, cryptic SwissMetNet parameter names and abbreviations for site.
# And the meta data for parameter rre150z0 is missing. Perfect!
# too lazy to update meta, add unit mapping based on SwissMetNet parameter names
# and we want to overwrite the mapping for dkl010z0 anyway => use replace
res <- meta_apply(data, meta, "parameter", "unit", "parameter_original", "unit",
mode = "replace",replacements = list(rre150z0 = "unit1", dkl010z0 = "unit2"))
#> apply meta data$unit = meta[meta$parameter_original == data$parameter]$unit:
#> missing keys in meta$parameter_original: rre150z0
#> replacements used: dkl010z0, rre150z0
res
#> # A tibble: 60 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2018-01-01 00:50:00 CHU dkl010z0 min10 unit2 216
#> 2 2018-01-01 00:50:00 CHU fkl010z0 min10 m/s 1.7
#> 3 2018-01-01 00:50:00 CHU fkl010z1 min10 m/s 3
#> 4 2018-01-01 00:50:00 CHU gre000z0 min10 W/m2 5
#> 5 2018-01-01 00:50:00 CHU hto000s0 min10 cm NA
#> 6 2018-01-01 00:50:00 CHU prestas0 min10 hPa 946.
#> 7 2018-01-01 00:50:00 CHU rre150z0 min10 unit1 0
#> 8 2018-01-01 00:50:00 CHU sre000z0 min10 min 0
#> 9 2018-01-01 00:50:00 CHU tre200s0 min10 °C 2.3
#> 10 2018-01-01 00:50:00 CHU ure200s0 min10 % 82.7
#> # ℹ 50 more rows
# rename the SwissMetNet Parameters, still no mapping for rre150z0, we aren't
# interested in the data and drop it
res2 <- meta_apply(res, meta, "parameter", "parameter",
"parameter_original", "parameter", mode = "drop")
#> Warning: apply meta data$parameter = meta[meta$parameter_original == data$parameter]$parameter:
#> missing keys in meta$parameter_original: rre150z0
#> dropping missing prameters
res2
#> # A tibble: 54 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2018-01-01 00:50:00 CHU WD min10 unit2 216
#> 2 2018-01-01 00:50:00 CHU WVs min10 m/s 1.7
#> 3 2018-01-01 00:50:00 CHU WVs_max min10 m/s 3
#> 4 2018-01-01 00:50:00 CHU StrGlo min10 W/m2 5
#> 5 2018-01-01 00:50:00 CHU SnowDep min10 cm NA
#> 6 2018-01-01 00:50:00 CHU p min10 hPa 946.
#> 7 2018-01-01 00:50:00 CHU SunDur min10 min 0
#> 8 2018-01-01 00:50:00 CHU T min10 °C 2.3
#> 9 2018-01-01 00:50:00 CHU Hr min10 % 82.7
#> 10 2018-01-01 01:00:00 CHU WD min10 unit2 209
#> # ℹ 44 more rows
# or we keep it
res <- meta_apply(res, meta, "parameter", "parameter",
"parameter_original", "parameter", mode = "keep")
#> Warning: apply meta data$parameter = meta[meta$parameter_original == data$parameter]$parameter:
#> missing keys in meta$parameter_original: rre150z0
#> keeping values: rre150z0
res
#> # A tibble: 60 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2018-01-01 00:50:00 CHU WD min10 unit2 216
#> 2 2018-01-01 00:50:00 CHU WVs min10 m/s 1.7
#> 3 2018-01-01 00:50:00 CHU WVs_max min10 m/s 3
#> 4 2018-01-01 00:50:00 CHU StrGlo min10 W/m2 5
#> 5 2018-01-01 00:50:00 CHU SnowDep min10 cm NA
#> 6 2018-01-01 00:50:00 CHU p min10 hPa 946.
#> 7 2018-01-01 00:50:00 CHU rre150z0 min10 unit1 0
#> 8 2018-01-01 00:50:00 CHU SunDur min10 min 0
#> 9 2018-01-01 00:50:00 CHU T min10 °C 2.3
#> 10 2018-01-01 00:50:00 CHU Hr min10 % 82.7
#> # ℹ 50 more rows
# rename the site abbreviation to the site name, strict should work
res <- meta_apply(res, meta, "site", "site", "site_short", "site")
res
#> # A tibble: 60 × 6
#> starttime site parameter interval unit value
#> <dttm> <fct> <fct> <fct> <fct> <dbl>
#> 1 2018-01-01 00:50:00 Chur WD min10 unit2 216
#> 2 2018-01-01 00:50:00 Chur WVs min10 m/s 1.7
#> 3 2018-01-01 00:50:00 Chur WVs_max min10 m/s 3
#> 4 2018-01-01 00:50:00 Chur StrGlo min10 W/m2 5
#> 5 2018-01-01 00:50:00 Chur SnowDep min10 cm NA
#> 6 2018-01-01 00:50:00 Chur p min10 hPa 946.
#> 7 2018-01-01 00:50:00 Chur rre150z0 min10 unit1 0
#> 8 2018-01-01 00:50:00 Chur SunDur min10 min 0
#> 9 2018-01-01 00:50:00 Chur T min10 °C 2.3
#> 10 2018-01-01 00:50:00 Chur Hr min10 % 82.7
#> # ℹ 50 more rows