Skip to contents

To work with data from different sources, a normalization of naming is necessary. This function allows flexible renaming/recoding of the input data based on a second table. In short, the following operation is performed:

data$data_dest = meta[meta$meta_key == data$data_src]$meta_val

For handling values of data$data_src that have no matching meta_key in meta, there are different modes:

  • strict: stop execution

  • drop: drop rows from data

  • keep: keep the values in data

  • replace: use the mapping provided in the argument replacements (named vector/list)

The function is quite chatty and reports which values are dropped, kept or replaced.

Usage

meta_apply(
  data,
  meta,
  data_src,
  data_dest,
  meta_key,
  meta_val,
  mode = "strict",
  replacements = NULL
)

Arguments

data

Input data as tibble

meta

Lookup table as tibble

data_src

Name of the column in data used to lookup in meta

data_dest

Name of the column to save the result in data

meta_key

Name of the column in meta to match against data$data_src

meta_val

Name of the column containing the replacement value in meta

mode

One of "strict", "drop", "keep", "replace". Default "strict"

replacements

Named vector/list (names are keys, values are the replacement values) supplying mappings for keys missing in meta$meta_key or overwriting specific mappings

Value

transformed data

See also

Examples

meta_fn <- system.file("extdata", "meta_smn.rds",
                       package = "rOstluft.data", mustWork = TRUE)
meta <- readRDS(meta_fn)
tibble::glimpse(meta)
#> Rows: 214
#> Columns: 15
#> $ site_short         <chr> "TAE", "TAE", "TAE", "TAE", "TAE", "TAE", "TAE", "T…
#> $ site               <chr> "Aadorf/Tänikon", "Aadorf/Tänikon", "Aadorf/Tänikon…
#> $ Länge              <chr> "8°54'", "8°54'", "8°54'", "8°54'", "8°54'", "8°54'…
#> $ Breite             <chr> "47°29'", "47°29'", "47°29'", "47°29'", "47°29'", "…
#> $ x                  <dbl> 2710517, 2710517, 2710517, 2710517, 2710517, 271051…
#> $ y                  <dbl> 1259824, 1259824, 1259824, 1259824, 1259824, 125982…
#> $ masl               <dbl> 539, 539, 539, 539, 539, 539, 539, 539, 539, 539, 5…
#> $ parameter_original <chr> "tre200s0", "ure200s0", "prestas0", "prestas0", "fk…
#> $ source             <chr> "MeteoSchweiz", "MeteoSchweiz", "MeteoSchweiz", "Me…
#> $ unit               <chr> "°C", "%", "hPa", "hPa", "m/s", "m/s", "°", "min", …
#> $ Beschreibung       <chr> "Lufttemperatur", "Relative", "Luftdruck", "Luftdru…
#> $ timezone_original  <chr> "UTC", "UTC", "UTC", "UTC", "UTC", "UTC", "UTC", "U…
#> $ site_long          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ interval           <chr> "min10", "min10", "min10", "min10", "min10", "min10…
#> $ parameter          <chr> "T", "Hr", "p", "p", "WVs_max", "WVs", "WD", "SunDu…

fn <- system.file("extdata", "smn.txt", package = "rOstluft.data", mustWork = TRUE)
data <- read_smn(fn, na.rm = FALSE)
data <- dplyr::arrange(data, .data$starttime)
data
#> # A tibble: 60 × 6
#>    starttime           site  parameter interval unit  value
#>    <dttm>              <fct> <fct>     <fct>    <fct> <dbl>
#>  1 2018-01-01 00:50:00 CHU   dkl010z0  min10    NA    216  
#>  2 2018-01-01 00:50:00 CHU   fkl010z0  min10    NA      1.7
#>  3 2018-01-01 00:50:00 CHU   fkl010z1  min10    NA      3  
#>  4 2018-01-01 00:50:00 CHU   gre000z0  min10    NA      5  
#>  5 2018-01-01 00:50:00 CHU   hto000s0  min10    NA     NA  
#>  6 2018-01-01 00:50:00 CHU   prestas0  min10    NA    946. 
#>  7 2018-01-01 00:50:00 CHU   rre150z0  min10    NA      0  
#>  8 2018-01-01 00:50:00 CHU   sre000z0  min10    NA      0  
#>  9 2018-01-01 00:50:00 CHU   tre200s0  min10    NA      2.3
#> 10 2018-01-01 00:50:00 CHU   ure200s0  min10    NA     82.7
#> # ℹ 50 more rows

# data contains no units, cryptic SwissMetNet parameter names and abbreviations for site.
# And the meta data for parameter rre150z0 is missing. Perfect!

# too lazy to update meta, add unit mapping based on SwissMetNet parameter names
# and we want to overwrite the mapping for dkl010z0 anyway => use replace
res <- meta_apply(data, meta, "parameter", "unit", "parameter_original", "unit",
         mode = "replace",replacements = list(rre150z0 = "unit1", dkl010z0 = "unit2"))
#> apply meta data$unit = meta[meta$parameter_original == data$parameter]$unit:
#>   missing keys in meta$parameter_original: rre150z0
#>   replacements used: dkl010z0, rre150z0
res
#> # A tibble: 60 × 6
#>    starttime           site  parameter interval unit  value
#>    <dttm>              <fct> <fct>     <fct>    <fct> <dbl>
#>  1 2018-01-01 00:50:00 CHU   dkl010z0  min10    unit2 216  
#>  2 2018-01-01 00:50:00 CHU   fkl010z0  min10    m/s     1.7
#>  3 2018-01-01 00:50:00 CHU   fkl010z1  min10    m/s     3  
#>  4 2018-01-01 00:50:00 CHU   gre000z0  min10    W/m2    5  
#>  5 2018-01-01 00:50:00 CHU   hto000s0  min10    cm     NA  
#>  6 2018-01-01 00:50:00 CHU   prestas0  min10    hPa   946. 
#>  7 2018-01-01 00:50:00 CHU   rre150z0  min10    unit1   0  
#>  8 2018-01-01 00:50:00 CHU   sre000z0  min10    min     0  
#>  9 2018-01-01 00:50:00 CHU   tre200s0  min10    °C      2.3
#> 10 2018-01-01 00:50:00 CHU   ure200s0  min10    %      82.7
#> # ℹ 50 more rows

# rename the SwissMetNet Parameters, still no mapping for rre150z0, we aren't
# interested in the data and drop it
res2 <- meta_apply(res, meta, "parameter", "parameter",
         "parameter_original", "parameter", mode = "drop")
#> Warning: apply meta data$parameter = meta[meta$parameter_original == data$parameter]$parameter:
#>   missing keys in meta$parameter_original: rre150z0
#>   dropping missing prameters
res2
#> # A tibble: 54 × 6
#>    starttime           site  parameter interval unit  value
#>    <dttm>              <fct> <fct>     <fct>    <fct> <dbl>
#>  1 2018-01-01 00:50:00 CHU   WD        min10    unit2 216  
#>  2 2018-01-01 00:50:00 CHU   WVs       min10    m/s     1.7
#>  3 2018-01-01 00:50:00 CHU   WVs_max   min10    m/s     3  
#>  4 2018-01-01 00:50:00 CHU   StrGlo    min10    W/m2    5  
#>  5 2018-01-01 00:50:00 CHU   SnowDep   min10    cm     NA  
#>  6 2018-01-01 00:50:00 CHU   p         min10    hPa   946. 
#>  7 2018-01-01 00:50:00 CHU   SunDur    min10    min     0  
#>  8 2018-01-01 00:50:00 CHU   T         min10    °C      2.3
#>  9 2018-01-01 00:50:00 CHU   Hr        min10    %      82.7
#> 10 2018-01-01 01:00:00 CHU   WD        min10    unit2 209  
#> # ℹ 44 more rows

# or we keep it
res <- meta_apply(res, meta, "parameter", "parameter",
         "parameter_original", "parameter", mode = "keep")
#> Warning: apply meta data$parameter = meta[meta$parameter_original == data$parameter]$parameter:
#>   missing keys in meta$parameter_original: rre150z0
#>   keeping values: rre150z0
res
#> # A tibble: 60 × 6
#>    starttime           site  parameter interval unit  value
#>    <dttm>              <fct> <fct>     <fct>    <fct> <dbl>
#>  1 2018-01-01 00:50:00 CHU   WD        min10    unit2 216  
#>  2 2018-01-01 00:50:00 CHU   WVs       min10    m/s     1.7
#>  3 2018-01-01 00:50:00 CHU   WVs_max   min10    m/s     3  
#>  4 2018-01-01 00:50:00 CHU   StrGlo    min10    W/m2    5  
#>  5 2018-01-01 00:50:00 CHU   SnowDep   min10    cm     NA  
#>  6 2018-01-01 00:50:00 CHU   p         min10    hPa   946. 
#>  7 2018-01-01 00:50:00 CHU   rre150z0  min10    unit1   0  
#>  8 2018-01-01 00:50:00 CHU   SunDur    min10    min     0  
#>  9 2018-01-01 00:50:00 CHU   T         min10    °C      2.3
#> 10 2018-01-01 00:50:00 CHU   Hr        min10    %      82.7
#> # ℹ 50 more rows

# rename the site abbreviation to the site name, strict should work
res <- meta_apply(res, meta, "site", "site", "site_short", "site")
res
#> # A tibble: 60 × 6
#>    starttime           site  parameter interval unit  value
#>    <dttm>              <fct> <fct>     <fct>    <fct> <dbl>
#>  1 2018-01-01 00:50:00 Chur  WD        min10    unit2 216  
#>  2 2018-01-01 00:50:00 Chur  WVs       min10    m/s     1.7
#>  3 2018-01-01 00:50:00 Chur  WVs_max   min10    m/s     3  
#>  4 2018-01-01 00:50:00 Chur  StrGlo    min10    W/m2    5  
#>  5 2018-01-01 00:50:00 Chur  SnowDep   min10    cm     NA  
#>  6 2018-01-01 00:50:00 Chur  p         min10    hPa   946. 
#>  7 2018-01-01 00:50:00 Chur  rre150z0  min10    unit1   0  
#>  8 2018-01-01 00:50:00 Chur  SunDur    min10    min     0  
#>  9 2018-01-01 00:50:00 Chur  T         min10    °C      2.3
#> 10 2018-01-01 00:50:00 Chur  Hr        min10    %      82.7
#> # ℹ 50 more rows