当前位置: 首页>>代码示例 >>用法及示例精选 >>正文


R dtplyr lazy_dt 创建一个“惰性”data.table 以与 dplyr 动词一起使用


惰性 data.table 会惰性地捕获 dplyr 动词的意图,仅在请求时才实际执行计算(通过 collect()、pull()、as.data.frame()、data.table::as.data.table() 或 tibble::as_tibble())。这允许 dtplyr 将 dplyr 动词转换为尽可能少的 data.table 表达式,从而实现高性能的转换。

有关转换的详细信息,请参阅 vignette("translation")。

用法

lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)

参数

x

一个 data.table(或者可以被强制转换为 data.table 的对象)。

name

(可选)提供要在生成的表达式中使用的名称。仅供专家使用。

immutable

如果为 TRUE,x 被视为不可变,并且永远不会被 dtplyr 生成的任何代码修改。或者,您可以设置 immutable = FALSE 以允许 dtplyr 修改输入对象。

key_by

使用 select() 语义设置数据帧的键(例如 key_by = c(key1, key2))。

这会使用 data.table::setkey() 对表进行排序并构建索引。这将显著提高使用这些键的子集、汇总和联接操作的性能。

有关详细信息,请参阅 vignette("datatable-keys-fast-subset")。

例子

# Attach dplyr for the verbs and the %>% pipe used throughout this example;
# warn.conflicts = FALSE silences the attach-time conflict warnings.
# NOTE(review): lazy_dt() is called unqualified later in this example, but
# dtplyr is never attached here -- presumably the example assumes
# library(dtplyr) has already been run; confirm before running standalone.
library(dplyr, warn.conflicts = FALSE)

# If you have a data.table, using it with any dplyr generic will
# automatically convert it to a lazy_dt object
# NOTE(review): the recorded output below is in plain data.table print
# format (no "Source:" / "Call:" header like the lazy_dt prints further
# down) -- confirm against a fresh run.
# Ten rows: x counts up from 1 to 10 while y counts down from 10 to 1.
dt <- data.table::data.table(x = 1:10, y = 10:1)
# x + y is 11 on every row, so x never equals y and no row is kept.
dt %>% filter(x == y)
#> Empty data.table (0 rows and 2 cols): x,y
# Adds a column z holding the row-wise sum of x and y.
dt %>% mutate(z = x + y)
#>      x  y  z
#>  1:  1 10 11
#>  2:  2  9 11
#>  3:  3  8 11
#>  4:  4  7 11
#>  5:  5  6 11
#>  6:  6  5 11
#>  7:  7  4 11
#>  8:  8  3 11
#>  9:  9  2 11
#> 10: 10  1 11

# Note that dtplyr will avoid mutating the input data.table, so the
# previous translation includes an automatic copy(). You can avoid this
# with a manual call to lazy_dt()
# With immutable = FALSE, the generated call (the "Call:" line in the output
# below) applies `:=` to the source table directly, with no copy() wrapper.
dt %>%
  lazy_dt(immutable = FALSE) %>%
  mutate(z = x + y)
#> Source: local data table [10 x 3]
#> Call:   `_DT20`[, `:=`(z = x + y)]
#> 
#>       x     y     z
#>   <int> <int> <int>
#> 1     1    10    11
#> 2     2     9    11
#> 3     3     8    11
#> 4     4     7    11
#> 5     5     6    11
#> 6     6     5    11
#> # … with 4 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# If you have a data frame, you can use lazy_dt() to convert it to
# a data.table:
mtcars2 <- lazy_dt(mtcars)
# Printing a lazy_dt shows the source dimensions, the data.table call built
# so far (here just the source table name), a preview of the first rows, and
# a reminder of how to access the results.
mtcars2
#> Source: local data table [32 x 11]
#> Call:   `_DT21`
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4
#> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4
#> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1
#> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1
#> 5  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2
#> 6  18.1     6   225   105  2.76  3.46  20.2     1     0     3     1
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# A column range in select() is translated to an explicit list of columns
# in the generated call: .(mpg, cyl).
mtcars2 %>% select(mpg:cyl)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(mpg, cyl)]
#> 
#>     mpg   cyl
#>   <dbl> <dbl>
#> 1  21       6
#> 2  21       6
#> 3  22.8     4
#> 4  21.4     6
#> 5  18.7     8
#> 6  18.1     6
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Renaming inside select() becomes named elements in the generated list:
# .(x = mpg, y = cyl).
mtcars2 %>% select(x = mpg, y = cyl)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(x = mpg, y = cyl)]
#> 
#>       x     y
#>   <dbl> <dbl>
#> 1  21       6
#> 2  21       6
#> 3  22.8     4
#> 4  21.4     6
#> 5  18.7     8
#> 6  18.1     6
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# filter() followed by select() is combined into a single bracket call:
# the row condition and the column list share one `[`.
mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#> Source: local data table [11 x 1]
#> Call:   `_DT21`[cyl == 4, .(mpg)]
#> 
#>     mpg
#>   <dbl>
#> 1  22.8
#> 2  24.4
#> 3  22.8
#> 4  32.4
#> 5  30.4
#> 6  33.9
#> # … with 5 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# In the opposite order (select() then filter()) the generated call chains
# two bracket calls instead of merging them into one.
mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#> Source: local data table [11 x 2]
#> Call:   `_DT21`[, .(mpg, cyl)][cyl == 4]
#> 
#>     mpg   cyl
#>   <dbl> <dbl>
#> 1  22.8     4
#> 2  24.4     4
#> 3  22.8     4
#> 4  32.4     4
#> 5  30.4     4
#> 6  33.9     4
#> # … with 5 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# cyl4 depends on cyl2, which is created in the same mutate(). The generated
# call works on a copy() of the source and evaluates both assignments in
# order inside a braced block before assigning the two new columns.
mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#> Source: local data table [32 x 13]
#> Call:   copy(`_DT21`)[, `:=`(c("cyl2", "cyl4"), {
#>     cyl2 <- cyl * 2
#>     cyl4 <- cyl2 * 2
#>     .(cyl2, cyl4)
#> })]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4    12
#> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4    12
#> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1     8
#> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1    12
#> 5  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2    16
#> 6  18.1     6   225   105  2.76  3.46  20.2     1     0     3     1    12
#> # … with 26 more rows, and 1 more variable: cyl4 <dbl>
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# transmute() keeps only the newly computed columns; the generated call is a
# plain column list with no copy() and no `:=`.
mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#> Source: local data table [32 x 2]
#> Call:   `_DT21`[, .(cyl2 = cyl * 2, vs2 = vs * 2)]
#> 
#>    cyl2   vs2
#>   <dbl> <dbl>
#> 1    12     0
#> 2    12     0
#> 3     8     2
#> 4    12     2
#> 5    16     0
#> 6    12     2
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# After a filter() the `:=` step is chained without copy() -- presumably
# because the filter already produces a new table; confirm in
# vignette("translation").
mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#> Source: local data table [14 x 12]
#> Call:   `_DT21`[cyl == 8][, `:=`(cyl2 = cyl * 2)]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    16
#> 2  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    16
#> 3  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3    16
#> 4  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3    16
#> 5  15.2     8  276.   180  3.07  3.78  18       0     0     3     3    16
#> 6  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4    16
#> # … with 8 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# Learn more about translation in vignette("translation")
# Group the lazy table by cyl; the grouping shows up in each generated call
# below.
by_cyl <- mtcars2 %>% group_by(cyl)
# Grouped summarise(): the grouping becomes keyby = .(cyl) and the result
# has one row per cyl value.
by_cyl %>% summarise(mpg = mean(mpg))
#> Source: local data table [3 x 2]
#> Call:   `_DT21`[, .(mpg = mean(mpg)), keyby = .(cyl)]
#> 
#>     cyl   mpg
#>   <dbl> <dbl>
#> 1     4  26.7
#> 2     6  19.7
#> 3     8  15.1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Grouped mutate(): `:=` with by = .(cyl) on a copy() of the source. All 32
# rows are kept, mpg is replaced by its per-group mean, and the result is
# still reported as grouped by cyl.
by_cyl %>% mutate(mpg = mean(mpg))
#> Source: local data table [32 x 11]
#> Groups: cyl
#> Call:   copy(`_DT21`)[, `:=`(mpg = mean(mpg)), by = .(cyl)]
#> 
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.7     6   160   110  3.9   2.62  16.5     0     1     4     4
#> 2  19.7     6   160   110  3.9   2.88  17.0     0     1     4     4
#> 3  26.7     4   108    93  3.85  2.32  18.6     1     1     4     1
#> 4  19.7     6   258   110  3.08  3.22  19.4     1     0     3     1
#> 5  15.1     8   360   175  3.15  3.44  17.0     0     0     3     2
#> 6  19.7     6   225   105  2.76  3.46  20.2     1     0     3     1
#> # … with 26 more rows
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Grouped filter() then summarise(): the generated call first collects, per
# cyl group, the row indices (.I) where mpg is below the group mean, uses
# them to subset the source table, and then computes mean(hp) keyed by cyl.
by_cyl %>%
  filter(mpg < mean(mpg)) %>%
  summarise(hp = mean(hp))
#> Source: local data table [3 x 2]
#> Call:   `_DT21`[`_DT21`[, .I[mpg < mean(mpg)], by = .(cyl)]$V1, .(hp = mean(hp)), 
#>     keyby = .(cyl)]
#> 
#>     cyl    hp
#>   <dbl> <dbl>
#> 1     4  91.2
#> 2     6 132. 
#> 3     8 246. 
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
源代码:R/step-first.R

相关用法


注:本文由纯净天空筛选整理自Hadley Wickham等大神的英文原创作品 Create a "lazy" data.table for use with dplyr verbs。非经特殊声明,原始代码版权归原作者所有,本译文未经允许或授权,请勿转载或复制。