例子
# \donttest{
lahman_s <- dbplyr::lahman_sqlite()
#> Creating table: AllstarFull
#> Creating table: Appearances
#> Creating table: AwardsManagers
#> Creating table: AwardsPlayers
#> Creating table: AwardsShareManagers
#> Creating table: AwardsSharePlayers
#> Creating table: Batting
#> Creating table: BattingPost
#> Creating table: CollegePlaying
#> Creating table: Fielding
#> Creating table: FieldingOF
#> Creating table: FieldingOFsplit
#> Creating table: FieldingPost
#> Creating table: HallOfFame
#> Creating table: HomeGames
#> Creating table: LahmanData
#> Creating table: Managers
#> Creating table: ManagersHalf
#> Creating table: Parks
#> Creating table: People
#> Creating table: Pitching
#> Creating table: PitchingPost
#> Creating table: Salaries
#> Creating table: Schools
#> Creating table: SeriesPost
#> Creating table: Teams
#> Creating table: TeamsFranchises
#> Creating table: TeamsHalf
batting <- tbl(lahman_s, "Batting")
batting %>% show_query()
#> <SQL>
#> SELECT *
#> FROM `Batting`
batting %>% explain()
#> <SQL>
#> SELECT *
#> FROM `Batting`
#>
#> <PLAN>
#> id parent notused detail
#> 1 2 0 0 SCAN Batting
# The batting database has indices on all ID variables:
# SQLite automatically picks the most restrictive index
batting %>% filter(lgID == "NL" & yearID == 2000L) %>% explain()
#> <SQL>
#> SELECT *
#> FROM `Batting`
#> WHERE (`lgID` = 'NL' AND `yearID` = 2000)
#>
#> <PLAN>
#> id parent notused detail
#> 1 3 0 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?)
# OR's will use multiple indexes
batting %>% filter(lgID == "NL" | yearID == 2000) %>% explain()
#> <SQL>
#> SELECT *
#> FROM `Batting`
#> WHERE (`lgID` = 'NL' OR `yearID` = 2000.0)
#>
#> <PLAN>
#> id parent notused detail
#> 1 4 0 0 MULTI-INDEX OR
#> 2 5 4 0 INDEX 1
#> 3 11 5 0 SEARCH Batting USING INDEX Batting_lgID (lgID=?)
#> 4 16 4 0 INDEX 2
#> 5 22 16 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?)
# Joins will use indexes in both tables
teams <- tbl(lahman_s, "Teams")
batting %>% left_join(teams, c("yearID", "teamID")) %>% explain()
#> <SQL>
#> SELECT
#> `playerID`,
#> `Batting`.`yearID` AS `yearID`,
#> `stint`,
#> `Batting`.`teamID` AS `teamID`,
#> `Batting`.`lgID` AS `lgID.x`,
#> `Batting`.`G` AS `G.x`,
#> `Batting`.`AB` AS `AB.x`,
#> `Batting`.`R` AS `R.x`,
#> `Batting`.`H` AS `H.x`,
#> `Batting`.`X2B` AS `X2B.x`,
#> `Batting`.`X3B` AS `X3B.x`,
#> `Batting`.`HR` AS `HR.x`,
#> `RBI`,
#> `Batting`.`SB` AS `SB.x`,
#> `Batting`.`CS` AS `CS.x`,
#> `Batting`.`BB` AS `BB.x`,
#> `Batting`.`SO` AS `SO.x`,
#> `IBB`,
#> `Batting`.`HBP` AS `HBP.x`,
#> `SH`,
#> `Batting`.`SF` AS `SF.x`,
#> `GIDP`,
#> `Teams`.`lgID` AS `lgID.y`,
#> `franchID`,
#> `divID`,
#> `Rank`,
#> `Teams`.`G` AS `G.y`,
#> `Ghome`,
#> `W`,
#> `L`,
#> `DivWin`,
#> `WCWin`,
#> `LgWin`,
#> `WSWin`,
#> `Teams`.`R` AS `R.y`,
#> `Teams`.`AB` AS `AB.y`,
#> `Teams`.`H` AS `H.y`,
#> `Teams`.`X2B` AS `X2B.y`,
#> `Teams`.`X3B` AS `X3B.y`,
#> `Teams`.`HR` AS `HR.y`,
#> `Teams`.`BB` AS `BB.y`,
#> `Teams`.`SO` AS `SO.y`,
#> `Teams`.`SB` AS `SB.y`,
#> `Teams`.`CS` AS `CS.y`,
#> `Teams`.`HBP` AS `HBP.y`,
#> `Teams`.`SF` AS `SF.y`,
#> `RA`,
#> `ER`,
#> `ERA`,
#> `CG`,
#> `SHO`,
#> `SV`,
#> `IPouts`,
#> `HA`,
#> `HRA`,
#> `BBA`,
#> `SOA`,
#> `E`,
#> `DP`,
#> `FP`,
#> `name`,
#> `park`,
#> `attendance`,
#> `BPF`,
#> `PPF`,
#> `teamIDBR`,
#> `teamIDlahman45`,
#> `teamIDretro`
#> FROM `Batting`
#> LEFT JOIN `Teams`
#> ON (
#> `Batting`.`yearID` = `Teams`.`yearID` AND
#> `Batting`.`teamID` = `Teams`.`teamID`
#> )
#>
#> <PLAN>
#> id parent notused
#> 1 4 0 0
#> 2 6 0 0
#> detail
#> 1 SCAN Batting
#> 2 SEARCH Teams USING INDEX Teams_yearID (yearID=?) LEFT-JOIN
# }
相關用法
- R dplyr group_trim 修剪分組結構
- R dplyr slice 使用行的位置對行進行子集化
- R dplyr copy_to 將本地數據幀複製到遠程src
- R dplyr sample_n 從表中采樣 n 行
- R dplyr consecutive_id 為連續組合生成唯一標識符
- R dplyr row_number 整數排名函數
- R dplyr band_members 樂隊成員
- R dplyr mutate-joins 變異連接
- R dplyr nth 從向量中提取第一個、最後一個或第 n 個值
- R dplyr coalesce 找到第一個非缺失元素
- R dplyr group_split 按組分割 DataFrame
- R dplyr mutate 創建、修改和刪除列
- R dplyr order_by 用於排序窗口函數輸出的輔助函數
- R dplyr context 有關“當前”組或變量的信息
- R dplyr percent_rank 比例排名函數
- R dplyr recode 重新編碼值
- R dplyr starwars 星球大戰人物
- R dplyr desc 降序
- R dplyr between 檢測值落在指定範圍內的位置
- R dplyr cumall 任何、全部和平均值的累積版本
- R dplyr group_map 對每個組應用一個函數
- R dplyr do 做任何事情
- R dplyr nest_join 嵌套連接
- R dplyr pull 提取單列
- R dplyr group_by_all 按選擇的變量進行分組
注:本文由純淨天空篩選整理自Hadley Wickham等大神的英文原創作品 Explain details of a tbl。非經特殊聲明,原始代碼版權歸原作者所有,本譯文未經允許或授權,請勿轉載或複製。