그룹 내 개인뿐만 아니라
# Sample five species with replacement, weighting each species by its total
# sepal length, then pull every iris row for each species that was drawn.
# `%>%` replaces the deprecated `%.%` chain operator, and `TRUE` replaces the
# reassignable shorthand `T`.
species <- iris %>%
  group_by(Species) %>%
  summarise(wt = sum(Sepal.Length)) %>%
  sample_n(5, replace = TRUE, weight = wt) %>%
  select(-wt)
# `species` only carries the Species column at this point; name the key
# explicitly rather than relying on the natural-join guess.
inner_join(species, iris, by = "Species")
이게 왜 닫혔는지 궁금합니다. 잠재적으로 유용한 기능인 것 같습니다
# Group iris by species and draw one random row from the grouped table.
sample_n(group_by(iris, Species), 1)
임의의 종에서 모든 데이터를 선택하려면, 예를 들어
그룹 내 샘플링이 직관적인 동작이기 때문에 sample_n의 동작이 그룹에 대해 변경되어야 한다고 생각하지 않습니다. 그러나 그룹 전체를 샘플링할 수 있는 것이 편리한 경우가 많습니다. 이것은 두 번째 기능이어야 합니다. 내 구현은 다음과 같습니다.
#' Sample whole groups from a grouped table
#'
#' Draws `size` groups at random and returns the rows of `tbl` that belong to
#' a selected group, regrouped by the original grouping variables.
#'
#' @param tbl A grouped table (e.g. the result of `group_by()`).
#' @param size Number of groups to draw; forwarded to `sample_n()`.
#' @param replace Forwarded to `sample_n()`. The selection is applied with
#'   `semi_join()`, so a group that is drawn more than once is still returned
#'   only once.
#' @param weight Forwarded unchanged to `sample_n()`.
#' @return The rows of `tbl` belonging to the sampled groups, grouped by the
#'   original grouping variables.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Remember the grouping columns so the result can be regrouped when done.
  grps <- tbl %>% groups %>% unlist %>% as.character
  # Sampling groups needs at least one grouping column, so fail early.
  if (length(grps) == 0L) {
    stop("`tbl` must be grouped to sample groups from it.", call. = FALSE)
  }
  # One row per group. Ungroup first so the draw is made across all groups
  # rather than inside a grouping level that summarise() may leave behind.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace, weight)
  # Keep only the selected groups, then restore the original grouping.
  # `.dots` is required for more than one grouping column.
  tbl %>%
    semi_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
@rcorty 의 예제는 예상대로 작동합니다.
# Group iris by species and pass the grouped table to sample_n_groups()
# with size 1.
sample_n_groups(group_by(iris, Species), 1)
+1
ํธ์ง: dplyr
๋ํ ๋ณ๊ฒฝ์ผ๋ก ์ธํด ์ด ์๋ฃจ์
์ด ์ค๋จ๋์์ต๋๋ค.
์ด ๊ธฐ๋ฅ์ ์ฐพ๊ณ ์๋ ๊ฒ์ ์์ง์ ํตํด ์ฌ๊ธฐ์ ๋์ฐฉํ ๋ถ๋ค์ ์ํด @MarcusWalz์ ์ํ ๊ตฌํ์ replace = TRUE
๋ ๋์ฒด๋ก ์ํ๋งํ์ง ์์ต๋๋ค. ์ค๋ณต์ ์ ์งํ๋ ค๋ฉด ๊ตฌํ์์ right_join
(๋๋ left_join
๋๋ inner_join
)๋ฅผ ์ฌ์ฉํด์ผ ํฉ๋๋ค.
#' Sample whole groups from a grouped table, keeping repeated draws
#'
#' Draws `size` groups at random and returns the rows of `tbl` that belong to
#' a selected group, regrouped by the original grouping variables.
#'
#' @param tbl A grouped table (e.g. the result of `group_by()`).
#' @param size Number of groups to draw; forwarded to `sample_n()`.
#' @param replace Forwarded to `sample_n()`. The selection is applied with
#'   `right_join()`, so a group drawn several times contributes its rows once
#'   per draw.
#' @param weight Forwarded unchanged to `sample_n()`.
#' @return The rows of `tbl` belonging to the sampled groups, grouped by the
#'   original grouping variables.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Remember the grouping columns so the result can be regrouped when done.
  grps <- tbl %>% groups %>% unlist %>% as.character
  # Sampling groups needs at least one grouping column, so fail early.
  if (length(grps) == 0L) {
    stop("`tbl` must be grouped to sample groups from it.", call. = FALSE)
  }
  # One row per group. Ungroup first so the draw is made across all groups
  # rather than inside a grouping level that summarise() may leave behind.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace, weight)
  # Join from the sampled keys so duplicates in `keep` are preserved, then
  # restore the original grouping. `.dots` is required for more than one
  # grouping column.
  tbl %>%
    right_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
클러스터 부트스트래핑은 이 기능의 광범위한 사용 사례입니다.
@drhagen , ๊ตฌํ ์ ์๋ก์ด ๊ณ ์ ๊ทธ๋ฃน ID๋ฅผ ์์ฑํ๋ ๋ฐฉ๋ฒ์ ๋ํ ์ ์ ์ฌํญ์ด ์์ต๋๊น?
사실 이것은 아주 쉽습니다.
#' Sample whole groups from a grouped table and label each draw
#'
#' Draws `size` groups at random and returns the rows of `tbl` that belong to
#' a selected group, with a `unique_id` column identifying the draw each row
#' came from. The result is regrouped by the original grouping variables.
#'
#' @param tbl A grouped table (e.g. the result of `group_by()`).
#' @param size Number of groups to draw; forwarded to `sample_n()`.
#' @param replace Forwarded to `sample_n()`. The selection is applied with
#'   `right_join()`, so a group drawn several times contributes its rows once
#'   per draw, each copy carrying a different `unique_id`.
#' @param weight Forwarded unchanged to `sample_n()`.
#' @return The rows of `tbl` belonging to the sampled groups plus a
#'   `unique_id` column, grouped by the original grouping variables.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Remember the grouping columns so the result can be regrouped when done.
  grps <- tbl %>% groups %>% unlist %>% as.character
  # Sampling groups needs at least one grouping column, so fail early.
  if (length(grps) == 0L) {
    stop("`tbl` must be grouped to sample groups from it.", call. = FALSE)
  }
  # One row per group. Ungroup first so the draw is made across all groups
  # rather than inside a grouping level that summarise() may leave behind.
  # seq_len() (unlike 1:n) yields an empty vector when nothing was drawn.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace, weight) %>%
    mutate(unique_id = seq_len(NROW(.)))
  # Join from the sampled keys so duplicates in `keep` are preserved, then
  # restore the original grouping. `.dots` is required for more than one
  # grouping column.
  tbl %>%
    right_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
@drhagen์ ์ ๋ต๋ณ์
#' Sample whole groups from a grouped table, keeping repeated draws
#'
#' Draws `size` groups at random and returns the rows of `tbl` that belong to
#' a selected group, regrouped by the original grouping variables.
#'
#' @param tbl A grouped table (e.g. the result of `group_by()`).
#' @param size Number of groups to draw; forwarded to `sample_n()`.
#' @param replace Forwarded to `sample_n()`. The selection is applied with
#'   `right_join()`, so a group drawn several times contributes its rows once
#'   per draw.
#' @param weight Forwarded unchanged to `sample_n()`.
#' @return The rows of `tbl` belonging to the sampled groups, grouped by the
#'   original grouping variables.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Remember the grouping columns (as strings) so the result can be regrouped.
  grps <- tbl %>% groups %>% lapply(as.character) %>% unlist
  # Sampling groups needs at least one grouping column, so fail early.
  if (length(grps) == 0L) {
    stop("`tbl` must be grouped to sample groups from it.", call. = FALSE)
  }
  # One row per group, ungrouped so the draw is made across all groups.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace, weight)
  # Join from the sampled keys so duplicates in `keep` are preserved, then
  # restore the original grouping.
  tbl %>%
    right_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
가장 유용한 댓글
@drhagen์ ์ ๋ต๋ณ์