А также отдельные лица в группах
# Draw five species at random (with replacement), weighting each species by
# its total sepal length, then pull in every iris row of the species drawn.
# `%>%` replaces the `%.%` chain operator, which is no longer part of dplyr.
species <- iris %>%
  group_by(Species) %>%
  summarise(wt = sum(Sepal.Length)) %>%
  sample_n(5, replace = TRUE, weight = wt) %>%
  select(-wt)
# Join on the key explicitly; a species drawn k times contributes its rows
# k times to the result.
inner_join(species, iris, by = "Species")
Интересно, почему это было закрыто? Похоже на потенциально полезную функцию
# sample_n() on a grouped table samples inside each group, so this draws
# one random row per Species.
sample_n(group_by(iris, Species), 1)
чтобы выбрать все данные из случайного вида, например
Я не думаю, что поведение sample_n
должно измениться для групп, потому что выборка внутри групп — это его интуитивное поведение. Однако часто бывает удобно выбирать группы целиком. Это должна быть вторая функция. Вот моя реализация:
# Sample whole groups from a grouped table.
#
# tbl     a grouped table (as produced by group_by()).
# size    number of groups to draw.
# replace whether groups are drawn with replacement.
# weight  optional sampling weights, forwarded to sample_n(); they are applied
#         to the one-row-per-group table, not to the rows of `tbl`.
#
# Returns the rows of `tbl` that belong to the drawn groups, grouped by the
# original grouping variables.
# NOTE: semi_join() only filters `tbl`, so a group drawn more than once with
# replace = TRUE still appears only once in the result.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Names of the grouping variables, kept so the result can be regrouped.
  grps <- vapply(groups(tbl), as.character, character(1))
  # One row per group. summarise() only peels off the last grouping level, so
  # ungroup before sampling; otherwise, with several grouping variables,
  # sample_n() would draw `size` rows inside each remaining outer group.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace = replace, weight = weight)
  # Keep only the selected groups, joining on the grouping variables
  # explicitly, then regroup on all of them (`.dots` takes the whole vector).
  tbl %>%
    semi_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
Пример @rcorty работает, как и ожидалось
# Keep every row of one randomly chosen Species.
sample_n_groups(group_by(iris, Species), 1)
+1
Правка: изменение в dplyr нарушило это решение;
Для тех из вас, кто пришел сюда через поисковую систему в поисках этой функциональности: реализация @MarcusWalz не выполняет выборку с возвращением, когда replace = TRUE.
В реализации необходимо использовать right_join (или left_join, или inner_join) для сохранения дубликатов:
# Sample whole groups from a grouped table, keeping repeated draws.
#
# tbl     a grouped table (as produced by group_by()).
# size    number of groups to draw.
# replace whether groups are drawn with replacement.
# weight  optional sampling weights, forwarded to sample_n(); they are applied
#         to the one-row-per-group table, not to the rows of `tbl`.
#
# Returns the rows of `tbl` for every drawn group, grouped by the original
# grouping variables. Because the drawn groups are joined back with
# right_join(), a group drawn k times contributes its rows k times.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Names of the grouping variables, kept so the result can be regrouped.
  grps <- vapply(groups(tbl), as.character, character(1))
  # One row per group. summarise() only peels off the last grouping level, so
  # ungroup before sampling; otherwise, with several grouping variables,
  # sample_n() would draw `size` rows inside each remaining outer group.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace = replace, weight = weight)
  # Join the drawn groups back to the data, then regroup on all grouping
  # variables (`.dots` takes the whole character vector).
  tbl %>%
    right_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
Кластерный бутстреп (cluster bootstrap) — это самый распространенный вариант использования этой функции.
@drhagen, есть ли у вас предложения, как в вашей реализации создать новый уникальный идентификатор группы?
На самом деле это довольно просто:
# Sample whole groups from a grouped table and label every draw.
#
# tbl     a grouped table (as produced by group_by()).
# size    number of groups to draw.
# replace whether groups are drawn with replacement.
# weight  optional sampling weights, forwarded to sample_n(); they are applied
#         to the one-row-per-group table, not to the rows of `tbl`.
#
# Returns the rows of `tbl` for every drawn group plus a `unique_id` column
# numbering the draws, so repeated draws of the same group can be told apart.
# The result is grouped by the original grouping variables (not by
# `unique_id`).
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Names of the grouping variables, kept so the result can be regrouped.
  grps <- vapply(groups(tbl), as.character, character(1))
  # One row per group. summarise() only peels off the last grouping level, so
  # ungroup before sampling; otherwise, with several grouping variables,
  # sample_n() would draw `size` rows inside each remaining outer group.
  keep <- tbl %>%
    summarise() %>%
    ungroup() %>%
    sample_n(size, replace = replace, weight = weight) %>%
    # seq_len() instead of 1:NROW(.): an empty draw must give an empty id
    # vector, whereas 1:0 would give c(1, 0) and make mutate() fail.
    mutate(unique_id = seq_len(n()))
  # Join the drawn groups back to the data, then regroup on all grouping
  # variables (`.dots` takes the whole character vector).
  tbl %>%
    right_join(keep, by = grps) %>%
    group_by_(.dots = grps)
}
Ответ @drhagen выше выглядит устаревшим. Кажется, сейчас это работает:
# Sample whole groups from a grouped table.
#
# tbl     a grouped table (as produced by group_by()).
# size    number of groups to draw.
# replace whether groups are drawn with replacement.
# weight  optional sampling weights, passed straight through to sample_n().
#
# Returns the rows of `tbl` joined to the drawn groups, regrouped by the
# original grouping variables.
# NOTE: nothing here checks that `tbl` actually has grouping variables.
sample_n_groups <- function(tbl, size, replace = FALSE, weight = NULL) {
  # Grouping variable names, needed for the join key and the final regroup.
  group_names <- unlist(lapply(groups(tbl), as.character))
  # Collapse to one row per group and drop any grouping left by summarise().
  per_group <- ungroup(summarise(tbl))
  # Draw the groups to keep.
  drawn <- sample_n(per_group, size, replace, weight)
  # Bring back the data rows of every drawn group.
  joined <- right_join(tbl, drawn, by = group_names)
  # Restore the original grouping on the joined result.
  group_by_(joined, .dots = group_names)
}
Самый полезный комментарий
Ответ @drhagen выше выглядит устаревшим. Кажется, сейчас это работает: