์ค๋ SO ์ ๋ฌธ์ ์ ๋ถ๋ช์ณค์ต๋๋ค.
# Toy event log: one row per user action, with a category code and a value.
actions = data.table(User_id = c("Carl","Carl","Carl","Lisa","Moe"),
category = c(1,1,2,2,1),
value= c(10,20,30,40,50))
# Add a constant column with `:=`, grouped by user, and bind the result to `users`.
# NOTE(review): `:=` updates by reference, so `users` presumably refers to the
# same table as `actions` rather than an independent copy -- confirm.
users = actions[, other_var := 1, by=User_id]
# verbose says: the following is not optimized
# Approach 1: initialise value_one to 0, then update-join against the
# category == 1 rows, assigning sum(value) once per row of the join table
# (by=.EACHI).
users[, value_one := 0 ]
users[actions[category==1], value_one := sum(value), on="User_id", by=.EACHI, verbose=TRUE]
# verbose says: the following is optimized
# Approach 2: stack the category == 1 rows on top of one zero-valued row per
# distinct user, then aggregate sum(value) by user.
rbind(
actions[category==1],
unique(actions[,"User_id", with=FALSE])[, value := 0 ],
fill=TRUE)[, sum(value), by=User_id, verbose=TRUE]
변수가 결국 users 로 끝나야 한다는 점을 고려할 때 첫 번째 방법은 관용적으로 보입니다.
기타: https://stackoverflow.com/a/47338118/ (gtail)
또 다른 https://stackoverflow.com/a/51569126/ 은 DT[, mx := max(pt), by=Subject][, diff := mx - pt][] 해야 합니다.
메모리 성능에 특히 관심이 있는 또 다른 항목: https://stackoverflow.com/q/52189712 "data.table 참조 의미: 모든 열을 통한 반복의 메모리 사용"
다른 하나는 scale/demean 다중 변수를 원합니다: https://stackoverflow.com/q/52528123
ํ์ ์งํฉ ์กฐ๊ฑด์ผ๋ก ๊ทธ๋ฃน๋ณ๋ก ์ต๋๊ฐ์ ์ทจํ๊ณ :=(akrun์ ๋ต๋ณ ์ฐธ์กฐ) https://stackoverflow.com/a/54911855/๋ฅผ ์ถ๊ฐํ๋ ๊ฒ๋ #971์ ์ด๋ฏธ ์๋ฃ๋ ๋ถ๋ถ๊ณผ ๊ด๋ จ์ด ์์ต๋๋ค.
์ด ๊ธฐ๋ฅ์ ํ์ฑํํ๋ฉด ์ฝ๊ฐ์ ์์ ์ด ํ์ํ๋๋ผ๋ ๋ณต์กํ ํํ์ GForce๋ฅผ ํจ๊ณผ์ ์ผ๋ก ์ฌ์ฉํ ์ ์๋ค๋ ์ ์ ๊ฐ์กฐํ๊ณ ์ถ์์ต๋๋ค. ์๋ฅผ ๋ค์ด ์ด ๊ฒ์๋ฌผ์์ ๋ค์ ์ ์ํด ํ์ฑํํ๋ ๋ฐฉ๋ฒ์
# Slope of the least-squares line of `y` on `x`: the sum of cross-products of
# the mean-centred vectors divided by the sum of squared deviations of `x`.
#
# x, y: numeric vectors (used element-wise, so normally of equal length).
# Returns a single number; the result is NaN when `x` has no variation,
# because the denominator is then zero.
slope <- function(x, y) {
  dev_x <- x - mean(x)
  dev_y <- y - mean(y)
  cross_products <- sum(dev_x * dev_y)
  squared_devs <- sum(dev_x ^ 2)
  cross_products / squared_devs
}
함으로써:
# Per-group slope computed as a sequence of simple grouped steps.
# `grp`, `x` and `y` are not defined in this snippet; they must already exist.
DT <- data.table(grp, x, y)
setkey(DT, grp)
# Step 1: per-group means of x and y, one row per grp (result keyed by grp).
DTsum <- DT[, .(ux=mean(x), uy=mean(y)), keyby=grp]
# Step 2: join the group means back onto DT (no `on=`, so the keys are used)
# and add the mean-centred columns by reference.
DT[DTsum, `:=`(x_ux=x - ux, y_uy=y - uy)]
# Step 3: row-wise cross-product and squared deviation of x.
DT[, `:=`(x_ux.y_uy=x_ux * y_uy, x_ux2=x_ux^2)]
# Step 4: per-group sums of the two columns built in step 3.
DTsum <- DT[, .(x_ux.y_uy=sum(x_ux.y_uy), x_ux2=sum(x_ux2)), keyby=grp]
# Step 5: slope per group = summed cross-products / summed squared deviations.
res.slope.dt2 <- DTsum[, .(grp, V1=x_ux.y_uy / x_ux2)]
๋ฐ๋ฉด์ GForce๊ฐ :=
์์ ์ง์๋๋ค๋ฉด ์ฐ๋ฆฌ๋ ๋ค์์ ํ ์ ์์ต๋๋ค:
# Per-group slope where the group means are written straight into DT with a
# grouped `:=`, so the centring step needs no separate summary table or join.
# `grp`, `x` and `y` are not defined in this snippet; they must already exist.
DT <- data.table(grp, x, y)
# Step 1: add per-group means of x and y as columns ux / uy, grouped by grp.
DT[, `:=`(ux=mean(x), uy=mean(y)), keyby=grp]
# Step 2: mean-centre x and y row by row.
DT[, `:=`(x_ux=x - ux, y_uy=y - uy)]
# Step 3: row-wise cross-product and squared deviation of x.
DT[, `:=`(x_ux.y_uy=x_ux * y_uy, x_ux2=x_ux^2)]
# Step 4: per-group sums of the two columns built in step 3.
DTsum <- DT[, .(x_ux.y_uy=sum(x_ux.y_uy), x_ux2=sum(x_ux2)), keyby=grp]
# Step 5: slope per group; the ratio column is left unnamed here.
res.slope.dt3 <- DTsum[, .(grp, x_ux.y_uy/x_ux2)]
더 깨끗해 보이고 더 빨라야 합니다.
@MichaelChirico์์ ํ ๋ก ์ ํตํด ์ด ๋ฌธ์ ์ ๋งค์ฐ ๊ฐ๊น์ด ์ฌ์ด์ด ๋ค์๊ณผ
> DT <- data.table(x, y, grp)
> DT[, .(x, mean(x)), keyby=grp]
Detected that j uses these columns: x
Finding groups using forderv ... 1.049s elapsed (0.946s cpu)
Finding group sizes from the positions (can be avoided to save RAM) ... 0.011s elapsed (0.011s cpu)
lapply optimization is on, j unchanged as 'list(x, mean(x))'
GForce is on, left j unchanged
Old mean optimization changed j from 'list(x, mean(x))' to 'list(x, .External(Cfastmean, x, FALSE))'
Making each group and running j (GForce FALSE) ...
collecting discontiguous groups took 1.293s for 999953 groups
eval(j) took 1.860s for 999953 calls
5.517s elapsed (3.862s cpu)
grp x V2
1: 1 0.2151365 0.5512966
2: 1 0.5358256 0.5512966
3: 1 0.8496598 0.5512966
4: 1 0.8480730 0.5512966
5: 1 0.3464458 0.5512966
---
9999996: 1000000 0.2601940 0.5474986
9999997: 1000000 0.7940921 0.5474986
9999998: 1000000 0.3825493 0.5474986
9999999: 1000000 0.1786861 0.5474986
10000000: 1000000 0.9179119 0.5474986
#523과의 교차 링크.
가장 유용한 댓글
์ด ๊ธฐ๋ฅ์ ํ์ฑํํ๋ฉด ์ฝ๊ฐ์ ์์ ์ด ํ์ํ๋๋ผ๋ ๋ณต์กํ ํํ์ GForce๋ฅผ ํจ๊ณผ์ ์ผ๋ก ์ฌ์ฉํ ์ ์๋ค๋ ์ ์ ๊ฐ์กฐํ๊ณ ์ถ์์ต๋๋ค. ์๋ฅผ ๋ค์ด ์ด ๊ฒ์๋ฌผ์์ ๋ค์ ์ ์ํด ํ์ฑํํ๋ ๋ฐฉ๋ฒ์
함으로써:
๋ฐ๋ฉด์ GForce๊ฐ
:=
์์ ์ง์๋๋ค๋ฉด ์ฐ๋ฆฌ๋ ๋ค์์ ํ ์ ์์ต๋๋ค:๋ ๊นจ๋ํด ๋ณด์ด๊ณ ๋ ๋นจ๋ผ์ผ ํฉ๋๋ค.