心理統計法('17) その8

放送大学「心理統計法(‘17)」第10章 その 3

授業ではrstanパッケージを使ってますが、MCMCpackパッケージを使ってみます。初学者ですので間違いが多々あると思います。

(Rスクリプトはここ)
早稲田大学文学部文学研究科 豊田研究室

(参考)
ベイジアンMCMCによる統計モデル
Using the ggmcmc package
Bayesian Inference With Stan ~番外編~
Exercise 2: Bayesian A/B testing using MCMC Metropolis-Hastings
政治学方法論 I

1
2
3
library(MCMCpack)
library(knitr)
#library(loo)

推測例3(2水準の主効果の分析)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#表10.13「知覚時間」データ入力
y3<-c(
35.24,31.29,31.00,32.23,30.13,26.27,30.71,29.88,31.11,26.99,
28.73,31.34,32.09,33.68,31.60,32.17,29.54,28.75,29.57,26.70,
29.59,30.51,32.05,28.99,28.10,31.29,28.48,30.80,33.88,33.52,
34.34,29.29,30.00,33.31,27.98,29.23,32.34,30.54,31.82,31.85,
27.88,30.26,29.96,24.01,29.48,34.10,30.69,27.32,26.67,33.97,
27.20,30.14,31.62,29.46,29.19,29.18,31.26,28.61,26.66,25.21,
28.52,29.13,26.07,30.27,27.26,26.49,30.62,29.11,29.28,30.80,
32.03,32.48,28.64,24.34,29.19,25.09,35.23,24.04,30.39,31.51)
A <- rep(1:4,each=10,times=2) #要因Aの水準の指定
B <- rep(1:2,each=40) #要因Bの水準の指定
#
#mat<-matrix(c(mean(y3[1:10]), mean(y3[11:20]),mean(y3[21:30]),mean(y3[31:40]),
# mean(y3[41:50]),mean(y3[51:60]),mean(y3[61:70]),mean(y3[71:80]) ),ncol=2,byrow=F)
dat<-data.frame(A,B,y3)
Data<-split(dat,list(dat$A,dat$B))
mat <- matrix(apply(sapply(Data, "[[", 3),2,mean),ncol=2,byrow=F)
#
#png("sinri10_03.png")
par(family="TakaoPMincho")
matplot(1:4, mat, type="l", ylab="時間(秒)", xlab="",main="S3氏のデータの平均値プロット", lwd=1.5,col=1:2, xaxt = "n",las=1,ylim=c(28,33))
matplot(1:4, mat, type="p", ylab="", xlab="",main="", pch=19,cex=2, col=1:2,xaxt = "n",ylim=c(28,33),add=T)
axis(1,1:4,labels=c("対照","聴音","音読","運動"))
text(x=2.5,y=31,labels="平常時",col=1)
text(x=2.5,y=28.5,labels="起床直後",col=2)
#dev.off()

1
2
3
4
5
mean_sd <- matrix(apply(sapply(Data, "[[", 3),2,mean),ncol=8)
mean_sd <- rbind(mean_sd,matrix(apply(sapply(Data, "[[", 3),2,sd)*sqrt(9/10),ncol=8) )
colnames(mean_sd)<-c("平常時 対照","平常時 聴音","平常時 音読","平常時 運動","起床直後 対照","起床直後 聴音","起床直後 音読","起床直後 運動")
rownames(mean_sd)<-c("平均","標準偏差")
kable(round(mean_sd,2))
平常時 対照 平常時 聴音 平常時 音読 平常時 運動 起床直後 対照 起床直後 聴音 起床直後 音読 起床直後 運動
平均 30.48 30.42 30.72 31.07 29.43 28.85 28.75 29.29
標準偏差 2.40 1.99 1.90 1.90 2.98 1.91 1.59 3.60
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
fun <- function(beta, x1 , x2 , x3 , x4 ,x5 ,x6,x7,x8) {
# mu : beta[1]
# a1 : beta[2] , a2 : beta[3] , a3 : beta[4]
# b1 : beta[5]
# ab11 : beta[6] , ab21 : beta[7] , ab31 : beta[8]
# sigmaE : beta[9]
## a4 : -(beta[2]+beta[3]+beta[4])
## b2 : -beta[5]
## ab12 : -beta[6] , ab22 : -beta[7] , ab32 : -beta[8] ,
## ab41 : -(beta[6]+beta[7]+beta[8]) , ab42 : beta[6]+beta[7]+beta[8]
ifelse( beta[9] < 0 , l<- -Inf ,
### mu + a + b + (ab) + sigmaE
l <- sum(log(dnorm(x1, mean=beta[1] +beta[2] +beta[5] +beta[6] , sd=beta[9])) ) + # a1 , b1
sum(log(dnorm(x2, mean=beta[1] +beta[3] +beta[5] +beta[7] , sd=beta[9])) ) + # a2 , b1
sum(log(dnorm(x3, mean=beta[1] +beta[4] +beta[5] +beta[8] , sd=beta[9])) ) + # a3 , b1
sum(log(dnorm(x4, mean=beta[1] +(-(beta[2]+beta[3]+beta[4])) +beta[5] +(-(beta[6]+beta[7]+beta[8])), sd=beta[9])) ) + # a4 , b1
sum(log(dnorm(x5, mean=beta[1] +beta[2] +(-beta[5]) +(-beta[6]) , sd=beta[9])) ) + # a1 , b2
sum(log(dnorm(x6, mean=beta[1] +beta[3] +(-beta[5]) +(-beta[7]) , sd=beta[9])) ) + # a2 , b2
sum(log(dnorm(x7, mean=beta[1] +beta[4] +(-beta[5]) +(-beta[8]) , sd=beta[9])) ) + # a3 , b2
sum(log(dnorm(x8, mean=beta[1] +(-(beta[2]+beta[3]+beta[4])) +(-beta[5]) +(beta[6]+beta[7]+beta[8]) , sd=beta[9])) )) # a4 , b2
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(1,1,1,1,1,1,1,1,30)
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#sink("./likelihood.txt")
E2Ind<- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = fun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x1=y3[1:10], x2=y3[11:20],x3=y3[21:30],x4=y3[31:40],
x5=y3[41:50],x6=y3[51:60],x7=y3[61:70],x8=y3[71:80] )})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./lik.txt")
# l<-scan("./lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic1 <- waic(matrix(lll,ncol=1)) )
#
E2Ind.mcmc <- mcmc.list(E2Ind)
#
#summary(E2Ind.mcmc)
#
gelman.diag(E2Ind.mcmc)
#
#effectiveSize(E2Ind.mcmc)
1
2
3
4
5
6
7
8
9
10
11
12
13
df <- data.frame(mu=unlist(E2Ind.mcmc[,1]),muA1=unlist(E2Ind.mcmc[,2]) ,muA2=unlist(E2Ind.mcmc[,3]) ,muA3=unlist(E2Ind.mcmc[,4]) ,
muA4= -(unlist(E2Ind.mcmc[,2])+unlist(E2Ind.mcmc[,3])+unlist(E2Ind.mcmc[,4])) ,
muB1= (unlist(E2Ind.mcmc[,5])) , muB2= -unlist(E2Ind.mcmc[,5]) ,
muAB11 = unlist(E2Ind.mcmc[,6]) ,muAB12 = -unlist(E2Ind.mcmc[,6]),
muAB21 = unlist(E2Ind.mcmc[,7]) , muAB22 = -unlist(E2Ind.mcmc[,7]),
muAB31 = unlist(E2Ind.mcmc[,8]) , muAB32 = -unlist(E2Ind.mcmc[,8]) ,
muAB41 = -(unlist(E2Ind.mcmc[,6]) + unlist(E2Ind.mcmc[,7]) + unlist(E2Ind.mcmc[,8])) ,
muAB42 = unlist(E2Ind.mcmc[,6]) + unlist(E2Ind.mcmc[,7]) + unlist(E2Ind.mcmc[,8]) ,
sigmaE = unlist(E2Ind.mcmc[,9]) )
#
ryou<-rbind(EAP=apply(df, 2, mean),post.sd=apply(df, 2, sd),
apply(df, 2, quantile, probs=c(0.025,0.05, 0.5,0.95, 0.975)))
kable(t(round(ryou,2)))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu 29.88 0.29 29.32 29.41 29.88 30.35 30.44
muA1 0.08 0.50 -0.89 -0.73 0.08 0.90 1.05
muA2 -0.24 0.50 -1.23 -1.07 -0.24 0.57 0.73
muA3 -0.14 0.50 -1.11 -0.95 -0.14 0.67 0.84
muA4 0.30 0.49 -0.67 -0.51 0.30 1.11 1.27
muB1 0.79 0.29 0.23 0.33 0.79 1.26 1.36
muB2 -0.79 0.29 -1.36 -1.26 -0.79 -0.33 -0.23
muAB11 -0.27 0.49 -1.25 -1.08 -0.27 0.54 0.70
muAB12 0.27 0.49 -0.70 -0.54 0.27 1.08 1.25
muAB21 -0.01 0.49 -0.98 -0.83 -0.02 0.80 0.96
muAB22 0.01 0.49 -0.96 -0.80 0.02 0.83 0.98
muAB31 0.19 0.50 -0.79 -0.63 0.19 1.01 1.17
muAB32 -0.19 0.50 -1.17 -1.01 -0.19 0.63 0.79
muAB41 0.09 0.50 -0.88 -0.73 0.09 0.91 1.07
muAB42 -0.09 0.50 -1.07 -0.91 -0.09 0.73 0.88
sigmaE 2.54 0.22 2.16 2.21 2.53 2.93 3.02

水準とセルの効果の評価

P.147 (9.12)式

水準・交互作用の効果が0より大きい(小さい)確率

1
2
3
4
5
6
7
8
9
10
11
12
c0<-matrix(rep(NA,28),ncol=14)
colnames(c0)<-c("muA1","muA2","muA3","muA4","muB1","muB2","muAB11","muAB12","muAB21","muAB22","muAB31","muAB32","muAB41","muAB42")
rownames(c0)<-c("0 < ","=< 0")
#
c<-0
for (i in 2:15){
t<-sum((df[,i] > c)=="TRUE")
f<-sum((df[,i] < c)=="TRUE")
c0[1,i-1] <- round(t/ nrow(df),3)
c0[2,i-1] <- round(f/ nrow(df),3)
}
kable(c0)
muA1 muA2 muA3 muA4 muB1 muB2 muAB11 muAB12 muAB21 muAB22 muAB31 muAB32 muAB41 muAB42
0 < 0.566 0.31 0.39 0.728 0.997 0.003 0.29 0.71 0.487 0.513 0.654 0.346 0.573 0.427
=< 0 0.434 0.69 0.61 0.272 0.003 0.997 0.71 0.29 0.513 0.487 0.346 0.654 0.427 0.573

要因の効果の評価

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#### 測定値の標準偏差
#
sigmaA<-sqrt((df$muA1^2+ df$muA2^2 + df$muA3^2 + df$muA4^2)/4)
#sigmaB<-sqrt((df$muB1^2+ df$muB2^2)/2)
sigmaB<-sqrt(df$muB1^2)
sigmaAB<-sqrt((df$muAB11^2+ df$muAB12^2 + df$muAB21^2 + df$muAB22^2 + df$muAB31^2+ df$muAB32^2 + df$muAB41^2 + df$muAB42^2)/8)
#
#### 説明率
#
etaA2<-sigmaA^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etaB2<-sigmaB^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etaAB2<-sigmaAB^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etat2<-(sigmaA^2+sigmaB^2+sigmaAB^2)/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
#
#### 効果量
#
deltaA<-sigmaA/df$sigmaE
deltaB<-sigmaB/df$sigmaE
deltaAB<-sigmaAB/df$sigmaE
#
hyouka<-cbind(sigmaA,sigmaB,sigmaAB,sigmaE=df$sigmaE,etaA2,etaB2,etaAB2,etat2,deltaA,deltaB,deltaAB)
#
ryou<-rbind(EAP=apply(hyouka, 2, mean),post.sd=apply(hyouka, 2, sd),
apply(hyouka, 2, quantile, probs=c(0.025,0.05, 0.5,0.95, 0.975)))
kable(t(round(ryou,3)))
EAP post.sd 2.5% 5% 50% 95% 97.5%
sigmaA 0.496 0.211 0.142 0.183 0.477 0.874 0.960
sigmaB 0.795 0.285 0.235 0.325 0.794 1.265 1.359
sigmaAB 0.482 0.207 0.140 0.178 0.462 0.853 0.939
sigmaE 2.544 0.218 2.162 2.215 2.528 2.926 3.017
etaA2 0.037 0.028 0.003 0.005 0.030 0.092 0.108
etaB2 0.090 0.054 0.007 0.014 0.084 0.190 0.212
etaAB2 0.035 0.027 0.003 0.004 0.028 0.087 0.103
etat2 0.162 0.061 0.056 0.069 0.158 0.269 0.291
deltaA 0.195 0.082 0.057 0.073 0.189 0.340 0.372
deltaB 0.315 0.115 0.091 0.126 0.315 0.505 0.540
deltaAB 0.190 0.080 0.056 0.071 0.183 0.331 0.362