心理統計法('17) その7

放送大学「心理統計法(‘17)」第10章 その 2

授業ではrstanパッケージを使ってますが、MCMCpackパッケージを使ってみます。初学者ですので間違いが多々あると思います。

(Rスクリプトはここ)
早稲田大学文学部文学研究科 豊田研究室

(参考)
ベイジアンMCMCによる統計モデル
Using the ggmcmc package
Bayesian Inference With Stan ~番外編~
Exercise 2: Bayesian A/B testing using MCMC Metropolis-Hastings
政治学方法論 I

1
2
3
library(MCMCpack)
library(knitr)
#library(loo)

推測例2(3水準以上の主効果の分析)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#表10.9「知覚時間」データ入力
y2<-c(
33.00,31.92,27.85,30.99,31.61,30.50,31.06,29.48,33.08,32.55,
34.89,32.33,31.98,34.22,35.48,27.54,35.53,28.57,29.57,33.94,
36.30,30.51,33.70,28.77,32.60,35.11,29.35,31.99,33.22,33.04,
27.34,33.40,24.63,27.24,28.27,31.85,25.05,29.04,31.29,30.20,
29.38,29.80,31.28,31.54,31.83,30.50,27.22,30.06,29.61,35.33,
35.69,35.02,31.94,33.98,30.38,26.21,34.91,33.36,31.37,33.49,
30.00,31.23,30.09,34.73,35.43,33.44,34.65,33.35,30.41,33.41,
26.35,28.61,29.52,27.63,27.05,33.32,29.85,34.44,28.52,32.40)
A <- rep(1:4,each=10,times=2) #要因Aの水準の指定
B <- rep(1:2,each=40) #要因Bの水準の指定
#
dat<-data.frame(A,B,y2)
Data<-split(dat,list(dat$A,dat$B))
mat <- matrix(apply(sapply(Data, "[[", 3),2,mean),ncol=2,byrow=F)
#
#png("sinri10_02.png")
par(family="TakaoPMincho")
matplot(1:4, mat, type="l", ylab="時間(秒)", xlab="",main="S2氏のデータの平均値プロット", lwd=1.5,col=1:2, xaxt = "n",las=1,ylim=c(28,33))
matplot(1:4, mat, type="p", ylab="", xlab="",main="", pch=19,cex=2, col=1:2,xaxt = "n",ylim=c(28,33),add=T)
axis(1,1:4,labels=c("対照","聴音","音読","運動"))
text(x=2.5,y=32.1,labels="平常時",col=1)
text(x=2.5,y=32.9,labels="起床直後",col=2)
#dev.off()

1
2
3
4
5
mean_sd <- matrix(apply(sapply(Data, "[[", 3),2,mean),ncol=8)
mean_sd <- rbind(mean_sd,matrix(apply(sapply(Data, "[[", 3),2,sd)*sqrt(9/10),ncol=8) )
colnames(mean_sd)<-c("平常時 対照","平常時 聴音","平常時 音読","平常時 運動","起床直後 対照","起床直後 聴音","起床直後 音読","起床直後 運動")
rownames(mean_sd)<-c("平均","標準偏差")
kable(round(mean_sd,2))
平常時 対照 平常時 聴音 平常時 音読 平常時 運動 起床直後 対照 起床直後 聴音 起床直後 音読 起床直後 運動
平均 31.20 32.41 32.46 28.83 30.66 32.63 32.67 29.77
標準偏差 1.55 2.79 2.27 2.74 2.00 2.68 1.96 2.61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
fun <- function(beta, x1 , x2 , x3 , x4 ,x5 ,x6,x7,x8) {
# mu : beta[1]
# a1 : beta[2] , a2 : beta[3] , a3 : beta[4]
# b1 : beta[5]
# ab11 : beta[6] , ab21 : beta[7] , ab31 : beta[8]
# sigmaE : beta[9]
## a4 : -(beta[2]+beta[3]+beta[4])
## b2 : -beta[5]
## ab12 : -beta[6] , ab22 : -beta[7] , ab32 : -beta[8] ,
## ab41 : -(beta[6]+beta[7]+beta[8]) , ab42 : beta[6]+beta[7]+beta[8]
ifelse( beta[9] < 0 , l<- -Inf ,
### mu + a + b + (ab) + sigmaE
l <- sum(log(dnorm(x1, mean=beta[1] +beta[2] +beta[5] +beta[6] , sd=beta[9])) ) + # a1 , b1
sum(log(dnorm(x2, mean=beta[1] +beta[3] +beta[5] +beta[7] , sd=beta[9])) ) + # a2 , b1
sum(log(dnorm(x3, mean=beta[1] +beta[4] +beta[5] +beta[8] , sd=beta[9])) ) + # a3 , b1
sum(log(dnorm(x4, mean=beta[1] +(-(beta[2]+beta[3]+beta[4])) +beta[5] +(-(beta[6]+beta[7]+beta[8])), sd=beta[9])) ) + # a4 , b1
sum(log(dnorm(x5, mean=beta[1] +beta[2] +(-beta[5]) +(-beta[6]) , sd=beta[9])) ) + # a1 , b2
sum(log(dnorm(x6, mean=beta[1] +beta[3] +(-beta[5]) +(-beta[7]) , sd=beta[9])) ) + # a2 , b2
sum(log(dnorm(x7, mean=beta[1] +beta[4] +(-beta[5]) +(-beta[8]) , sd=beta[9])) ) + # a3 , b2
sum(log(dnorm(x8, mean=beta[1] +(-(beta[2]+beta[3]+beta[4])) +(-beta[5]) +(beta[6]+beta[7]+beta[8]) , sd=beta[9])) )) # a4 , b2
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(1,1,1,1,1,1,1,1,30)
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#sink("./likelihood.txt")
E2Ind<- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = fun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x1=y2[1:10], x2=y2[11:20],x3=y2[21:30],x4=y2[31:40],
x5=y2[41:50],x6=y2[51:60],x7=y2[61:70],x8=y2[71:80] )})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./lik.txt")
# l<-scan("./lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic1 <- waic(matrix(lll,ncol=1)) )
#
#
E2Ind.mcmc <- mcmc.list(E2Ind)
#
#summary(E2Ind.mcmc)
#
#gelman.diag(E2Ind.mcmc)
#
#effectiveSize(E2Ind.mcmc)
1
2
3
4
5
6
7
8
9
10
11
12
13
df <- data.frame(mu=unlist(E2Ind.mcmc[,1]),muA1=unlist(E2Ind.mcmc[,2]) ,muA2=unlist(E2Ind.mcmc[,3]) ,muA3=unlist(E2Ind.mcmc[,4]) ,
muA4= -(unlist(E2Ind.mcmc[,2])+unlist(E2Ind.mcmc[,3])+unlist(E2Ind.mcmc[,4])) ,
muB1= (unlist(E2Ind.mcmc[,5])) , muB2= -unlist(E2Ind.mcmc[,5]) ,
muAB11 = unlist(E2Ind.mcmc[,6]) ,muAB12 = -unlist(E2Ind.mcmc[,6]),
muAB21 = unlist(E2Ind.mcmc[,7]) , muAB22 = -unlist(E2Ind.mcmc[,7]),
muAB31 = unlist(E2Ind.mcmc[,8]) , muAB32 = -unlist(E2Ind.mcmc[,8]) ,
muAB41 = -(unlist(E2Ind.mcmc[,6]) + unlist(E2Ind.mcmc[,7]) + unlist(E2Ind.mcmc[,8])) ,
muAB42 = unlist(E2Ind.mcmc[,6]) + unlist(E2Ind.mcmc[,7]) + unlist(E2Ind.mcmc[,8]) ,
sigmaE = unlist(E2Ind.mcmc[,9]) )
#
ryou<-rbind(EAP=apply(df, 2, mean),post.sd=apply(df, 2, sd),
apply(df, 2, quantile, probs=c(0.025,0.05, 0.5,0.95, 0.975)))
kable(t(round(ryou,2)))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu 31.33 0.28 30.77 30.86 31.33 31.80 31.89
muA1 -0.40 0.49 -1.37 -1.21 -0.40 0.41 0.57
muA2 1.19 0.49 0.22 0.38 1.19 2.00 2.16
muA3 1.24 0.49 0.29 0.44 1.24 2.05 2.20
muA4 -2.03 0.49 -3.00 -2.84 -2.03 -1.22 -1.05
muB1 -0.11 0.28 -0.66 -0.57 -0.11 0.36 0.45
muB2 0.11 0.28 -0.45 -0.36 0.11 0.57 0.66
muAB11 0.38 0.49 -0.58 -0.42 0.38 1.19 1.35
muAB12 -0.38 0.49 -1.35 -1.19 -0.38 0.42 0.58
muAB21 -0.02 0.49 -0.98 -0.82 -0.02 0.79 0.95
muAB22 0.02 0.49 -0.95 -0.79 0.02 0.82 0.98
muAB31 0.00 0.49 -0.97 -0.82 0.00 0.81 0.96
muAB32 0.00 0.49 -0.96 -0.81 0.00 0.82 0.97
muAB41 -0.36 0.49 -1.33 -1.17 -0.36 0.44 0.60
muAB42 0.36 0.49 -0.60 -0.44 0.36 1.17 1.33
sigmaE 2.53 0.22 2.15 2.21 2.52 2.91 3.00

水準とセルの効果の評価

P.147 (9.12)式

水準・交互作用の効果が0より大きい(小さい)確率

1
2
3
4
5
6
7
8
9
10
11
12
c0<-matrix(rep(NA,28),ncol=14)
colnames(c0)<-c("muA1","muA2","muA3","muA4","muB1","muB2","muAB11","muAB12","muAB21","muAB22","muAB31","muAB32","muAB41","muAB42")
rownames(c0)<-c("0 < ","=< 0")
#
c<-0
for (i in 2:15){
t<-sum((df[,i] > c)=="TRUE")
f<-sum((df[,i] < c)=="TRUE")
c0[1,i-1] <- round(t/ nrow(df),3)
c0[2,i-1] <- round(f/ nrow(df),3)
}
kable(c0)
muA1 muA2 muA3 muA4 muB1 muB2 muAB11 muAB12 muAB21 muAB22 muAB31 muAB32 muAB41 muAB42
0 < 0.209 0.992 0.994 0 0.354 0.646 0.782 0.218 0.487 0.513 0.501 0.499 0.227 0.773
=< 0 0.791 0.008 0.006 1 0.646 0.354 0.218 0.782 0.513 0.487 0.499 0.501 0.773 0.227

要因の効果の評価

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#### 測定値の標準偏差
#
sigmaA<-sqrt((df$muA1^2+ df$muA2^2 + df$muA3^2 + df$muA4^2)/4)
#sigmaB<-sqrt((df$muB1^2+ df$muB2^2)/2)
sigmaB<-sqrt(df$muB1^2)
sigmaAB<-sqrt((df$muAB11^2+ df$muAB12^2 + df$muAB21^2 + df$muAB22^2 + df$muAB31^2+ df$muAB32^2 + df$muAB41^2 + df$muAB42^2)/8)
#
#### 説明率
#
etaA2<-sigmaA^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etaB2<-sigmaB^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etaAB2<-sigmaAB^2/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
etat2<-(sigmaA^2+sigmaB^2+sigmaAB^2)/(sigmaA^2+sigmaB^2+sigmaAB^2+df$sigmaE^2)
#
#### 効果量
#
deltaA<-sigmaA/df$sigmaE
deltaB<-sigmaB/df$sigmaE
deltaAB<-sigmaAB/df$sigmaE
#
hyouka<-cbind(sigmaA,sigmaB,sigmaAB,sigmaE=df$sigmaE,etaA2,etaB2,etaAB2,etat2,deltaA,deltaB,deltaAB)
#
ryou<-rbind(EAP=apply(hyouka, 2, mean),post.sd=apply(hyouka, 2, sd),
apply(hyouka, 2, quantile, probs=c(0.025,0.05, 0.5,0.95, 0.975)))
kable(t(round(ryou,3)))
EAP post.sd 2.5% 5% 50% 95% 97.5%
sigmaA 1.405 0.278 0.860 0.951 1.404 1.862 1.955
sigmaB 0.242 0.184 0.010 0.019 0.205 0.597 0.681
sigmaAB 0.514 0.216 0.150 0.192 0.495 0.898 0.983
sigmaE 2.534 0.216 2.154 2.208 2.519 2.911 3.001
etaA2 0.228 0.072 0.092 0.112 0.226 0.348 0.371
etaB2 0.010 0.014 0.000 0.000 0.005 0.039 0.050
etaAB2 0.034 0.026 0.003 0.004 0.028 0.086 0.101
etat2 0.272 0.072 0.134 0.154 0.272 0.390 0.412
deltaA 0.558 0.118 0.330 0.366 0.558 0.751 0.790
deltaB 0.096 0.072 0.004 0.008 0.081 0.234 0.267
deltaAB 0.203 0.084 0.059 0.076 0.197 0.352 0.383

水準間の比較

水準の効果ajはaj’より大きい

1
2
3
tsui<-matrix(rep(NA,16),ncol=4)
colnames(tsui)<-rownames(tsui)<-c("対照(a1)","聴音(a2)","音読(a3)","運動(a4)")
head(df[,2:5])

muA1 muA2 muA3 muA4
1 -0.4800268 0.6440452 1.3087941 -1.4728126
2 -0.4943629 0.4228612 0.9737331 -0.9022314
3 -0.5314313 1.1962563 0.9976088 -1.6624338
4 -0.5314313 1.1962563 0.9976088 -1.6624338
5 -0.1957930 1.2907271 0.8177521 -1.9126863
6 -0.2849278 1.0069562 0.9639928 -1.6860213

1
2
3
4
5
6
7
8
for (i in 2:5){
for (j in 2:5){
t<-sum((df[,i] -df[,j] > 0)=="TRUE")
tsui[i-1,j-1] <- round(t/ nrow(df),3)
}
}
#
kable(tsui)
対照(a1) 聴音(a2) 音読(a3) 運動(a4)
対照(a1) 0.000 0.024 0.020 0.977
聴音(a2) 0.976 0.000 0.472 1.000
音読(a3) 0.980 0.528 0.000 1.000
運動(a4) 0.023 0.000 0.000 0.000

連言命題が正しい確率

u_a2>a1 u_a3>a1 u_a1>a4

1
2
t<-sum((df$muA2-df$muA1 > 0)=="TRUE" & (df$muA3-df$muA1 > 0)=="TRUE" & (df$muA1-df$muA4 > 0)=="TRUE" )
t/nrow(df)

0.93709