心理統計法('17) その5

放送大学「心理統計法(‘17)」第9章

授業ではrstanパッケージを使ってますが、MCMCpackパッケージを使ってみます。初学者ですので間違いが多々あると思います。

(Rスクリプトはここ)
早稲田大学文学部文学研究科 豊田研究室

(参考)
ベイジアンMCMCによる統計モデル
Using the ggmcmc package
Bayesian Inference With Stan ~番外編~
Exercise 2: Bayesian A/B testing using MCMC Metropolis-Hastings
政治学方法論 I

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
library(MCMCpack)
library(knitr)
#library(loo)
#
#表9.1の「知覚時間」データ入力
#対照
x1<-c(31.43,31.09,33.38,30.49,29.62,35.40,32.58,28.96,29.43,28.52,
25.39,32.68,30.51,30.15,32.33,30.43,32.50,32.07,32.35,31.57)
#聴音
x2<-c(32.30,34.24,28.10,33.40,37.71,31.62,31.37,35.85,32.33,34.04,
34.96,31.43,35.28,30.19,35.09,33.38,31.49,28.44,32.12,31.81)
#音読
x3<-c(31.62,37.04,33.76,30.01,34.18,33.08,28.77,33.90,28.06,37.54,
33.89,32.23,35.95,36.68,33.57,30.87,32.20,29.98,33.08,35.12)
#運動
x4<-c(27.79,30.33,29.02,27.02,35.28,26.89,30.33,27.98,27.99,28.04,
25.02,28.80,30.74,26.79,28.65,32.43,29.13,29.50,28.14,32.19)
#
E1Indfun <- function(beta, x1 , x2 , x3 , x4) {
# beta[1] : mu1 , beta[2] : mu2 ,beta[3] : mu3 , beta[4] : mu4 , beta[5] : sigmaE
ifelse( beta[5] < 0 , l<- -Inf ,
l <- sum(log(dnorm(x1, mean=beta[1], sd=beta[5])) ) +
sum(log(dnorm(x2, mean=beta[2], sd=beta[5])) ) +
sum(log(dnorm(x3, mean=beta[3], sd=beta[5])) ) +
sum(log(dnorm(x4, mean=beta[4], sd=beta[5]))) )
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(0,0,0,0,30)
#init <- c(mean(x1), mean(x2), max(sd(x1),sd(x2))*5 )
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#sink("./likelihood.txt")
E1Ind <- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = E1Indfun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x1=x1, x2=x2 ,x3=x3 , x4=x4)})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./E1Ind_lik.txt")
# l<-scan("./E1Ind_lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic1 <- waic(matrix(lll,ncol=1)) )
#
E1Ind.mcmc <- mcmc.list(E1Ind)
#
#plot(E1Ind.mcmc, trace = TRUE, density = TRUE)
#
summary(E1Ind.mcmc)
#
gelman.diag(E1Ind.mcmc)
#
effectiveSize(E1Ind.mcmc)
#
df<-data.frame(mu1=unlist(E1Ind.mcmc[,1]),mu2=unlist(E1Ind.mcmc[,2]),
mu3=unlist(E1Ind.mcmc[,3]),mu4=unlist(E1Ind.mcmc[,4]),sigmaE=unlist(E1Ind.mcmc[,5]) )
#
head(df)
#
ryou<-data.frame(c(round(mean(df[,1]),2) , round(sd(df[,1]),2) ,
round(quantile(df[,1],c(0.025,0.05,0.5,0.95,0.975)),2)))
for (i in 2:ncol(df) ){
ryou<-cbind(ryou,data.frame(c(round(mean(df[,i]),2) , round(sd(df[,i]),2) ,
round(quantile(df[,i],c(0.025,0.05,0.5,0.95,0.975)),2))))
}
rownames(ryou)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
names(ryou)<-c("mu1","mu2","mu3","mu4","sigmaE")
kable(t(ryou))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu1 31.04 0.55 29.97 30.15 31.04 31.94 32.11
mu2 32.76 0.55 31.68 31.86 32.76 33.65 33.83
mu3 33.07 0.54 32.00 32.18 33.07 33.97 34.14
mu4 29.10 0.54 28.04 28.21 29.10 30.00 30.17
sigmaE 2.43 0.20 2.08 2.13 2.42 2.78 2.86

水準の平均と水準の効果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
mu<- apply(df[,1:4],1,mean)
#summary(mu)
#
a1<- df$mu1 - mu
a2<- df$mu2 - mu
a3<- df$mu3 - mu
a4<- df$mu4 - mu
#
kouka<-cbind(mu,a1,a2,a3,a4)
k<-data.frame(c(round(mean(kouka[,1]),2) , round(sd(kouka[,1]),2) ,
round(quantile(kouka[,1],c(0.025,0.05,0.5,0.95,0.975)),2)))
for (i in 2:ncol(kouka) ){
k<-cbind(k,data.frame(c(round(mean(kouka[,i]),2) , round(sd(kouka[,i]),2) ,
round(quantile(kouka[,i],c(0.025,0.05,0.5,0.95,0.975)),2))))
}
rownames(k)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
names(k)<-c("mu","a1","a2","a3","a4")
kable(t(k))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu 31.49 0.27 30.96 31.04 31.49 31.95 32.03
a1 -0.45 0.47 -1.38 -1.23 -0.45 0.33 0.48
a2 1.26 0.47 0.33 0.48 1.26 2.04 2.19
a3 1.58 0.47 0.65 0.80 1.58 2.35 2.51
a4 -2.39 0.47 -3.32 -3.17 -2.39 -1.62 -1.47

水準と要因の考察

1
2
3
4
5
6
7
c<-0
for (i in 2:5){
temp<-table(kouka[,i] > c)
print(temp)
print(round(temp[2]/(temp[1]+temp[2]),3) )
print(round(temp[1]/(temp[1]+temp[2]),3) )
}
Uaj > 0 がTRUE

FALSE TRUE
83158 16842

TRUE
0.168
FALSE
0.832

FALSE TRUE
431 99569

TRUE
0.996
FALSE
0.004

FALSE TRUE
47 99953

TRUE
1
FALSE
0

FALSE
100000


NA
FALSE
NA

TRUEやFALSEが0の場合、table関数を使うと確率計算で不具合が起きる。
(度数を出しておけば、どちらかが1でもう片方が0ということなので大した問題ではないが、)
table関数を使わずに、sum((kouka[,i] > c)==”TRUE”) とすればよい。

1
2
3
4
5
6
7
8
9
10
11
12
13
c0<-matrix(rep(NA,8),ncol=4)
colnames(c0)<-c("対照","聴音","音読","運動")
rownames(c0)<-c("Uaj > 0","Uaj < 0")
#
c<-0
for (i in 2:5){
t<-sum((kouka[,i] > c)=="TRUE")
f<-sum((kouka[,i] < c)=="TRUE")
c0[1,i-1] <- round(t/ nrow(kouka),3)
c0[2,i-1] <- round(f/ nrow(kouka),3)
}
#
kable(c0)
対照 聴音 音読 運動
Uaj > 0 0.168 0.996 1 0
Uaj < 0 0.832 0.004 0 1

要因の効果の評価

説明率(分散説明率)

誤差の分散 sigmaE^2 : df$sigmaE^2
1
sigmaE <- df$sigmaE
要因の分散 sigmaA^2
1
sigmaA<-sqrt((kouka[,2]^2+kouka[,3]^2+kouka[,4]^2+kouka[,5]^2)/4)
説明率
1
eta2<-sigmaA^2/(sigmaA^2+sigmaE^2)
効果量
1
2
3
4
5
6
7
8
9
10
11
12
delta<-sigmaA/sigmaE
#
hyouka<-cbind(sigmaA,eta2,delta)
h<-data.frame(c(round(mean(hyouka[,1]),3) , round(sd(hyouka[,1]),3) ,
round(quantile(hyouka[,1],c(0.025,0.05,0.5,0.95,0.975)),3)))
for (i in 2:ncol(hyouka) ){
h<-cbind(h,data.frame(c(round(mean(hyouka[,i]),3) , round(sd(hyouka[,i]),3) ,
round(quantile(hyouka[,i],c(0.025,0.05,0.5,0.95,0.975)),3))))
}
rownames(h)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
names(h)<-c("sigmaA","eta2","delta")
kable(t(h))
EAP post.sd 2.5% 5% 50% 95% 97.5%
sigmaA 1.628 0.269 1.101 1.186 1.627 2.071 2.157
eta2 0.311 0.076 0.160 0.183 0.312 0.434 0.456
delta 0.674 0.122 0.436 0.474 0.674 0.875 0.915
1
2
3
4
5
6
7
8
#png("sinri09_01.png")
par(mfrow=c(2,1))
hist(eta2,breaks=100,col="lightblue",main="説明率")
segments(mean(eta2),0,mean(eta2),par("usr")[4],lwd=2.5 ,col="red")
hist(delta,breaks=100,col="lightgreen",main="効果量")
segments(mean(delta),0,mean(delta),par("usr")[4],lwd=2.5 ,col="red")
par(mfrow=c(1,1))
#dev.off()

水準間の考察

1
2
3
4
5
6
7
8
9
10
11
12
13
tsui<-matrix(rep(NA,16),ncol=4)
colnames(tsui)<-rownames(tsui)<-c("対照(mu1)","聴音(mu2)","音読(mu3)","運動(mu4)")
#
head(df[,1:4])
#
for (i in 1:4){
for (j in 1:4){
t<-sum((df[,i] -df[,j] > 0)=="TRUE")
tsui[i,j] <- round(t/ nrow(df),3)
}
}
#
kable(tsui)
対照(mu1) 聴音(mu2) 音読(mu3) 運動(mu4)
対照(mu1) 0.000 0.014 0.004 0.994
聴音(mu2) 0.986 0.000 0.340 1.000
音読(mu3) 0.996 0.660 0.000 1.000
運動(mu4) 0.006 0.000 0.000 0.000

連言命題が正しい確率

(注意)上の表から対応するところを取り出して単純に掛け算してはダメです。

& : 論理積 を計算します。TRUE,TRUE,TRUEの場合だけが、TRUE (命題が3つのとき)

Umu3>mu2 Umu2>mu1 Umu1>mu4
1
2
t<-sum((df[,3] -df[,2] > 0) & (df[,2] -df[,1] > 0) & (df[,1] -df[,4] > 0) =="TRUE")
round(t/ nrow(df),3)

0.643

Umu3>mu1 Umu2>mu1 Umu1>mu4
1
2
t<-sum((df[,3] -df[,1] > 0) & (df[,2] -df[,1] > 0) & (df[,1] -df[,4] > 0) =="TRUE")
round(t/ nrow(df),3)

0.976

Umu3>mu4 Umu2>mu4 Umu1>mu4
1
2
t<-sum((df[,3] -df[,4] > 0) & (df[,2] -df[,4] > 0) & (df[,1] -df[,4] > 0) =="TRUE")
round(t/ nrow(df),3)

0.994

特に興味のある2水準間の比較

(注意)当該のデータだけを使って再分析をしてはダメ。

聴音(mu2) VS 音読(mu3)

1
2
mu3_mu2<-df$mu3 - df$mu2
#summary(mu3_mu2)

効果量 delta

P.130 (8.7)式

1
2
delta32<-mu3_mu2/df$sigmaE
#summary(delta32)

非重複度

P.96 (6.7)式

1
2
U3<-pnorm(df$mu3, mean =df$mu2, sd = df$sigmaE)
#summary(U3)

優越率

P.102 (6.18)式

1
2
pid<-pnorm(delta32/sqrt(2) , mean =0, sd = 1)
#summary(yuetsu)

閾上率

P.105 (6.24)式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
c<-0.5
pi05<-pnorm( (df$mu3-df$mu2-c)/(sqrt(2)*df$sigmaE) , mean =0, sd = 1)
#summary(pi05)
#
hyouka32<-cbind(mu3_mu2,delta32,U3,pid,pi05)
h<-data.frame(c(round(mean(hyouka32[,1]),3) , round(sd(hyouka32[,1]),3) ,
round(quantile(hyouka32[,1],c(0.025,0.05,0.5,0.95,0.975)),3)))
for (i in 2:ncol(hyouka32) ){
h<-cbind(h,data.frame(c(round(mean(hyouka32[,i]),3) , round(sd(hyouka32[,i]),3) ,
round(quantile(hyouka32[,i],c(0.025,0.05,0.5,0.95,0.975)),3))))
}
rownames(h)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
names(h)<-c("mu3_mu2","delta","U3","pi_d","pi_0.5")
kable(t(h))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu3_mu2 0.316 0.770 -1.197 -0.955 0.317 1.584 1.821
delta 0.131 0.316 -0.488 -0.390 0.131 0.650 0.748
U3 0.550 0.119 0.313 0.348 0.552 0.742 0.773
pi_d 0.536 0.087 0.365 0.391 0.537 0.677 0.702
pi_0.5 0.479 0.087 0.312 0.337 0.479 0.623 0.649

音読(mu3) VS 運動(mu4)

1
2
mu3_mu4<-df$mu3 - df$mu4
#summary(mu3_mu4)

効果量 delta

P.130 (8.7)式

1
2
delta34<-mu3_mu4/df$sigmaE
#summary(delta34)

非重複度

P.96 (6.7)式

1
2
U3<-pnorm(df$mu3, mean =df$mu4, sd = df$sigmaE)
#summary(U3)

優越率

P.102 (6.18)式

1
2
pid<-pnorm(delta34/sqrt(2) , mean =0, sd = 1)
#summary(yuetsu)

閾上率

P.105 (6.24)式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
c<-2
pi2<-pnorm( (df$mu3-df$mu4-c)/(sqrt(2)*df$sigmaE) , mean =0, sd = 1)
#summary(pi2)
#
hyouka34<-cbind(mu3_mu4,delta34,U3,pid,pi2)
h<-data.frame(c(round(mean(hyouka34[,1]),3) , round(sd(hyouka34[,1]),3) ,
round(quantile(hyouka34[,1],c(0.025,0.05,0.5,0.95,0.975)),3)))
for (i in 2:ncol(hyouka34) ){
h<-cbind(h,data.frame(c(round(mean(hyouka34[,i]),3) , round(sd(hyouka34[,i]),3) ,
round(quantile(hyouka34[,i],c(0.025,0.05,0.5,0.95,0.975)),3))))
}
rownames(h)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
names(h)<-c("mu3_mu4","delta","U3","pi_d","pi_2.0")
kable(t(h))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu3_mu4 3.967 0.769 2.453 2.707 3.965 5.236 5.476
delta 1.643 0.343 0.973 1.082 1.642 2.210 2.320
U3 0.940 0.041 0.835 0.860 0.950 0.986 0.990
pi_d 0.871 0.050 0.754 0.778 0.877 0.941 0.950
pi_2.0 0.713 0.076 0.551 0.580 0.718 0.830 0.847