心理統計法('17) その3

放送大学「心理統計法(‘17)」第5章 第6章 その2

授業ではrstanパッケージを使ってますが、MCMCpackパッケージを使ってみます。初学者ですので間違いが多々あると思います。

(Rスクリプトはここ)
早稲田大学文学部文学研究科 豊田研究室

(参考)
ベイジアンMCMCによる統計モデル
Using the ggmcmc package
Bayesian Inference With Stan ~番外編~
Exercise 2: Bayesian A/B testing using MCMC Metropolis-Hastings
政治学方法論 I

(前回の記事)
心理統計法(‘17) その2 放送大学「心理統計法(‘17)」第5章 第6章 その1

標準偏差が異なるモデル(DEF)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
library(MCMCpack)
library(knitr)
library(loo)
#library(ggmcmc)
#表5.1の聴音条件のデータ入力
x1<-c(32.30,34.24,28.10,33.40,37.71,31.62,31.37,35.85,32.33,34.04,
34.96,31.43,35.28,30.19,35.09,33.38,31.49,28.44,32.12,31.81)
#表1.1の対照条件のデータ入力
x2<-c(31.43,31.09,33.38,30.49,29.62,35.40,32.58,28.96,29.43,28.52,
25.39,32.68,30.51,30.15,32.33,30.43,32.50,32.07,32.35,31.57)
#
######### burnin = 2000 , mcmc=200000 , thin=10
#
########## 標準偏差が異なるモデル(DEF) ##########
#
ttestfun <- function(beta, x , y) {
# beta[1] : mu1 , beta[2] : mu2 , beta[3] : sigma1 , beta[4] : sigma2
ifelse( beta[3] < 0 | beta[4] < 0 , l<- -Inf ,
l <- sum(log(dnorm(x, mean=beta[1], sd=beta[3])) ) +
sum(log(dnorm(y, mean=beta[2], sd=beta[4]))) )
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(0,0,30,30)
#init <- c(mean(x1), mean(x2), sd(x1), sd(x2))
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#sink("./likelihood.txt")
DEF <- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = ttestfun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x=x1, y=x2 )})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./def_lik.txt")
# l<-scan("./def_lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic2<-waic(matrix(lll,ncol=1)) )
DEF.mcmc <- mcmc.list(DEF)
summary(DEF.mcmc)

Iterations = 2001:201991
Thinning interval = 10
Number of chains = 5
Sample size per chain = 20000

  1. Empirical mean and standard deviation for each variable,
    plus standard error of the mean:

    Mean SD Naive SE Time-series SE
    [1,] 32.753 0.5897 0.001865 0.002524
    [2,] 31.042 0.5163 0.001633 0.002219
    [3,] 2.590 0.4612 0.001458 0.002287
    [4,] 2.276 0.4054 0.001282 0.002013

  2. Quantiles for each variable:

    2.5% 25% 50% 75% 97.5%
    [1,] 31.584 32.370 32.754 33.137 33.918
    [2,] 30.024 30.707 31.040 31.376 32.067
    [3,] 1.874 2.265 2.529 2.846 3.661
    [4,] 1.643 1.989 2.221 2.503 3.221

1
gelman.diag(DEF.mcmc)

Potential scale reduction factors:

Point est. Upper C.I.
[1,] 1 1
[2,] 1 1
[3,] 1 1
[4,] 1 1

Multivariate psrf

1

1
effectiveSize(DEF.mcmc)

var1 var2 var3 var4
54612.12 54288.77 40756.98 40936.99

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
df<-data.frame(mu1=unlist(DEF.mcmc[,1]),mu2=unlist(DEF.mcmc[,2]),
sigma1=unlist(DEF.mcmc[,3]) ,sigma2=unlist(DEF.mcmc[,4]) )
#
ryou<-data.frame(c(round(mean(df[,1]),2) , round(sd(df[,1]),2) ,
round(quantile(df[,1],c(0.025,0.05,0.5,0.95,0.975)),2)))
#
for (i in 2:ncol(df) ){
ryou<-cbind(ryou,data.frame(c(round(mean(df[,i]),2) , round(sd(df[,i]),2) ,
round(quantile(df[,i],c(0.025,0.05,0.5,0.95,0.975)),2))))
}
names(ryou)<-c("mu1","mu2","sigma1","sigma2")
rownames(ryou)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
mu1_mu2 <- df$mu1 - df$mu2
ryou<-cbind(ryou,data.frame(c(round(mean(mu1_mu2),2) , round(sd(mu1_mu2),2) ,
round(quantile(mu1_mu2,c(0.025,0.05,0.5,0.95,0.975)),2))))
names(ryou)<-c("x1","x2","sigma1","sigma2","mu1_mu2")
#kable(t(ryou))
1
2
meanDiff <- df$mu1 - df$mu2
( prob4 <- mean(meanDiff > 0) )

[1] 0.98335

1
2
meanDiff <- df$mu2 - df$mu1
( prob5 <- mean(meanDiff > 0) )

[1] 0.01665

1
2
meanDiff <- df$mu1 - df$mu2
( prob6 <- mean(meanDiff > 1) )

[1] 0.82206

sdの小さい mu2 を先に描く
1
2
3
4
5
6
#png("mcmchist6_3.png")
hist(df$mu2,breaks=100,col=rgb(0,1,0,0.3),main="母平均の事後分布( DEF )mu1:red , mu2:green",xlim=c(min(df$mu1,df$mu2),max(df$mu1,df$mu2)) )
hist(df$mu1,breaks=100,col=rgb(1,0,0,0.3),add=T)
segments(mean(df$mu1),0,mean(df$mu1),par("usr")[4],lwd=2 ,col="blue")
segments(mean(df$mu2),0,mean(df$mu2),par("usr")[4],lwd=2 ,col="blue")
#dev.off()

1
2
3
4
5
#png("mcmchist6_4.png")
hist(mu1_mu2,breaks=100,col="lightgreen",main="母平均の差の事後分布( DEF )")
#abline(v=mean(mu1_mu2),lwd=2.5 , col="red")
segments(mean(mu1_mu2),0,mean(mu1_mu2),par("usr")[4],lwd=2.5 ,col="red")
#dev.off()

1
2
3
4
5
6
7
8
9
10
x1aste<-rnorm(n = nrow(df), mean =df$mu1, sd = df$sigma1)
x2aste<-rnorm(n = nrow(df), mean =df$mu2, sd = df$sigma2)
#
ryou<-cbind(ryou,data.frame(c(round(mean(x1aste),2) , round(sd(x1aste),2) ,
round(quantile(x1aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
ryou<-cbind(ryou,data.frame(c(round(mean(x2aste),2) , round(sd(x2aste),2) ,
round(quantile(x2aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
#
names(ryou)<-c("mu1","mu2","sigma1","sigma2","mu1_mu2","x1aste","x2aste")
kable(t(ryou))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu1 32.75 0.59 31.58 31.79 32.75 33.72 33.92
mu2 31.04 0.52 30.02 30.20 31.04 31.89 32.07
sigma1 2.59 0.46 1.87 1.96 2.53 3.44 3.66
sigma2 2.28 0.41 1.64 1.72 2.22 3.02 3.22
mu1_mu2 1.71 0.78 0.16 0.43 1.71 2.99 3.25
x1aste 32.75 2.69 27.42 28.35 32.76 37.16 38.08
x2aste 31.06 2.37 26.38 27.20 31.05 34.91 35.76

効果量

1
2
3
delta1<- (df$mu1-df$mu2)/df$sigma1
c(EAP=round(mean(delta1),3) , post.sd=round(sd(delta1),3) ,
round(quantile(delta1,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.681 0.327 0.059 0.160 0.674 1.231 1.345

1
2
3
delta2<- (df$mu1-df$mu2)/df$sigma2
c(EAP=round(mean(delta2),3) , post.sd=round(sd(delta2),3) ,
round(quantile(delta2,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.774 0.374 0.067 0.182 0.764 1.407 1.543

1
2
delta1_02<-table(delta1 > 0.2)
delta1_02[2]/(delta1_02[1]+delta1_02[2])

TRUE
0.9341

1
2
delta2_02<-table(delta2 > 0.2)
delta2_02[2]/(delta2_02[1]+delta2_02[2])

TRUE
0.94462

非重複度

1
2
3
U31<-pnorm(df$mu1, mean =df$mu2, sd = df$sigma2)
c(EAP=round(mean(U31),3) , post.sd=round(sd(U31),3) ,
round(quantile(U31,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.766 0.107 0.527 0.572 0.778 0.920 0.939

1
2
3
U32<-1 - pnorm(df$mu2, mean =df$mu1, sd = df$sigma1)
c(EAP=round(mean(U32),3) , post.sd=round(sd(U32),3) ,
round(quantile(U32,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.741 0.100 0.523 0.563 0.750 0.891 0.911

1
2
U31_075<-table(U31 > 0.75)
U31_075[2]/(U31_075[1]+U31_075[2])

TRUE
0.59707

1
2
U32_075<-table(U32 > 0.75)
U32_075[2]/(U32_075[1]+U32_075[2])

TRUE
0.49877

優越率

直接比較
1
2
yuetsu<-table(x1aste-x2aste > 0)
yuetsu[2]/(yuetsu[1]+yuetsu[2])

TRUE
0.68667

事後分布から
1
2
3
yuetsu<-pnorm((df$mu1-df$mu2)/sqrt(df$sigma1^2+df$sigma2^2) , mean =0, sd = 1)
c(EAP=round(mean(yuetsu),3) , post.sd=round(sd(yuetsu),3) ,
round(quantile(yuetsu,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.687 0.081 0.517 0.548 0.692 0.811 0.831

1
2
yuetsu075<-table(yuetsu>0.75)
yuetsu075[2]/(yuetsu075[1]+yuetsu075[2])

TRUE
0.22758

閾上率

直接比較
1
2
3
c<-1
ikizyo<-table(x1aste-x2aste > c)
ikizyo[2]/(ikizyo[1]+ikizyo[2])

TRUE
0.57793

事後分布から
1
2
3
4
c<-1
ikizyo<-pnorm( (df$mu1-df$mu2-c)/sqrt(df$sigma1^2+df$sigma2^2) , mean =0, sd = 1)
c(EAP=round(mean(ikizyo),3) , post.sd=round(sd(ikizyo),3) ,
round(quantile(ikizyo,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.580 0.086 0.407 0.436 0.583 0.718 0.742

1
2
ikizyo075<-table(ikizyo>0.75)
ikizyo075[2]/(ikizyo075[1]+ikizyo075[2])

TRUE
0.01953

WAIC

184.8

EQU : 182.7 < DEF : 184.8

標準偏差が共通するモデル(EQU)を採用。