心理統計法('17) その4

放送大学「心理統計法(‘17)」第7章 第8章 その1 (改)

授業ではrstanパッケージを使ってますが、MCMCpackパッケージを使ってみます。初学者ですので間違いが多々あると思います。

x1aste , x2aste の求め方が間違っていました。rnorm ではなく、mvtnorm::rmvnormを使います。

(Rスクリプトはここ)
早稲田大学文学部文学研究科 豊田研究室

(参考)
ベイジアンMCMCによる統計モデル
Using the ggmcmc package
Bayesian Inference With Stan ~番外編~
Exercise 2: Bayesian A/B testing using MCMC Metropolis-Hastings
政治学方法論 I

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
library(MCMCpack)
library(loo)
library(knitr)
library(mvtnorm)
#
# dnorm() ― dmvnorm():確率密度を求める
# pnorm() ― pmvnorm():累積密度を求める
# qnorm() ― qmvnorm():累積密度から確率点を求める
# rnorm() ― rmvnorm():乱数を生成する
#
library(ggplot2)
library(ggExtra)
#表7.1の「パスタ」データ入力
x1<- c(110,232,176,207,122,202,191,124,193,250); #目測群
x2<- c(130,268,104,185,128,147,162, 68,142,175); #実測群

burnin = 2000 , mcmc=200000 , thin=10 (時間がかかるならmcmc=20000,thin=1とかする。但し、effectiveSizeの値は小さくなる。)

標準偏差が共通するモデル(EQU)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
ptfun <- function(beta, x ,y ) {
# beta[1] : mu1 , beta[2] : mu2 , beta[3] : sigma , beta[4] : rho
ifelse( beta[3] < 0 , l<- -Inf ,
# 確率密度(pdf): dxxx(q):q は確率点を表す
# xとyの共分散 mean((x-mean(x))*(y-mean(y)))
l <- sum(log(dmvnorm(matrix(c(x,y),ncol=2), mean=c(beta[1],beta[2]) ,
# sigma=matrix(c(beta[3]^2,mean((x-beta[1])*(y-beta[2])),mean((x-beta[1])*(y-beta[2])),beta[3]^2),ncol=2)))))
sigma=matrix(c(beta[3]^2,beta[3]^2*beta[4],beta[3]^2*beta[4],beta[3]^2),ncol=2)))))
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(0,0,30,0)
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#WAICを(linuxで)求める部分はコメントアウトしています。
#sink("./likelihood.txt")
EQU <- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = ptfun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x=x1,y=x2 )})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./equ_lik.txt")
# l<-scan("./equ_lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic1 <- waic(matrix(lll,ncol=1)) )
#### waic 213.5
#
EQU.mcmc <- mcmc.list(EQU)
#
#plot(EQU.mcmc, trace = TRUE, density = TRUE)
#summary(EQU.mcmc)
#gelman.diag(EQU.mcmc)
#effectiveSize(EQU.mcmc)
#
df<-data.frame(mu1=unlist(EQU.mcmc[,1]),mu2=unlist(EQU.mcmc[,2]),sigma=unlist(EQU.mcmc[,3]),rho=unlist(EQU.mcmc[,4]) )
#
# create a ggplot2 scatterplot
#png("marhist7_1.png")
p <- ggplot(df, aes(mu1, mu2)) + geom_point(size=0.05,colour=rgb(0,0,1,0.3)) + theme_classic() +
xlim(min(df$mu1,df$mu2),max(df$mu1,df$mu2))+ylim(min(df$mu1,df$mu2),max(df$mu1,df$mu2))
p <- p + geom_abline(intercept = 0, slope =1,size=0.3 ,colour="red")
# add marginal histograms
ggMarginal(p, type = "histogram",bins=100,fill="lightgreen")
#dev.off()

赤い直線は、mu1=mu2となる線。この線より下はmu1>mu2、この線より上はmu1<mu2。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
ryou<-data.frame(c(round(mean(df[,1]),2) , round(sd(df[,1]),2) ,
round(quantile(df[,1],c(0.025,0.05,0.5,0.95,0.975)),2)))
for (i in 2:ncol(df) ){
ryou<-cbind(ryou,data.frame(c(round(mean(df[,i]),2) , round(sd(df[,i]),2) ,
round(quantile(df[,i],c(0.025,0.05,0.5,0.95,0.975)),2))))
}
names(ryou)<-c("mu1","mu2","sigma","rho")
rownames(ryou)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
mu1_mu2 <- df$mu1 - df$mu2
ryou<-cbind(ryou,data.frame(c(round(mean(mu1_mu2),2) , round(sd(mu1_mu2),2) ,
round(quantile(mu1_mu2,c(0.025,0.05,0.5,0.95,0.975)),2))))
names(ryou)<-c("mu1","mu2","sigma","rho","mu1_mu2")
#kable(t(ryou))
#
set.seed(1234)
mu<-matrix(rep(NA,nrow(df)*2),ncol=2)
for (i in 1:nrow(df)){
mu[i,]<-rmvnorm(n = 1, mean =c(df$mu1[i],df$mu2[i]), sigma=matrix(c(df$sigma[i]^2,df$sigma[i]^2*df$rho[i],df$sigma[i]^2*df$rho[i],df$sigma[i]^2),ncol=2))
}
x1aste<-mu[,1]
x2aste<-mu[,2]
#
ryou<-cbind(ryou,data.frame(c(round(mean(x1aste),2) , round(sd(x1aste),2) ,
round(quantile(x1aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
ryou<-cbind(ryou,data.frame(c(round(mean(x2aste),2) , round(sd(x2aste),2) ,
round(quantile(x2aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
#
names(ryou)<-c("mu1","mu2","sigma","rho","mu1_mu2","x1*","x2*")
kable(t(ryou))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu1 180.50 17.97 144.79 151.31 180.55 209.57 216.09
mu2 150.78 17.92 115.41 121.71 150.83 179.77 186.43
sigma 55.51 12.40 37.97 39.84 53.37 78.25 85.54
rho 0.60 0.21 0.08 0.19 0.64 0.87 0.89
mu1_mu2 29.72 15.38 -1.30 4.76 29.80 54.51 60.30
x1* 180.80 59.45 63.15 84.16 180.89 277.73 298.91
x2* 150.90 59.34 34.10 55.27 150.90 247.56 268.82

平均値の差の推測

1
2
sa<-table(mu1_mu2>0)
sa[2]/(sa[1]+sa[2])

TRUE
0.97097

1
2
sa10<-table(mu1_mu2>10)
sa10[2]/(sa10[1]+sa10[2])

TRUE
0.91036

相関の推測

1
2
soukan<-table(df$rho>0.5)
soukan[2]/(soukan[1]+soukan[2])

TRUE
0.7306

差得点の標準偏差の推測

1
2
3
4
5
#sigmad<-sqrt(2*df$sigma^2-2*df$rho*df$sigma^2)
sigmad<-df$sigma*sqrt(2*(1-df$rho))
#
c(EAP=round(mean(sigmad),3) , post.sd=round(sd(sigmad),3) ,
round(quantile(sigmad,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
46.966 13.410 28.593 30.508 44.428 71.933 80.103

1
2
3
c<-30
s<-table(sigmad < c)
s[2]/(s[1]+s[2])

TRUE
0.04197

差得点の効果量

差得点の効果量の事後分布

1
2
3
4
deltad<-(df$mu1-df$mu2)/sigmad
#deltad<- (df$mu1-df$mu2)/(df$sigma*sqrt(2*(1-df$rho)))
c(EAP=round(mean(deltad),3) , post.sd=round(sd(deltad),3) ,
round(quantile(deltad,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.679 0.360 -0.024 0.090 0.677 1.277 1.391

基準値より大きい差得点の効果量

1
2
deltad03<-table(deltad>0.3)
deltad03[2]/(deltad03[1]+deltad03[2])

TRUE
0.85529

差得点の優越率

差得点の優越率の事後分布

1
2
3
yuetsu<-pnorm(deltad , mean =0, sd = 1)
c(EAP=round(mean(yuetsu),3) , post.sd=round(sd(yuetsu),3) ,
round(quantile(yuetsu,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.738 0.111 0.490 0.536 0.751 0.899 0.918

基準値確率より大きい差得点の優越率

1
2
yuetsu07<-table(yuetsu>0.7)
yuetsu07[2]/(yuetsu07[1]+yuetsu07[2])

TRUE
0.66477

差得点の閾上率

差得点の閾上率の事後分布

1
2
3
4
c<-10
ikizyo<-pnorm( (df$mu1-df$mu2-c)/(df$sigma*sqrt(2*(1-df$rho))) , mean =0, sd = 1)
c(EAP=round(mean(ikizyo),3) , post.sd=round(sd(ikizyo),3) ,
round(quantile(ikizyo,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.665 0.117 0.418 0.459 0.674 0.843 0.868

基準値確率より大きい差得点の閾上率

1
2
ikizyo07<-table(ikizyo>0.7)
ikizyo07[2]/(ikizyo07[1]+ikizyo07[2])

TRUE
0.41347

burnin = 2000 , mcmc=200000 , thin=10

標準偏差が異なるモデル(DEF)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
pffun <- function(beta, x ,y ) {
# beta[1] : mu1 , beta[2] : mu2 , beta[3] : sigma1 , beta[4] : sigma2 , beta[5] : rho
ifelse( beta[3] < 0 | beta[4] < 0 , l<- -Inf ,
# 確率密度(pdf): dxxx(q):q は確率点を表す
# xとyの共分散 mean((x-mean(x))*(y-mean(y)))
l <- sum(log(dmvnorm(matrix(c(x,y),ncol=2), mean=c(beta[1],beta[2]) ,
# sigma=matrix(c(beta[3]^2,mean((x-beta[1])*(y-beta[2])),mean((x-beta[1])*(y-beta[2])),beta[3]^2),ncol=2)))))
sigma=matrix(c(beta[3]^2,beta[3]*beta[4]*beta[5],beta[3]*beta[4]*beta[5],beta[4]^2),ncol=2)))))
return(l)
}
#
chains <- 1:5
seeds <- seq(1,5,1)
init<-c(0,0,30,30,0)
burnin<-2000
mcmc_n<-200000
thin<- verbose <-10
#sink("./likelihood.txt")
DEF <- lapply(chains ,
function(chain) {
MCMCmetrop1R(fun = pffun ,
theta.init = init ,
burnin = burnin, mcmc = mcmc_n ,thin=thin,
# verbose= verbose,
seed = seeds[chain],
x=x1,y=x2 )})
# sink()
# system("gawk '/function value/' ./likelihood.txt | gawk 'gsub(\"function value =\",\"\")' > ./def_lik.txt")
# l<-scan("./def_lik.txt")
# ll<-matrix(l,ncol=length(chains),byrow = FALSE)
# head(ll) ; tail(ll)
# lll<-ll[-c(1 : burnin/thin),]
# ( waic2 <- waic(matrix(lll,ncol=1)) )
######## waic 217.0 #######
#
# WAIC : EQU=213.5 < 217=DEF --> EQUを採用する
#
DEF.mcmc <- mcmc.list(DEF)
#
#plot(DEF.mcmc, trace = TRUE, density = TRUE)
#summary(DEF.mcmc)
#gelman.diag(DEF.mcmc)
#effectiveSize(DEF.mcmc)
#
df<-data.frame(mu1=unlist(DEF.mcmc[,1]),mu2=unlist(DEF.mcmc[,2]),
sigma1=unlist(DEF.mcmc[,3]),sigma2=unlist(DEF.mcmc[,4]),rho=unlist(DEF.mcmc[,5]) )
#
ryou<-data.frame(c(round(mean(df[,1]),2) , round(sd(df[,1]),2) ,
round(quantile(df[,1],c(0.025,0.05,0.5,0.95,0.975)),2)))
for (i in 2:ncol(df) ){
ryou<-cbind(ryou,data.frame(c(round(mean(df[,i]),2) , round(sd(df[,i]),2) ,
round(quantile(df[,i],c(0.025,0.05,0.5,0.95,0.975)),2))))
}
names(ryou)<-c("mu1","mu2","sigma1","sigma2","rho")
rownames(ryou)<-c("EAP","post.sd","2.5%","5%","50%","95%","97.5%")
mu1_mu2 <- df$mu1 - df$mu2
ryou<-cbind(ryou,data.frame(c(round(mean(mu1_mu2),2) , round(sd(mu1_mu2),2) ,
round(quantile(mu1_mu2,c(0.025,0.05,0.5,0.95,0.975)),2))))
names(ryou)<-c("mu1","mu2","sigma1","sigma2","rho","mu1_mu2")
#kable(t(ryou))
#
set.seed(1234)
mu<-matrix(rep(NA,nrow(df)*2),ncol=2)
for (i in 1:nrow(df)){
mu[i,]<-rmvnorm(n = 1, mean =c(df$mu1[i],df$mu2[i]), sigma=matrix(c(df$sigma1[i]^2,df$sigma1[i]*df$sigma2[i]*df$rho[i],df$sigma1[i]*df$sigma2[i]*df$rho[i],df$sigma2[i]^2),ncol=2))
}
x1aste<-mu[,1]
x2aste<-mu[,2]
#
ryou<-cbind(ryou,data.frame(c(round(mean(x1aste),2) , round(sd(x1aste),2) ,
round(quantile(x1aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
ryou<-cbind(ryou,data.frame(c(round(mean(x2aste),2) , round(sd(x2aste),2) ,
round(quantile(x2aste,c(0.025,0.05,0.5,0.95,0.975)),2))))
#
names(ryou)<-c("mu1","mu2","sigma1","sigma2","rho","mu1_mu2","x1*","x2*")
kable(t(ryou))
EAP post.sd 2.5% 5% 50% 95% 97.5%
mu1 180.57 18.38 144.08 150.79 180.58 210.51 217.15
mu2 150.85 20.77 109.13 117.30 150.87 184.47 192.66
sigma1 56.39 15.93 34.63 36.83 53.41 86.06 96.14
sigma2 63.37 18.31 38.84 41.28 59.80 97.13 108.99
rho 0.59 0.23 0.02 0.14 0.64 0.87 0.90
mu1_mu2 29.72 18.00 -6.50 0.96 29.84 58.34 65.56
x1* 180.91 61.43 58.48 81.84 180.98 280.08 303.30
x2* 150.95 68.85 13.48 39.49 150.99 262.51 289.30

平均値の差の推測

1
2
sa<-table(mu1_mu2>0)
sa[2]/(sa[1]+sa[2])

TRUE
0.9544

1
2
sa10<-table(mu1_mu2>10)
sa10[2]/(sa10[1]+sa10[2])

TRUE
0.88292

相関の推測

1
2
soukan<-table(df$rho>0.5)
soukan[2]/(soukan[1]+soukan[2])

TRUE
0.71188

差得点の標準偏差の推測

1
2
3
sigmad<-sqrt(df$sigma1^2+df$sigma2^2-2*df$rho*df$sigma1*df$sigma2)
c(EAP=round(mean(sigmad),3) , post.sd=round(sd(sigmad),3) ,
round(quantile(sigmad,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
53.806 17.453 31.045 33.242 50.186 86.494 97.543

1
2
3
c<-30
s<-table(sigmad < c)
s[2]/(s[1]+s[2])

TRUE
0.01668

差得点の効果量

差得点の効果量の事後分布

1
2
3
4
deltad<-(df$mu1-df$mu2)/sigmad
#deltad<- (df$mu1-df$mu2)/sqrt(df$sigma1^2+df$sigma2^2-2*df$rho*df$sigma1*df$sigma2)
c(EAP=round(mean(deltad),3) , post.sd=round(sd(deltad),3) ,
round(quantile(deltad,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.603 0.360 -0.097 0.015 0.602 1.196 1.312

基準値より大きい差得点の効果量

1
2
deltad03<-table(deltad>0.3)
deltad03[2]/(deltad03[1]+deltad03[2])

TRUE
0.80075

差得点の優越率

差得点の優越率の事後分布

1
2
3
yuetsu<-pnorm(deltad , mean =0, sd = 1)
c(EAP=round(mean(yuetsu),3) , post.sd=round(sd(yuetsu),3) ,
round(quantile(yuetsu,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.715 0.116 0.461 0.506 0.726 0.884 0.905

基準値確率より大きい差得点の優越率

1
2
yuetsu07<-table(yuetsu>0.7)
yuetsu07[2]/(yuetsu07[1]+yuetsu07[2])

TRUE
0.58419

差得点の閾上率

差得点の閾上率の事後分布

1
2
3
4
c<-10
ikizyo<-pnorm( (df$mu1-df$mu2-c)/sqrt(df$sigma1^2+df$sigma2^2-2*df$rho*df$sigma1*df$sigma2) , mean =0, sd = 1)
c(EAP=round(mean(ikizyo),3) , post.sd=round(sd(ikizyo),3) ,
round(quantile(ikizyo,c(0.025,0.05,0.5,0.95,0.975)),3))

EAP post.sd 2.5% 5% 50% 95% 97.5%
0.648 0.119 0.398 0.440 0.656 0.830 0.855

基準値確率より大きい差得点の閾上率

1
2
ikizyo07<-table(ikizyo>0.7)
ikizyo07[2]/(ikizyo07[1]+ikizyo07[2])

TRUE
0.35621