アソシエーション分析

「データからの知識発見」第9章アソシエーション分析

arules パッケージをインストールしておく

1
2
3
# Attach arules (attaching takes a few seconds), load the Groceries
# transaction data set, and print its summary.
library("arules")
data("Groceries")
print(Groceries)

transactions in sparse format with
9835 transactions (rows) and
169 items (columns)

1
2
# Keep the transactions under a shorter name and also coerce them to a
# data.frame.
g0 <- Groceries
gfrm0 <- as(object = g0, Class = "data.frame")

ファイルに保存する箇所は省略

1
# Mine association rules using apriori's default parameters.
grule1 <- apriori(data = g0)

parameter specification:
confidence minval smax arem aval originalSupport support minlen maxlen target ext
0.8 0.1 1 none FALSE TRUE 0.1 1 10 rules FALSE

algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

apriori - find association rules with the apriori algorithm
version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
set item appearances …[0 item(s)] done [0.00s].
set transactions …[169 item(s), 9835 transaction(s)] done [0.02s].
sorting and recoding items … [8 item(s)] done [0.00s].
creating transaction tree … done [0.02s].
checking subsets of size 1 2 done [0.00s].
writing … [0 rule(s)] done [0.00s].
creating S4 object … done [0.00s].

1
# Mine rules with minimum support 0.01 and minimum confidence 0.5.
# The argument name `parameter` is spelled out in full: the abbreviated
# `p =` only worked through R's partial argument matching.
grule2 <- apriori(g0, parameter = list(support = 0.01, confidence = 0.5))

parameter specification:
confidence minval smax arem aval originalSupport support minlen maxlen target ext
0.5 0.1 1 none FALSE TRUE 0.01 1 10 rules FALSE

algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

apriori - find association rules with the apriori algorithm
version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
set item appearances …[0 item(s)] done [0.00s].
set transactions …[169 item(s), 9835 transaction(s)] done [0.01s].
sorting and recoding items … [88 item(s)] done [0.01s].
creating transaction tree … done [0.02s].
checking subsets of size 1 2 3 4 done [0.00s].
writing … [15 rule(s)] done [0.00s].
creating S4 object … done [0.01s].

1
2
# Reorder the mined rules by confidence, then list them all.
grule3 <- sort(grule2, by = "confidence")
inspect(grule3)

lhs rhs support confidence lift
1 {citrus fruit,
root vegetables} => {other vegetables} 0.01037112 0.5862069 3.029608
2 {tropical fruit,
root vegetables} => {other vegetables} 0.01230300 0.5845411 3.020999
3 {curd,
yogurt} => {whole milk} 0.01006609 0.5823529 2.279125
4 {other vegetables,
butter} => {whole milk} 0.01148958 0.5736041 2.244885
5 {tropical fruit,
root vegetables} => {whole milk} 0.01199797 0.5700483 2.230969
6 {root vegetables,
yogurt} => {whole milk} 0.01453991 0.5629921 2.203354
7 {other vegetables,
domestic eggs} => {whole milk} 0.01230300 0.5525114 2.162336
8 {yogurt,
whipped/sour cream} => {whole milk} 0.01087951 0.5245098 2.052747
9 {root vegetables,
rolls/buns} => {whole milk} 0.01270971 0.5230126 2.046888
10 {pip fruit,
other vegetables} => {whole milk} 0.01352313 0.5175097 2.025351
11 {tropical fruit,
yogurt} => {whole milk} 0.01514997 0.5173611 2.024770
12 {other vegetables,
yogurt} => {whole milk} 0.02226741 0.5128806 2.007235
13 {other vegetables,
whipped/sour cream} => {whole milk} 0.01464159 0.5070423 1.984385
14 {root vegetables,
rolls/buns} => {other vegetables} 0.01220132 0.5020921 2.594890
15 {root vegetables,
yogurt} => {other vegetables} 0.01291307 0.5000000 2.584078

1
2
# Keep only the first six rules of the confidence-sorted set and list them.
grule4 <- head(grule3, n = 6)
inspect(grule4)

lhs rhs support confidence lift
1 {citrus fruit,
root vegetables} => {other vegetables} 0.01037112 0.5862069 3.029608
2 {tropical fruit,
root vegetables} => {other vegetables} 0.01230300 0.5845411 3.020999
3 {curd,
yogurt} => {whole milk} 0.01006609 0.5823529 2.279125
4 {other vegetables,
butter} => {whole milk} 0.01148958 0.5736041 2.244885
5 {tropical fruit,
root vegetables} => {whole milk} 0.01199797 0.5700483 2.230969
6 {root vegetables,
yogurt} => {whole milk} 0.01453991 0.5629921 2.203354

1
2
# Relative frequency (share of transactions) of every item, stored as a
# named numeric vector, followed by a bar plot of the item frequencies.
gdat1 <- itemFrequency(g0, type = "relative")
itemFrequencyPlot(g0)

1
# First six item frequencies, in the original item order.
head(gdat1, n = 6)

frankfurter sausage liver loaf ham meat finished products
0.058973055 0.093950178 0.005083884 0.026029487 0.025826131 0.006507372

1
# Six least frequent items (ascending sort, then the first six).
head(sort(gdat1, decreasing = FALSE), n = 6)

baby food sound storage medium preservation products kitchen utensil bags frozen chicken
0.0001016777 0.0001016777 0.0002033554 0.0004067107 0.0004067107 0.0006100661

1
# Six most frequent items (descending sort, then the first six).
# `decreasing = TRUE` is written out in full: `d =` relied on partial
# argument matching and `T` is an ordinary variable that can be reassigned.
head(sort(gdat1, decreasing = TRUE))

whole milk other vegetables rolls/buns soda yogurt bottled water
0.2555160 0.1934926 0.1839349 0.1743772 0.1395018 0.1105236

1
# Six most frequent items, listed in ascending order of frequency.
tail(sort(gdat1, decreasing = FALSE), n = 6)

bottled water yogurt soda rolls/buns other vegetables whole milk
0.1105236 0.1395018 0.1743772 0.1839349 0.1934926 0.2555160

特定のアイテムを含んだルールだけを抽出

結論部(rhs)にwhole milk を含んだルールだけを抽出

1
# Mine rules whose consequent (rhs) is "whole milk"; every other item is
# allowed only in the antecedent (lhs).
# `parameter` is spelled out in full instead of relying on partial matching
# of `p =`, and the call is wrapped to stay within 80 columns.
grule5 <- apriori(
  g0,
  parameter = list(support = 0.005, confidence = 0.7),
  appearance = list(rhs = "whole milk", default = "lhs")
)

parameter specification:
confidence minval smax arem aval originalSupport support minlen maxlen target ext
0.7 0.1 1 none FALSE TRUE 0.005 1 10 rules FALSE

algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

apriori - find association rules with the apriori algorithm
version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
set item appearances …[1 item(s)] done [0.00s].
set transactions …[169 item(s), 9835 transaction(s)] done [0.00s].
sorting and recoding items … [120 item(s)] done [0.00s].
creating transaction tree … done [0.00s].
checking subsets of size 1 2 3 4 done [0.02s].
writing … [1 rule(s)] done [0.00s].
creating S4 object … done [0.00s].

1
# List the rules mined with "whole milk" as the only allowed consequent.
inspect(grule5)

lhs rhs support confidence lift
1 {tropical fruit,
root vegetables,
yogurt} => {whole milk} 0.00569395 0.7 2.739554

maxlenを指定しないとき(デフォルトのmaxlen=10が使われる)
1
# Mine rules with support >= 0.005 and confidence >= 0.5, leaving maxlen at
# its default. `parameter` is spelled out in full instead of relying on
# partial matching of `p =`.
grule6 <- apriori(g0, parameter = list(support = 0.005, confidence = 0.5))

parameter specification:
confidence minval smax arem aval originalSupport support minlen maxlen target ext
0.5 0.1 1 none FALSE TRUE 0.005 1 10 rules FALSE

algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

apriori - find association rules with the apriori algorithm
version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
set item appearances …[0 item(s)] done [0.00s].
set transactions …[169 item(s), 9835 transaction(s)] done [0.00s].
sorting and recoding items … [120 item(s)] done [0.00s].
creating transaction tree … done [0.01s].
checking subsets of size 1 2 3 4 done [0.00s].
writing … [120 rule(s)] done [0.00s].
creating S4 object … done [0.00s].

maxlen=2としたとき
1
# Same thresholds as before, but restrict rules to at most two items in total
# (maxlen = 2). This overwrites the previous grule6.
# `parameter` is spelled out in full instead of relying on partial matching
# of `p =`.
grule6 <- apriori(
  g0,
  parameter = list(support = 0.005, confidence = 0.5, maxlen = 2)
)

parameter specification:
confidence minval smax arem aval originalSupport support minlen maxlen target ext
0.5 0.1 1 none FALSE TRUE 0.005 1 2 rules FALSE

algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

apriori - find association rules with the apriori algorithm
version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
set item appearances …[0 item(s)] done [0.00s].
set transactions …[169 item(s), 9835 transaction(s)] done [0.00s].
sorting and recoding items … [120 item(s)] done [0.00s].
creating transaction tree … done [0.02s].
checking subsets of size 1 2 done [0.00s].
writing … [1 rule(s)] done [0.00s].
creating S4 object … done [0.00s].

1
# List the rules mined with maxlen = 2.
inspect(grule6)

lhs rhs support confidence lift
1 {baking powder} => {whole milk} 0.009252669 0.5229885 2.046793