R ile Birliktelik Kuralları Analizi (Association Rules Analysis with R Project)

Merhaba,

Bir önceki yazımda Birliktelik Kuralları Analizi (Association Rules Analysis) ile ilgili R kodlarını paylaşacağımı belirtmiştim. Dilerseniz hızlıca başlayalım. R üzerinde Birliktelik Kuralları Analizi için kullanacağım başlıca kütüphaneler: arules ve arulesViz.

arules kütüphanesini Apriori algoritmasını kullanmak için, arulesViz kütüphanesini de Apriori algoritması sonucunda ortaya çıkan kuralları (rules) görselleştirmek için kullanacağım.

# Association Rules Analysis with R Project ----------------------------

# Clean the System & Console Variable --------------------
# Start from an empty workspace and send a form feed, which clears the
# console in RStudio.
rm(list = ls())
cat("\014")
# NOTE(review): the former global `options(warn = -1)` silenced every warning
# for the rest of the session. It is not needed any more because the package
# check below does not warn when a package is missing.

# Sys_Date <- format(Sys.Date(), "%Y%m%d")
# Sys_Time <- format(Sys.time(), "%H:%M:%S")

# Install & Use Library ---------------------------------------------------
# arules: Apriori algorithm; arulesViz: rule visualisation;
# htmlwidgets: export of interactive output; here: working directory lookup.
required_packages <- c("arules", "arulesViz", "htmlwidgets", "here")

for (pkg in required_packages) {
  # Install only when the package is not available yet, then attach it.
  # library() stops with an error if the installation did not succeed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Set the System Path & Variable
# The relative "./Input" and "./Output" paths used further down are resolved
# against this directory.
Path <- here()

setwd(Path)

arules ve arulesViz kütüphanelerinin dışında, çalışma dizini için here ve interaktif çıktıları export edebilmek için htmlwidgets kütüphanelerini kullandım.

Şimdi kullanacağım veri setini içeri alalım. Kullanacağımız veri setini aşağıdaki yollardan biriyle elde edebilirsiniz.

  1. http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml13/groceries.csv
  2. R içerisinde arules kütüphanesi ile birlikte gelen Groceries veri setini data(Groceries) yazarak içeri aktarabilirsiniz.
# Load Dataset
# Read the comma-separated basket file into an arules transactions object;
# rm.duplicates = TRUE drops items repeated within the same transaction.

Groceries <- read.transactions(
  file = "./Input/groceries.csv",
  format = "basket",
  sep = ",",
  cols = NULL,
  rm.duplicates = TRUE,
  encoding = "UTF-8"
)

# Alternative: load the copy of the data set that ships with arules
# data(Groceries)

Veri seti için bazı tanımlayıcı bilgilere bakacak olursak;

# Show Type and Class of Dataset

typeof(Groceries)
[1] "S4"

class(Groceries)
[1] "transactions"
attr(,"package")
[1] "arules"

# Dimension of Dataset

dim(Groceries)
[1] 9835  169

length(Groceries)
[1] 9835

str(Groceries)
Formal class 'transactions' [package "arules"] with 3 slots
  ..@ data       :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
  .. .. ..@ i       : int [1:43367] 29 88 118 132 33 157 167 166 38 91 ...
  .. .. ..@ p       : int [1:9836] 0 4 7 8 12 16 21 22 27 28 ...
  .. .. ..@ Dim     : int [1:2] 169 9835
  .. .. ..@ Dimnames:List of 2
  .. .. .. ..$ : NULL
  .. .. .. ..$ : NULL
  .. .. ..@ factors : list()
  ..@ itemInfo   :'data.frame':	169 obs. of  1 variable:
  .. ..$ labels: chr [1:169] "abrasive cleaner" "artif. sweetener" "baby cosmetics" "baby food" ...
  ..@ itemsetInfo:'data.frame':	0 obs. of  0 variables

Veri seti 9835×169 boyutundadır; yani 9835 transaction (element) ve 169 item'dan oluşmaktadır. Veri seti içerisindeki ilk 10 gözleme bakalım;

# Show The Dataset
# inspect(Groceries)  # would print every transaction

# Print only the first ten transactions.
inspect(Groceries[seq_len(10)])
     items                                                                 
[1]  {citrus fruit,margarine,ready soups,semi-finished bread}              
[2]  {coffee,tropical fruit,yogurt}                                        
[3]  {whole milk}                                                          
[4]  {cream cheese,meat spreads,pip fruit,yogurt}                          
[5]  {condensed milk,long life bakery product,other vegetables,whole milk} 
[6]  {abrasive cleaner,butter,rice,whole milk,yogurt}                      
[7]  {rolls/buns}                                                          
[8]  {bottled beer,liquor (appetizer),other vegetables,rolls/buns,UHT-milk}
[9]  {potted plants}                                                       
[10] {cereals,whole milk}

Veri seti içerisindeki yer alan tüm items listeleyelim.

# Show the Items
# Copy the item-label table out of the transactions object and print it.
ItemSetList <- slot(Groceries, "itemInfo")
print(ItemSetList)
                       labels
1            abrasive cleaner
2            artif. sweetener
3              baby cosmetics
4                   baby food
5                        bags
...
...
...
165               white bread
166                white wine
167                whole milk
168                    yogurt
169                  zwieback

İlk ve son 10 item'ı listeleyecek olursak;

# First 10 items of DataSet (n = 10 rows of the item-label table)
# head(ItemSetList, n=10)

head(Groceries@itemInfo, n=10)
             labels
1  abrasive cleaner
2  artif. sweetener
3    baby cosmetics
4         baby food
5              bags
6     baking powder
7  bathroom cleaner
8              beef
9           berries
10        beverages

# Last 10 items of DataSet (n = 10 rows of the item-label table)
# tail(ItemSetList, n=10)

tail(Groceries@itemInfo, n=10)
                labels
160           UHT-milk
161            vinegar
162            waffles
163 whipped/sour cream
164             whisky
165        white bread
166         white wine
167         whole milk
168             yogurt
169           zwieback

Veri setini özetleyici bilgiler ile devam ediyoruz;

# Summary of Dataset
# Keep the summary object in `s`: its slots (s@itemSummary, s@lengths) are
# read further down, and `s` was never assigned before. Then print it.

s <- summary(Groceries)
s
transactions as itemMatrix in sparse format with
 9835 rows (elements/itemsets/transactions) and
 169 columns (items) and a density of 0.02609146 

most frequent items:
      whole milk other vegetables       rolls/buns             soda           yogurt          (Other) 
            2513             1903             1809             1715             1372            34055 

element (itemset/transaction) length distribution:
sizes
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22 
2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46   29   14   14    9   11    4 
  23   24   26   27   28   29   32 
   6    1    1    1    1    3    1 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   2.000   3.000   4.409   6.000  32.000 

includes extended item information - examples:
            labels
1 abrasive cleaner
2 artif. sweetener
3   baby cosmetics

En çok kullanılan items‘lar;

# Most Frequent Items

s@itemSummary
      whole milk other vegetables       rolls/buns             soda           yogurt          (Other) 
            2513             1903             1809             1715             1372            34055

En çok kullanılan 5 items‘a baktığımızda 2513 adet ile Whole Milk ilk sırada olduğu ve onu 1903 adet ile other vegetables izlediği görülmektedir.

Transaction uzunlukları (bir transaction içinde birlikte yer alan item sayısı; 1 ile 32 arasında değişmektedir);

# Length Distribution Size

s@lengths

sizes
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22 
2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46   29   14   14    9   11    4 
  23   24   26   27   28   29   32 
   6    1    1    1    1    3    1

Bu tabloyu inceleyecek olursak; yalnızca 1 item içeren 2159 adet transaction ve 32 item içeren 1 adet transaction olduğu görülmektedir.

itemFrequency(Groceries, type = "relative")
         abrasive cleaner          artif. sweetener            baby cosmetics                 baby food 
             0.0035587189              0.0032536858              0.0006100661              0.0001016777 
                     bags             baking powder          bathroom cleaner                      beef 
             0.0004067107              0.0176919166              0.0027452974              0.0524656838 
                  berries                 beverages              bottled beer             bottled water 
             0.0332486019              0.0260294865              0.0805287239              0.1105236401 
                   brandy               brown bread                    butter               butter milk 
             0.0041687850              0.0648703610              0.0554143366              0.0279613625 
                 cake bar                   candles                     candy               canned beer 
             0.0132180986              0.0089476360              0.0298932384              0.0776817489 
              canned fish              canned fruit         canned vegetables                  cat food 
             0.0150482969              0.0032536858              0.0107778343              0.0232841891 
                  cereals               chewing gum                   chicken                 chocolate 
             0.0056939502              0.0210472801              0.0429079817              0.0496187087 
    chocolate marshmallow              citrus fruit                   cleaner           cling film/bags 
             0.0090493137              0.0827656329              0.0050838841              0.0113879004 
             cocoa drinks                    coffee            condensed milk         cooking chocolate 
             0.0022369090              0.0580579563              0.0102694459              0.0025419420 
                 cookware                     cream              cream cheese                      curd 
             0.0027452974              0.0013218099              0.0396542959              0.0532791052 
              curd cheese               decalcifier               dental care                   dessert 
             0.0050838841              0.0015251652              0.0057956279              0.0371123538 
                detergent              dish cleaner                    dishes                  dog food 
             0.0192170819              0.0104728012              0.0175902389              0.0085409253 
            domestic eggs  female sanitary products         finished products                      fish 
             0.0634468734              0.0061006609              0.0065073716              0.0029486528 
                    flour            flower (seeds)    flower soil/fertilizer               frankfurter 
             0.0173868836              0.0103711235              0.0019318760              0.0589730554 
           frozen chicken            frozen dessert               frozen fish             frozen fruits 
             0.0006100661              0.0107778343              0.0116929334              0.0012201322 
             frozen meals    frozen potato products         frozen vegetables     fruit/vegetable juice 
             0.0283680732              0.0084392476              0.0480935435              0.0722928317 
                   grapes                hair spray                       ham            hamburger meat 
             0.0223690900              0.0011184545              0.0260294865              0.0332486019 
              hard cheese                     herbs                     honey    house keeping products 
             0.0245043213              0.0162684291              0.0015251652              0.0083375699 
         hygiene articles                 ice cream            instant coffee     Instant food products 
             0.0329435689              0.0250127097              0.0074224708              0.0080325369 
                      jam                   ketchup            kitchen towels           kitchen utensil 
             0.0053889171              0.0042704626              0.0059989832              0.0004067107 
              light bulbs                   liqueur                    liquor        liquor (appetizer) 
             0.0041687850              0.0009150991              0.0110828673              0.0079308592 
               liver loaf  long life bakery product           make up remover            male cosmetics 
             0.0050838841              0.0374173869              0.0008134215              0.0045754957 
                margarine                mayonnaise                      meat              meat spreads 
             0.0585663447              0.0091509914              0.0258261312              0.0042704626 
          misc. beverages                   mustard                   napkins                newspapers 
             0.0283680732              0.0119979664              0.0523640061              0.0798169802 
                nut snack               nuts/prunes                       oil                    onions 
             0.0031520081              0.0033553635              0.0280630402              0.0310116929 
         organic products           organic sausage          other vegetables packaged fruit/vegetables 
             0.0016268429              0.0022369090              0.1934926284              0.0130147433 
                    pasta                    pastry                  pet care                photo/film 
             0.0150482969              0.0889679715              0.0094560244              0.0092526690 
       pickled vegetables                 pip fruit                   popcorn                      pork 
             0.0178952720              0.0756481952              0.0072191154              0.0576512456 
          potato products             potted plants     preservation products          processed cheese 
             0.0028469751              0.0172852059              0.0002033554              0.0165734621 
                 prosecco            pudding powder               ready soups            red/blush wine 
             0.0020335536              0.0023385867              0.0018301983              0.0192170819 
                     rice             roll products                rolls/buns           root vegetables 
             0.0076258261              0.0102694459              0.1839349263              0.1089984748 
          rubbing alcohol                       rum            salad dressing                      salt 
             0.0010167768              0.0044738180              0.0008134215              0.0107778343 
              salty snack                    sauces                   sausage         seasonal products 
             0.0378240976              0.0054905948              0.0939501779              0.0142348754 
      semi-finished bread             shopping bags                 skin care             sliced cheese 
             0.0176919166              0.0985256736              0.0035587189              0.0245043213 
           snack products                      soap                      soda               soft cheese 
             0.0030503305              0.0026436197              0.1743772242              0.0170818505 
                 softener      sound storage medium                     soups            sparkling wine 
             0.0054905948              0.0001016777              0.0068124047              0.0055922725 
            specialty bar          specialty cheese       specialty chocolate             specialty fat 
             0.0273512964              0.0085409253              0.0304016268              0.0036603965 
     specialty vegetables                    spices             spread cheese                     sugar 
             0.0017285206              0.0051855618              0.0111845450              0.0338586680 
            sweet spreads                     syrup                       tea                   tidbits 
             0.0090493137              0.0032536858              0.0038637519              0.0023385867 
           toilet cleaner            tropical fruit                    turkey                  UHT-milk 
             0.0007117438              0.1049313676              0.0081342145              0.0334519573 
                  vinegar                   waffles        whipped/sour cream                    whisky 
             0.0065073716              0.0384341637              0.0716827656              0.0008134215 
              white bread                white wine                whole milk                    yogurt 
             0.0420945602              0.0190137265              0.2555160142              0.1395017794 
                 zwieback 
             0.0069140824

Her bir item için relative (göreli) frekans değerleri görülmektedir.

itemFrequency(Groceries, type = "absolute")
         abrasive cleaner          artif. sweetener            baby cosmetics                 baby food 
                       35                        32                         6                         1 
                     bags             baking powder          bathroom cleaner                      beef 
                        4                       174                        27                       516 
                  berries                 beverages              bottled beer             bottled water 
                      327                       256                       792                      1087 
                   brandy               brown bread                    butter               butter milk 
                       41                       638                       545                       275 
                 cake bar                   candles                     candy               canned beer 
                      130                        88                       294                       764 
              canned fish              canned fruit         canned vegetables                  cat food 
                      148                        32                       106                       229 
                  cereals               chewing gum                   chicken                 chocolate 
                       56                       207                       422                       488 
    chocolate marshmallow              citrus fruit                   cleaner           cling film/bags 
                       89                       814                        50                       112 
             cocoa drinks                    coffee            condensed milk         cooking chocolate 
                       22                       571                       101                        25 
                 cookware                     cream              cream cheese                      curd 
                       27                        13                       390                       524 
              curd cheese               decalcifier               dental care                   dessert 
                       50                        15                        57                       365 
                detergent              dish cleaner                    dishes                  dog food 
                      189                       103                       173                        84 
            domestic eggs  female sanitary products         finished products                      fish 
                      624                        60                        64                        29 
                    flour            flower (seeds)    flower soil/fertilizer               frankfurter 
                      171                       102                        19                       580 
           frozen chicken            frozen dessert               frozen fish             frozen fruits 
                        6                       106                       115                        12 
             frozen meals    frozen potato products         frozen vegetables     fruit/vegetable juice 
                      279                        83                       473                       711 
                   grapes                hair spray                       ham            hamburger meat 
                      220                        11                       256                       327 
              hard cheese                     herbs                     honey    house keeping products 
                      241                       160                        15                        82 
         hygiene articles                 ice cream            instant coffee     Instant food products 
                      324                       246                        73                        79 
                      jam                   ketchup            kitchen towels           kitchen utensil 
                       53                        42                        59                         4 
              light bulbs                   liqueur                    liquor        liquor (appetizer) 
                       41                         9                       109                        78 
               liver loaf  long life bakery product           make up remover            male cosmetics 
                       50                       368                         8                        45 
                margarine                mayonnaise                      meat              meat spreads 
                      576                        90                       254                        42 
          misc. beverages                   mustard                   napkins                newspapers 
                      279                       118                       515                       785 
                nut snack               nuts/prunes                       oil                    onions 
                       31                        33                       276                       305 
         organic products           organic sausage          other vegetables packaged fruit/vegetables 
                       16                        22                      1903                       128 
                    pasta                    pastry                  pet care                photo/film 
                      148                       875                        93                        91 
       pickled vegetables                 pip fruit                   popcorn                      pork 
                      176                       744                        71                       567 
          potato products             potted plants     preservation products          processed cheese 
                       28                       170                         2                       163 
                 prosecco            pudding powder               ready soups            red/blush wine 
                       20                        23                        18                       189 
                     rice             roll products                rolls/buns           root vegetables 
                       75                       101                      1809                      1072 
          rubbing alcohol                       rum            salad dressing                      salt 
                       10                        44                         8                       106 
              salty snack                    sauces                   sausage         seasonal products 
                      372                        54                       924                       140 
      semi-finished bread             shopping bags                 skin care             sliced cheese 
                      174                       969                        35                       241 
           snack products                      soap                      soda               soft cheese 
                       30                        26                      1715                       168 
                 softener      sound storage medium                     soups            sparkling wine 
                       54                         1                        67                        55 
            specialty bar          specialty cheese       specialty chocolate             specialty fat 
                      269                        84                       299                        36 
     specialty vegetables                    spices             spread cheese                     sugar 
                       17                        51                       110                       333 
            sweet spreads                     syrup                       tea                   tidbits 
                       89                        32                        38                        23 
           toilet cleaner            tropical fruit                    turkey                  UHT-milk 
                        7                      1032                        80                       329 
                  vinegar                   waffles        whipped/sour cream                    whisky 
                       64                       378                       705                         8 
              white bread                white wine                whole milk                    yogurt 
                      414                       187                      2513                      1372 
                 zwieback 
                       68

Her bir item için absolute (mutlak) frekans değerleri görülmektedir.

Veri setine ait Absolute ve Relative grafiklerini çizdirelim:

Absolute Item Frequency Plot Top 25

Item Frequency Plot

# Draw the absolute item-frequency chart for the 25 most frequent items into
# a 1024 x 768 px JPEG file.
jpeg(
  filename = "./Output/1-Absolute_ItemFrequencyPlot.jpg",
  width = 1024,
  height = 768,
  units = "px"
)
itemFrequencyPlot(
  Groceries,
  topN = 25,
  type = "absolute",
  main = "Absolute Item Frequency Plot (Top 25)",
  xlab = "Items",
  ylab = "Item Frequency (Absolute)"
)
dev.off()  # close the JPEG device so the file is completed

 

Relative Item Frequency Plot Top 25

# Draw the relative item-frequency chart for the 25 most frequent items into
# a 1024 x 768 px JPEG file.
jpeg(
  filename = "./Output/2-Relative_ItemFrequencyPlot.jpg",
  width = 1024,
  height = 768,
  units = "px"
)
itemFrequencyPlot(
  Groceries,
  topN = 25,
  type = "relative",
  main = "Relative Item Frequency Plot (Top 25)",
  xlab = "Items",
  ylab = "Item Frequency (relative)"
)
dev.off()  # close the JPEG device so the file is completed

Şimdi Birliktelik Kuralları oluşması için apriori modelini tasarlayalım;

Min. Support değerini %1 yani 0,01; Min. Confidence değerini %25 yani 0,25 olarak set ediyoruz. Ve Modelimizi bu parametreler ile initialize ediyoruz.

Not: Eğer Min. Support ve Min. Confidence değerlerini vermezsek; model varsayılan değerler (%10 ve %80) ile oluşturulacaktır.

# Both Association Rules
# Mine rules with Apriori using a minimum support of 1 %, a minimum
# confidence of 25 % and a minimum rule length of 2.

options(digits = 2)
min_supp <- 0.01
min_conf <- 0.25
# NOTE(review): "lenght" is a misspelling of "length"; the name is kept
# unchanged in case code outside this section refers to it.
min_lenght <- 2

# The "+" console continuation prompt that was pasted in front of the second
# line of this call has been removed; with it the call is not valid R.
BasketRules <- apriori(
  Groceries,
  parameter = list(
    supp = min_supp,
    conf = min_conf,
    minlen = min_lenght,
    target = "rules"
  )
)

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target   ext
       0.25    0.1    1 none FALSE            TRUE       5    0.01      2     10  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 98 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[169 item(s), 9835 transaction(s)] done [0.07s].
sorting and recoding items ... [88 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [170 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].

Verdiğimiz min. support ve min. confidence değerlerine göre 170 adet kural (rule) oluştuğu görülmektedir.

summary(BasketRules)
set of 170 rules

rule length distribution (lhs + rhs):sizes
 2  3 
96 74 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.000   2.000   2.000   2.435   3.000   3.000 

summary of quality measures:
    support          confidence          lift            count      
 Min.   :0.01007   Min.   :0.2517   Min.   :0.9932   Min.   : 99.0  
 1st Qu.:0.01159   1st Qu.:0.2973   1st Qu.:1.5215   1st Qu.:114.0  
 Median :0.01454   Median :0.3587   Median :1.7784   Median :143.0  
 Mean   :0.01822   Mean   :0.3703   Mean   :1.8747   Mean   :179.2  
 3rd Qu.:0.02097   3rd Qu.:0.4253   3rd Qu.:2.1453   3rd Qu.:206.2  
 Max.   :0.07483   Max.   :0.5862   Max.   :3.2950   Max.   :736.0  

mining info:
      data ntransactions support confidence
 Groceries          9835    0.01       0.25

170 adet oluşan kuralların ilk 25 tanesini görelim;

# inspect(BasketRules)  # would print every rule
# Print only the first 25 rules.
inspect(BasketRules[seq_len(25)])
     lhs                           rhs                support    confidence lift     count
[1]  {hard cheese}              => {whole milk}       0.01006609 0.4107884  1.607682  99  
[2]  {butter milk}              => {other vegetables} 0.01037112 0.3709091  1.916916 102  
[3]  {butter milk}              => {whole milk}       0.01159126 0.4145455  1.622385 114  
[4]  {ham}                      => {whole milk}       0.01148958 0.4414062  1.727509 113  
[5]  {sliced cheese}            => {whole milk}       0.01077783 0.4398340  1.721356 106  
[6]  {oil}                      => {whole milk}       0.01128622 0.4021739  1.573968 111  
[7]  {onions}                   => {other vegetables} 0.01423488 0.4590164  2.372268 140  
[8]  {onions}                   => {whole milk}       0.01209964 0.3901639  1.526965 119  
[9]  {berries}                  => {yogurt}           0.01057448 0.3180428  2.279848 104  
[10] {berries}                  => {other vegetables} 0.01026945 0.3088685  1.596280 101  
[11] {berries}                  => {whole milk}       0.01179461 0.3547401  1.388328 116  
[12] {hamburger meat}           => {other vegetables} 0.01382816 0.4159021  2.149447 136  
[13] {hamburger meat}           => {whole milk}       0.01474326 0.4434251  1.735410 145  
[14] {hygiene articles}         => {whole milk}       0.01281139 0.3888889  1.521975 126  
[15] {salty snack}              => {other vegetables} 0.01077783 0.2849462  1.472646 106  
[16] {salty snack}              => {whole milk}       0.01118454 0.2956989  1.157262 110  
[17] {sugar}                    => {other vegetables} 0.01077783 0.3183183  1.645119 106  
[18] {sugar}                    => {whole milk}       0.01504830 0.4444444  1.739400 148  
[19] {waffles}                  => {other vegetables} 0.01006609 0.2619048  1.353565  99  
[20] {waffles}                  => {whole milk}       0.01270971 0.3306878  1.294196 125  
[21] {long life bakery product} => {other vegetables} 0.01067616 0.2853261  1.474610 105  
[22] {long life bakery product} => {whole milk}       0.01352313 0.3614130  1.414444 133  
[23] {dessert}                  => {other vegetables} 0.01159126 0.3123288  1.614164 114  
[24] {dessert}                  => {whole milk}       0.01372649 0.3698630  1.447514 135  
[25] {cream cheese}             => {yogurt}           0.01240468 0.3128205  2.242412 122  
# First five rules (pasted ">" console prompts removed so the line can be run
# as script code).
inspect(head(BasketRules, n = 5))
    lhs                rhs                support    confidence lift     count
[1] {hard cheese}   => {whole milk}       0.01006609 0.4107884  1.607682  99  
[2] {butter milk}   => {other vegetables} 0.01037112 0.3709091  1.916916 102  
[3] {butter milk}   => {whole milk}       0.01159126 0.4145455  1.622385 114  
[4] {ham}           => {whole milk}       0.01148958 0.4414062  1.727509 113  
[5] {sliced cheese} => {whole milk}       0.01077783 0.4398340  1.721356 106  
# Last five rules (pasted ">" console prompt removed so the line can be run
# as script code).
inspect(tail(BasketRules, n = 5))
    lhs                              rhs                support    confidence lift     count
[1] {other vegetables,yogurt}     => {whole milk}       0.02226741 0.5128806  2.007235 219  
[2] {whole milk,yogurt}           => {other vegetables} 0.02226741 0.3974592  2.054131 219  
[3] {other vegetables,whole milk} => {yogurt}           0.02226741 0.2975543  2.132979 219  
[4] {other vegetables,rolls/buns} => {whole milk}       0.01789527 0.4200477  1.643919 176  
[5] {rolls/buns,whole milk}       => {other vegetables} 0.01789527 0.3159785  1.633026 176

Burada ilk kuralı yorumlayacak olursak;

     lhs                           rhs                support    confidence lift     count
[1]  {hard cheese}              => {whole milk}       0.01006609 0.4107884  1.607682  99

Kuralın Sol Tarafını baz alarak başlıyoruz;

Support Yorumu: Tüm fiş hareketlerinin yaklaşık %1’inde (0,01) Hard Cheese ile Whole Milk ürünleri birlikte yer almaktadır; yani bu birliktelik oldukça seyrek görülmektedir.

Confidence Yorumu: Hard Cheese ürününü satın alan bir müşterinin Whole Milk ürününü de satın alma olasılığı 0,41, yani %41’dir. Bu değer, kuralın RHS (Right Hand Side), yani Consequent tarafı ile ilgilenir.

Lift Yorumu: Hard Cheese ürünü bulunan alışveriş sepetlerinde Whole Milk ürününün görülme olasılığı, genel görülme olasılığına göre 1,60 kat daha yüksektir. Bu değer, kuralın LHS (Left Hand Side), yani Antecedent tarafı ile ilgilenir.

Count Yorumu: Her iki ürünün (item) birlikte görüldüğü fiş sayısı 99’dur.


Şimdi de oluşturulan Apriori Modeline ait bilgileri ekrana yazdırıyoruz.

# Print a short summary of the model: the minimum support / confidence
# thresholds and the number of rules held in BasketRules.
# (The leading "+" console continuation prompts were removed; pasted as-is
# they are parsed as unary plus on a string and the call fails.)
cat(paste0("\n","Association Rules  Analysis", "\n",
           "= = = = = = = = = = = = = =", "\n",
           "Min. Supp. & Conf. Values:", "\t", min_supp, " & ", min_conf, "\n",
           "X ==> Y Rules Count:", "\t\t", length(BasketRules)))

Association Rules  Analysis
= = = = = = = = = = = = = =
Min. Supp. & Conf. Values:	0.01 & 0.25
X ==> Y Rules Count:		170

Apriori algoritması, belirlenen Min. Supp. ve Min. Conf. değerleri ile çalıştırıldığında 170 adet kural oluştuğu görülmektedir. Oluşan bu kuralları Confidence, Support ve Lift değerlerine göre azalan (descending) şekilde sıralıyoruz ve ardından ilk ve son 5 kuralı listeliyoruz.

Confidence değerlerine göre ilk ve son 5 kural:

BasketRules_Conf <- sort(BasketRules, by="confidence", decreasing=TRUE)

inspect(head(BasketRules_Conf, n=5))
    lhs                                 rhs                support    confidence lift     count
[1] {citrus fruit,root vegetables}   => {other vegetables} 0.01037112 0.5862069  3.029608 102  
[2] {root vegetables,tropical fruit} => {other vegetables} 0.01230300 0.5845411  3.020999 121  
[3] {curd,yogurt}                    => {whole milk}       0.01006609 0.5823529  2.279125  99  
[4] {butter,other vegetables}        => {whole milk}       0.01148958 0.5736041  2.244885 113  
[5] {root vegetables,tropical fruit} => {whole milk}       0.01199797 0.5700483  2.230969 118  

inspect(tail(BasketRules_Conf, n=5))
    lhs                        rhs                support    confidence lift      count
[1] {fruit/vegetable juice} => {soda}             0.01840366 0.2545710  1.4598869 181  
[2] {bottled beer}          => {whole milk}       0.02043721 0.2537879  0.9932367 201  
[3] {pastry}                => {other vegetables} 0.02257245 0.2537143  1.3112349 222  
[4] {chicken}               => {root vegetables}  0.01087951 0.2535545  2.3262206 107  
[5] {margarine}             => {rolls/buns}       0.01474326 0.2517361  1.3686151 145

Support değerlerine göre ilk ve son 5 kural:

BasketRules_Supp <- sort(BasketRules, by="support", decreasing=TRUE)

inspect(head(BasketRules_Supp, n=5))
    lhs                   rhs                support    confidence lift     count
[1] {other vegetables} => {whole milk}       0.07483477 0.3867578  1.513634 736  
[2] {whole milk}       => {other vegetables} 0.07483477 0.2928770  1.513634 736  
[3] {rolls/buns}       => {whole milk}       0.05663447 0.3079049  1.205032 557  
[4] {yogurt}           => {whole milk}       0.05602440 0.4016035  1.571735 551  
[5] {root vegetables}  => {whole milk}       0.04890696 0.4486940  1.756031 481  

inspect(tail(BasketRules_Supp, n=5))
    lhs                     rhs                support    confidence lift     count
[1] {sausage,whole milk} => {other vegetables} 0.01016777 0.3401361  1.757876 100  
[2] {hard cheese}        => {whole milk}       0.01006609 0.4107884  1.607682  99  
[3] {waffles}            => {other vegetables} 0.01006609 0.2619048  1.353565  99  
[4] {curd,yogurt}        => {whole milk}       0.01006609 0.5823529  2.279125  99  
[5] {curd,whole milk}    => {yogurt}           0.01006609 0.3852140  2.761356  99

Lift değerlerine göre ilk ve son 5 kural:

BasketRules_Lift <- sort(BasketRules, by="lift", decreasing=TRUE)

inspect(head(BasketRules_Lift, n=5))
    lhs                                  rhs                support    confidence lift     count
[1] {citrus fruit,other vegetables}   => {root vegetables}  0.01037112 0.3591549  3.295045 102  
[2] {other vegetables,tropical fruit} => {root vegetables}  0.01230300 0.3427762  3.144780 121  
[3] {beef}                            => {root vegetables}  0.01738688 0.3313953  3.040367 171  
[4] {citrus fruit,root vegetables}    => {other vegetables} 0.01037112 0.5862069  3.029608 102  
[5] {root vegetables,tropical fruit}  => {other vegetables} 0.01230300 0.5845411  3.020999 121  

inspect(tail(BasketRules_Lift, n=5))
    lhs                rhs          support    confidence lift      count
[1] {sausage}       => {whole milk} 0.02989324 0.3181818  1.2452520 294  
[2] {bottled water} => {whole milk} 0.03436706 0.3109476  1.2169396 338  
[3] {rolls/buns}    => {whole milk} 0.05663447 0.3079049  1.2050318 557  
[4] {salty snack}   => {whole milk} 0.01118454 0.2956989  1.1572618 110  
[5] {bottled beer}  => {whole milk} 0.02043721 0.2537879  0.9932367 201

Şimdi ise oluşan kuralları görselleştirerek (visualise) somutlaştıralım. Bunun için Graph.R dosyasının içerisine yazmış olduğum kod;

# Export static visualisations of the rules as 1024x768 JPEG files in ./Output.
# NOTE(review): the article presents the statements below as the contents of
# Graph.R; confirm whether they should run inline as well as through source().
source('Graph.R')
cat("Export Graph for BasketRules")

# Default plot of all rules. The title previously said "TelcoRules", which
# does not match the Groceries data used here.
jpeg(filename = "./Output/3-BasketRules_ScatterPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules, main="Scatter Plot for Groceries")
dev.off()

# Grouped matrix view of all rules.
jpeg(filename = "./Output/4-BasketRules_GroupedPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules, method="grouped", main="Grouped Matrix for Groceries")
dev.off()

# Graph view, restricted to the first 25 rules.
jpeg(filename = "./Output/5-BasketRules_GraphPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules[1:25], method="graph", control=list(alpha="1"), main="Graph Method for Groceries")
dev.off()

# Parallel coordinates view. The placeholder title "GraphName6" was replaced
# with a descriptive one in line with the other plots.
jpeg(filename = "./Output/6-BasketRules_Paracoord.jpg", width=1024, height=768, units="px")
plot(BasketRules, method ="paracoord", control = list(reorder = TRUE), main="Parallel Coordinates Plot for Groceries")
dev.off()

Şimdi de Interactive Graph(İnteraktif Grafik) ile daha kullanışlı ve akıllı görselleştirmeler kullanalım. Bunun için InteractiveGraph.R dosyasının içerisinde yazmış olduğum kod;

# Export Interactive Graph
# plotly_arules() is no longer available in current arulesViz releases; the
# same interactive widgets are produced by plot() with engine = "plotly".
# NOTE(review): if InteractiveGraph.R itself still calls plotly_arules(), it
# needs the same change - confirm.

source('InteractiveGraph.R')
cat("Interactive Graph")

# Create the output directory on first run (Path is the project root).
interactive_dir <- file.path(Path, "Output", "7-InteractiveGraph")
if (!dir.exists(interactive_dir)) {
  dir.create(interactive_dir, recursive = TRUE)
}

# Default (scatter) plot: support vs. confidence, shaded by lift.
ip1 <- plot(BasketRules, measure = c("support", "confidence"), shading = "lift",
            engine = "plotly")
saveWidget(ip1, file = file.path(interactive_dir, "InteractiveGraph-1.html"), selfcontained = FALSE)

# Default (scatter) plot: support vs. lift, shaded by confidence.
ip2 <- plot(BasketRules, measure = c("support", "lift"), shading = "confidence",
            engine = "plotly")
saveWidget(ip2, file = file.path(interactive_dir, "InteractiveGraph-2.html"), selfcontained = FALSE)

# Two-key plots.
ip3 <- plot(BasketRules, method = "two-key plot", measure = c("support", "confidence"),
            engine = "plotly")
saveWidget(ip3, file = file.path(interactive_dir, "InteractiveGraph-3.html"), selfcontained = FALSE)

ip4 <- plot(BasketRules, method = "two-key plot", measure = c("support", "lift"),
            engine = "plotly")
saveWidget(ip4, file = file.path(interactive_dir, "InteractiveGraph-4.html"), selfcontained = FALSE)

# Matrix plots.
ip5 <- plot(BasketRules, method = "matrix", measure = c("support", "lift"),
            shading = "confidence", engine = "plotly")
saveWidget(ip5, file = file.path(interactive_dir, "InteractiveGraph-5.html"), selfcontained = FALSE)

ip6 <- plot(BasketRules, method = "matrix", measure = c("support", "confidence"),
            shading = "lift", engine = "plotly")
saveWidget(ip6, file = file.path(interactive_dir, "InteractiveGraph-6.html"), selfcontained = FALSE)

Bazı interaktif görselleri aşağıda paylaşıyorum. İnteraktif görselleri HTML dosyası olarak kaydedip herhangi bir tarayıcı ile açabilir, ilgili kuralların üzerine gelerek ayrıntılı bilgi elde edebilirsiniz. Ne demek istediğimi Association Rules Analysis with R GitHub repository’sini incelediğinizde daha iyi anlayacaksınız.

Not: Interactive Graph çıktılarını Output dizini altında 7-InteractiveGraph dizinini içerisinde bulabilirsiniz.

 

Son olarak da diğer grafik formatlarını (Other Graph Formats: edgelist, graphml, gml, dot) kullanalım. Bunun için OtherGraph.R dosyasının içerisine yazmış olduğum kod;

# Export Other Graph Format: edgelist, graphml, gml, dot
# Run the companion script, then print a one-line progress banner.

source("OtherGraph.R")
cat("Export Other Format: edgelist, graphml, gml, dot")

Export Other Format: edgelist, graphml, gml, dot

# Make sure the target directory for the graph files exists.
if (!file.exists("./Output/8-OtherGraphFormat")) {
  dir.create("./Output/8-OtherGraphFormat")
}

Tip <- c("items", "itemsets")
Format <- c("edgelist", "graphml", "dimacs", "gml", "dot")

# Write one file per (type, format) pair; "dimacs" is listed in Format but
# is deliberately left out of the export.
for (graph_type in Tip) {
  for (graph_format in Format[Format != "dimacs"]) {
    saveAsGraph(
      BasketRules,
      paste0("./Output/8-OtherGraphFormat/BasketRules_", graph_type, ".", graph_format),
      type = graph_type,
      format = graph_format
    )
  }
}

Not: Other Graph Formatları Items ve Itemset olarak Output dizini altında 8-OtherGraphFormat dizininde bulabilirsiniz.

# Export The Rules
# Coerce the rule set to a data frame and write it to ./Output as CSV,
# without a row-name column.

BasketRules_DF <- as(object = BasketRules, Class = "data.frame")

write.csv2(
  BasketRules_DF,
  file = "./Output/BasketRules.csv",
  row.names = FALSE
)

Oluşan 170 adet kuralı Output dizini altına BasketRules.csv dosyasına kaydediyoruz.

# Export Interactive HTML The Rules
# Save the widget returned by inspectDT() as an HTML page under Output/.

html_page <- inspectDT(BasketRules)
saveWidget(
  html_page,
  file = file.path(Path, "Output", "BasketRules.html"),
  selfcontained = FALSE
)

# Optional: show the table directly, or open the saved page in a browser.
# inspectDT(BasketRules)
# browseURL(paste0(Path, "/Output/", "BasketRules.html"), browser = getOption("browser"), encodeIfNeeded = FALSE)

Oluşan 170 adet kuralı Interactive bir formatta Output dizini altına BasketRules.html dosyasına kaydediyoruz. Çıktısı:

Modeli ve diğer tüm değişkenleri .RData olarak Output dizini içerisinde BasketRules.RData dosyasına kaydediyoruz.

# Persist the whole workspace (the rules and every other variable) to disk.
save.image("./Output/BasketRules.RData")

Yukarıdaki tüm kodlar;

# Association Rules Analysis with R Project ----------------------------

# Clean the System & Console Variable --------------------
# Start from an empty workspace, send a form feed to clear the console,
# and silence warnings for the remainder of the session.
rm(list = ls(all.names = FALSE))
cat("\014")
options(warn = -1L)

# Sys_Date <- format(Sys.Date(), "%Y%m%d")
# Sys_Time <- format(Sys.time(), "%H:%M:%S")

# Install & Use Library ---------------------------------------------------
# Install each required package only when it is missing, then attach it.
# library() stops with an error if a package is still unavailable after the
# install attempt, instead of continuing silently.
invisible(lapply(
  c("arules", "arulesViz", "htmlwidgets", "here"),
  function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
    library(pkg, character.only = TRUE)
  }
))

# Set the System Path & Variable
# The project root is used as working directory because the rest of the
# script reads and writes through relative "./Input" and "./Output" paths.
Path <- here()

setwd(Path)

# Load Dataset
# Read the grocery baskets from the comma-separated input file as a
# transactions object, with duplicate removal switched on.

Groceries <- read.transactions(
  file = "./Input/groceries.csv",
  format = "basket",
  sep = ",",
  cols = NULL,
  rm.duplicates = TRUE,
  encoding = "UTF-8"
)

# Alternative: use the copy of the data set that ships with arules.
# data(Groceries)

# Show Type and Class of Dataset
typeof(Groceries)
class(Groceries)

# Dimension of Dataset
dim(Groceries)
length(Groceries)

# Structure of Dataset
str(Groceries)

# Show The Dataset (only the first 10 transactions)
# inspect(Groceries)
inspect(Groceries[seq_len(10)])

# Show the Items (contents of the itemInfo slot)
ItemSetList <- slot(Groceries, "itemInfo")
ItemSetList

# First 10 items of the data set, via the copy and via the slot
head(ItemSetList, n = 10)
head(slot(Groceries, "itemInfo"), n = 10)

# Last 10 items of the data set, via the copy and via the slot
tail(ItemSetList, n = 10)
tail(slot(Groceries, "itemInfo"), n = 10)

# Summary of Dataset (kept in `s` so its slots can be read below)
s <- summary(Groceries)
s

# Most Frequent Items
slot(s, "itemSummary")

# Length Distribution Size
slot(s, "lengths")

# frequency/support
itemFrequency(Groceries, type = "relative")

# frequency/support (Items Count)
itemFrequency(Groceries, type = "absolute")

# Item Frequency Plot
# Two 1024x768 JPEG charts of the top 25 items: absolute frequencies first,
# relative frequencies second.

jpeg("./Output/1-Absolute_ItemFrequencyPlot.jpg", width = 1024, height = 768, units = "px")
itemFrequencyPlot(
  Groceries,
  type = "absolute",
  topN = 25,
  main = "Absolute Item Frequency Plot (Top 25)",
  xlab = "Items",
  ylab = "Item Frequency (Absolute)"
)
dev.off()

jpeg("./Output/2-Relative_ItemFrequencyPlot.jpg", width = 1024, height = 768, units = "px")
itemFrequencyPlot(
  Groceries,
  type = "relative",
  topN = 25,
  main = "Relative Item Frequency Plot (Top 25)",
  xlab = "Items",
  ylab = "Item Frequency (relative)"
)
dev.off()


# Both Association Rules
# Mine rules with apriori() using the thresholds below, then show a summary,
# a few rules, and a short text report.
options(digits = 2)
min_supp <- 0.01
min_conf <- 0.25
min_length <- 2
BasketRules <- apriori(
  Groceries,
  parameter = list(
    supp = min_supp,
    conf = min_conf,
    minlen = min_length,
    target = "rules"
  )
)

summary(BasketRules)

# First 25 rules, then the first and last 5
# inspect(BasketRules)
inspect(BasketRules[seq_len(25)])

inspect(head(BasketRules, n = 5))
inspect(tail(BasketRules, n = 5))

# Text report: thresholds used and number of rules found
cat(paste0(
  "\n", "Association Rules  Analysis", "\n",
  "= = = = = = = = = = = = = =", "\n",
  "Min. Supp. & Conf. Values:", "\t", min_supp, " & ", min_conf, "\n",
  "X ==> Y Rules Count:", "\t\t", length(BasketRules)
))

# Order the rules by each quality measure (highest value first) and list the
# first and last five rules of every ordering.

# By confidence
BasketRules_Conf <- sort(BasketRules, decreasing = TRUE, by = "confidence")
inspect(head(BasketRules_Conf, n = 5))
inspect(tail(BasketRules_Conf, n = 5))

# By support
BasketRules_Supp <- sort(BasketRules, decreasing = TRUE, by = "support")
inspect(head(BasketRules_Supp, n = 5))
inspect(tail(BasketRules_Supp, n = 5))

# By lift
BasketRules_Lift <- sort(BasketRules, decreasing = TRUE, by = "lift")
inspect(head(BasketRules_Lift, n = 5))
inspect(tail(BasketRules_Lift, n = 5))

# Export Graph
# Static visualisations of the rules, written as 1024x768 JPEG files.
# NOTE(review): the statements after source() look like the contents of
# Graph.R itself; confirm whether both are meant to run.
source('Graph.R')

cat("Export Graph for BasketRules")

# Default plot of all rules. The title previously said "TelcoRules", which
# does not match the Groceries data used here.
jpeg(filename = "./Output/3-BasketRules_ScatterPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules, main="Scatter Plot for Groceries")
dev.off()

# Grouped matrix view of all rules.
jpeg(filename = "./Output/4-BasketRules_GroupedPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules, method="grouped", main="Grouped Matrix for Groceries")
dev.off()

# Graph view, restricted to the first 25 rules.
jpeg(filename = "./Output/5-BasketRules_GraphPlot.jpg", width=1024, height=768, units="px")
plot(BasketRules[1:25], method="graph", control=list(alpha="1"), main="Graph Method for Groceries")
dev.off()

# Parallel coordinates view. The placeholder title "GraphName6" was replaced
# with a descriptive one in line with the other plots.
jpeg(filename = "./Output/6-BasketRules_Paracoord.jpg", width=1024, height=768, units="px")
plot(BasketRules, method ="paracoord", control = list(reorder = TRUE), main="Parallel Coordinates Plot for Groceries")
dev.off()


# Export Interactive Graph
# plotly_arules() is no longer available in current arulesViz releases; the
# same interactive widgets are produced by plot() with engine = "plotly".
# NOTE(review): if InteractiveGraph.R itself still calls plotly_arules(), it
# needs the same change - confirm.
source('InteractiveGraph.R')

cat("Interactive Graph")

# Create the output directory on first run (Path is the project root).
interactive_dir <- file.path(Path, "Output", "7-InteractiveGraph")
if (!dir.exists(interactive_dir)) {
  dir.create(interactive_dir, recursive = TRUE)
}

# Default (scatter) plot: support vs. confidence, shaded by lift.
ip1 <- plot(BasketRules, measure = c("support", "confidence"), shading = "lift",
            engine = "plotly")
saveWidget(ip1, file = file.path(interactive_dir, "InteractiveGraph-1.html"), selfcontained = FALSE)

# Default (scatter) plot: support vs. lift, shaded by confidence.
ip2 <- plot(BasketRules, measure = c("support", "lift"), shading = "confidence",
            engine = "plotly")
saveWidget(ip2, file = file.path(interactive_dir, "InteractiveGraph-2.html"), selfcontained = FALSE)

# Two-key plots.
ip3 <- plot(BasketRules, method = "two-key plot", measure = c("support", "confidence"),
            engine = "plotly")
saveWidget(ip3, file = file.path(interactive_dir, "InteractiveGraph-3.html"), selfcontained = FALSE)

ip4 <- plot(BasketRules, method = "two-key plot", measure = c("support", "lift"),
            engine = "plotly")
saveWidget(ip4, file = file.path(interactive_dir, "InteractiveGraph-4.html"), selfcontained = FALSE)

# Matrix plots.
ip5 <- plot(BasketRules, method = "matrix", measure = c("support", "lift"),
            shading = "confidence", engine = "plotly")
saveWidget(ip5, file = file.path(interactive_dir, "InteractiveGraph-5.html"), selfcontained = FALSE)

ip6 <- plot(BasketRules, method = "matrix", measure = c("support", "confidence"),
            shading = "lift", engine = "plotly")
saveWidget(ip6, file = file.path(interactive_dir, "InteractiveGraph-6.html"), selfcontained = FALSE)

# Export Other Graph Format: edgelist, graphml, gml, dot
# Run the companion script, print a banner, then write the rules in several
# graph file formats under ./Output/8-OtherGraphFormat.
source("OtherGraph.R")

cat("Export Other Format: edgelist, graphml, gml, dot")

# Make sure the target directory exists.
if (!file.exists("./Output/8-OtherGraphFormat")) {
  dir.create("./Output/8-OtherGraphFormat")
}

Tip <- c("items", "itemsets")
Format <- c("edgelist", "graphml", "dimacs", "gml", "dot")

# One file per (type, format) pair; "dimacs" is listed in Format but is
# deliberately left out of the export.
for (graph_type in Tip) {
  for (graph_format in Format[Format != "dimacs"]) {
    saveAsGraph(
      BasketRules,
      paste0("./Output/8-OtherGraphFormat/BasketRules_", graph_type, ".", graph_format),
      type = graph_type,
      format = graph_format
    )
  }
}

# Export The Rules
# Coerce the rule set to a data frame and write it as CSV without row names.
BasketRules_DF <- as(object = BasketRules, Class = "data.frame")
write.csv2(
  BasketRules_DF,
  file = "./Output/BasketRules.csv",
  row.names = FALSE
)

# Export Interactive HTML The Rules
# Save the widget returned by inspectDT() as an HTML page under Output/.
html_page <- inspectDT(BasketRules)
saveWidget(
  html_page,
  file = file.path(Path, "Output", "BasketRules.html"),
  selfcontained = FALSE
)
# Optional: show the table directly, or open the saved page in a browser.
# inspectDT(BasketRules)
# browseURL(paste0(Path, "/Output/", "BasketRules.html"), browser = getOption("browser"), encodeIfNeeded = FALSE)

# Persist the whole workspace (the rules and every other variable) to disk.
save.image("./Output/BasketRules.RData")

Son Olarak; Github üzerinde yer alan repository’e buradan (kod, input, output ve diğer dosyalara) ulaşabilirsiniz. Yazıyı beğenerek ve paylaşarak daha fazla kişiye ulaşmasına yardımcı olabilirsiniz. Görüş ve önerileriniz için uslumetin@gmail.com’dan bana ulaşabilirsiniz.

Yeniden görüşmek üzere, Selamlar 🙂

Yazar Hakkında
Toplam 7 yazı
Metin USLU
Metin USLU
Veri Bilimci @ LC Waikiki#Programmer #Statistician #ComEngStudent
Yorumlar (1 Yorum)
çaylak
çaylak Yanıtla
- 14:34

merhabalar, arulesViz paketi yeni versiyonu sanırım sıkıntı çıkarıyor, hata veriyor plotly_arules fonksiyonu paketten çıkarılmış gibi.

Bir yanıt yazın

E-posta adresiniz yayınlanmayacak. Gerekli alanlar * ile işaretlenmişlerdir

×

Bir Şeyler Ara