Probabilistic Graphical Models

Marta Vomlelova
9.1.2019

Libraries

require(gRain)
require(Rgraphviz)
require(RBGL) #grafy, ne pravdep., maxClique, ansestralSet
require(xtable)
require(gRim)  #mmod, dmod, cmod
#setwd('C:/Users/marta/Desktop/pgm/2016')
#install.packages('ggm','deal','SIN')
require(bnlearn)
require(gRim)
library(xtable)
library(gRbase)
require(MASS)
library(lars)
require('entropy')
library(pcalg)
library(glasso)
library(SIN)

A soubory .net:

loadHuginNet(“monty_hall.net”)

loadHuginNet(“two_coins_2.net”)

loadHuginNet(“two_coins_1.net”)

loadHuginNet(“preg4.net”)

Libraries

Tables and Bayesian network

yn <- c("yes","no") 
a <- cptable(~asia, values=c(1,99),levels=yn) 
t.a <- cptable(~tub+asia, values=c(5,95,1,99),levels=yn) 
s <- cptable(~smoke, values=c(5,5), levels=yn) 
l.s <- cptable(~lung+smoke, values=c(1,9,1,99), levels=yn) 
b.s <- cptable(~bronc+smoke, values=c(6,4,3,7), levels=yn) 
x.e <- cptable(~xray+either, values=c(98,2,5,95), levels=yn) 
d.be <- cptable(~dysp+bronc+either, values=c(9,1,7,3,8,2,1,9), levels=yn) 

#e.lt <- cptable(~either+lung+tub,values=c(1,0,1,0,1,0,0,1),levels=yn) 
e.lt<- ortable(~either+lung+tub,levels=yn)

plist <- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be)) 

chestdag <- grain(plist) 
chestdag<-compile(chestdag)

gRbase DAG

par(mfrow=c(1,4)) #4 obrazky dohromady
plot(chestdag,type='dag',main='DAG')
plot(moralize(chestdag$dag),main='Moral')
plot(chestdag$ug,main='Triangulate')
plot(chestdag,type='jt',main='Join Tree')

plot of chunk unnamed-chunk-3

Running Intersection Property

chestdag$rip
cliques
  1 : asia tub 
  2 : either lung tub 
  3 : either lung bronc 
  4 : smoke lung bronc 
  5 : either dysp bronc 
  6 : either xray 
separators
  1 :  
  2 : tub 
  3 : either lung 
  4 : lung bronc 
  5 : either bronc 
  6 : either 
parents
  1 : 0 
  2 : 1 
  3 : 2 
  4 : 3 
  5 : 3 
  6 : 5 

Parents and children, d-separate

gRbase::parents('either',chestdag$dag)
[1] "tub"  "lung"
gRbase::children('tub',chestdag$dag)
[1] "either"
gRbase::ancestralSet('either',chestdag$dag)
[1] "asia"   "tub"    "smoke"  "lung"   "either"
str(edgeList(chestdag$dag))
List of 8
 $ : chr [1:2] "asia" "tub"
 $ : chr [1:2] "tub" "either"
 $ : chr [1:2] "smoke" "lung"
 $ : chr [1:2] "smoke" "bronc"
 $ : chr [1:2] "lung" "either"
 $ : chr [1:2] "bronc" "dysp"
 $ : chr [1:2] "either" "xray"
 $ : chr [1:2] "either" "dysp"

d-separate

d.separates<-function(a,b,c,dag){
  separates(a,b,c,
            gRbase::moralize(ancestralGraph(union(union(a,b),c), dag)))
}
d.separates("smoke","xray",c("lung"),chestdag$dag)
[1] TRUE
d.separates("smoke","xray",c("lung","dysp"),chestdag$dag)
[1] FALSE

Moralize

mor.chest=moralize(chestdag$dag)
closure('bronc',mor.chest)
[1] "bronc"  "smoke"  "either" "dysp"  
is.complete(mor.chest,c('smoke','lung','tub'))
[1] FALSE
is.complete(mor.chest,c('smoke','lung'))
[1] TRUE
str(getCliques(mor.chest))
List of 6
 $ : chr [1:3] "either" "tub" "lung"
 $ : chr [1:3] "either" "bronc" "dysp"
 $ : chr [1:2] "either" "xray"
 $ : chr [1:2] "smoke" "lung"
 $ : chr [1:2] "smoke" "bronc"
 $ : chr [1:2] "asia" "tub"

Simplicial nodes

is.simplicial('lung',chestdag$dag)
[1] TRUE
simplicialNodes(mor.chest)
[1] "asia" "xray" "dysp"
plot(removeNode('asia',mor.chest))

plot of chunk unnamed-chunk-9

rem=removeNode('dysp',removeNode('xray',removeNode('tub',removeNode('asia',mor.chest))))
simplicialNodes(rem)
character(0)
plot(rem)

plot of chunk unnamed-chunk-10

Triangulated = Chordal

is.triangulated(mor.chest)
[1] FALSE
tg=triangulate(mor.chest)
is.triangulated(tg)
[1] TRUE
str(getCliques(mor.chest))
List of 6
 $ : chr [1:3] "either" "tub" "lung"
 $ : chr [1:3] "either" "bronc" "dysp"
 $ : chr [1:2] "either" "xray"
 $ : chr [1:2] "smoke" "lung"
 $ : chr [1:2] "smoke" "bronc"
 $ : chr [1:2] "asia" "tub"
plot(tg)

plot of chunk unnamed-chunk-12

Forced edges

tg2=removeEdge('bronc','either',mor.chest)
tg2=addEdge('smoke','either',tg2)
tg2=addEdge('smoke','dysp',tg2)
is.triangulated(tg2)
[1] TRUE
#str(getCliques(tg2))
plot(tg2)

plot of chunk unnamed-chunk-13

is.simplicial('smoke',tg2)
[1] FALSE
rip(tg2)
cliques
  1 : asia tub 
  2 : either tub lung 
  3 : either smoke lung 
  4 : either smoke dysp 
  5 : bronc smoke dysp 
  6 : either xray 
separators
  1 :  
  2 : tub 
  3 : either lung 
  4 : either smoke 
  5 : smoke dysp 
  6 : either 
parents
  1 : 0 
  2 : 1 
  3 : 2 
  4 : 3 
  5 : 4 
  6 : 4 
simplicialNodes(tg)
[1] "asia"  "smoke" "xray"  "dysp" 
simplicialNodes(tg2)
[1] "asia"  "bronc" "xray" 

Forced clique

m3=compile(grain(plist),root=c('lung','bronc','tub'), propagate=TRUE)
plot(chestdag$ug)

plot of chunk unnamed-chunk-15

plot(m3)

plot of chunk unnamed-chunk-16

m3$rip
cliques
  1 : asia tub 
  2 : bronc lung tub either 
  3 : bronc lung smoke 
  4 : bronc dysp either 
  5 : xray either 
separators
  1 :  
  2 : tub 
  3 : bronc lung 
  4 : bronc either 
  5 : either 
parents
  1 : 0 
  2 : 1 
  3 : 2 
  4 : 2 
  5 : 4 

Ekvipot

chestdag=propagate(chestdag)
chestdag$equipot
[[1]]
     asia
tub      yes     no
  yes 0.0005 0.0099
  no  0.0095 0.9801

[[2]]
, , lung = yes

     either
tub        yes no
  yes 0.000572  0
  no  0.054428  0

, , lung = no

     either
tub        yes       no
  yes 0.009828 0.000000
  no  0.000000 0.935172


[[3]]
, , bronc = yes

      lung
either    yes        no
   yes 0.0315 0.0043524
   no  0.0000 0.4141476

, , bronc = no

      lung
either    yes        no
   yes 0.0235 0.0054756
   no  0.0000 0.5210244


[[4]]
, , smoke = yes

     bronc
lung   yes   no
  yes 0.03 0.02
  no  0.27 0.18

, , smoke = no

     bronc
lung     yes     no
  yes 0.0015 0.0035
  no  0.1485 0.3465


[[5]]
, , dysp = yes

      bronc
either        yes         no
   yes 0.03226716 0.02028292
   no  0.33131808 0.05210244

, , dysp = no

      bronc
either        yes         no
   yes 0.00358524 0.00869268
   no  0.08282952 0.46892196


[[6]]
      xray
either        yes         no
   yes 0.06353144 0.00129656
   no  0.04675860 0.88841340

attr(,"pEvidence")
[1] 1

Usefull functions

chestdag$cptlist
chestdag$evidence
chestdag$isPropagated
chestdag$temppot

separates('tub','lung','smoke',mor.chest)

#dal uz jen seznam pro pripad 'mohlo by se hodit'
edges(tg)
summary(tg)
saveHuginNet(chestdag, "test.net")
ttt <- loadHuginNet("monty_hall.net")
ttt$universe

chestdag$universe$nodes
chestdag$cptlist$either
chestdag$isCompiled

str(maxClique(mor.chest))
str(getCliques(mor.chest))

names(chestdag)
getCliques(moralize(chestdag$dag))

Soft evidence

Soft evidence

vv=querygrain(chestdag,nodes=c('lung','smoke','xray'),
              evidence=list("xray"=c(.9, .1),"asia"=c(1,0)),type='marginal')
vv
$lung
lung
      yes        no 
0.2250155 0.7749845 

$smoke
smoke
      yes        no 
0.5735998 0.4264002 

Hard evidence

querygrain(chestdag,nodes=c('lung','smoke','xray'),
           evidence=list("xray"=c(1, 0),"asia"=c(1,0)),type='marginal')
$lung
lung
      yes        no 
0.3714872 0.6285128 

$smoke
smoke
      yes        no 
0.6370074 0.3629926 

Only Asia

vv=querygrain(chestdag,nodes=c('lung','smoke','xray'),
              evidence=list("asia"=c(1,0)),type='marginal')
vv
$lung
lung
  yes    no 
0.055 0.945 

$smoke
smoke
yes  no 
0.5 0.5 

$xray
xray
      yes        no 
0.1450925 0.8549075 

xtable, table

xtable(as.matrix(querygrain(chestdag,nodes=c('lung'),type='marginal')$lung))
% latex table generated in R 3.4.1 by xtable 1.8-3 package
% Wed Jan 09 16:54:45 2019
\begin{table}[ht]
\centering
\begin{tabular}{rr}
  \hline
 & x \\ 
  \hline
yes & 0.06 \\ 
  no & 0.95 \\ 
   \hline
\end{tabular}
\end{table}
as(chestdag,'matrix')
             [,1]  
universe     List,3
data         NULL  
dag          ?     
cptlist      List,8
isCompiled   TRUE  
isPropagated TRUE  
evidence     NULL  
control      List,1
details      0     
rip          List,8
ug           ?     
equipot      List,6
temppot      List,6
origpot      List,6
details      0     

Simulace

stats::simulate(chestdag,nsim=5)
  asia tub smoke lung bronc either xray dysp
1   no  no    no   no    no     no   no   no
2   no  no   yes   no    no     no   no   no
3   no  no   yes   no    no     no   no   no
4   no  no   yes   no   yes     no   no   no
5   no  no    no   no   yes     no   no  yes
xtabs(~lung+bronc,data=simulate(chestdag,nsim=nnn<-10000))/nnn
     bronc
lung     yes     no
  yes 0.0302 0.0258
  no  0.4206 0.5234
my.sim=function(f.bnet,f.nodes,f.x){
  pravda=querygrain(f.bnet,nodes=f.nodes,type='joint')
  sim.orig=simulate(f.bnet,n=f.x)
  novy=xtabs(paste0('~',paste0(f.nodes,collapse='+'))
                    ,sim.orig)#from case list
  return(KL.empirical(novy,pravda,unit='log2'))
}
#tady zaciname
setwd('C:/Users/marta/Desktop/pgm/2016')

bnet <- loadHuginNet("two_coins_2.net")
bnet<-propagate(compile(bnet))
plot(bnet$dag)

plot of chunk unnamed-chunk-24

Simulated data

set.seed(1)
sim.orig=simulate(bnet,n=1000)

#case list
head(sim.orig)
   Coin SecondFlip FirstFlip
1 penny       tail      tail
2 penny       tail      tail
3  dime       head      head
4 penny       head      tail
5 penny       tail      head
6  dime       tail      tail
#table
xtabs(~Coin+FirstFlip,sim.orig)#from case list
       FirstFlip
Coin    head tail
  penny  103  412
  dime   334  151
(tmp.tab<-xtabs(~Coin+FirstFlip,sim.orig))/apply(tmp.tab,1,sum)
       FirstFlip
Coin         head      tail
  penny 0.2000000 0.8000000
  dime  0.6886598 0.3113402
pravda=querygrain(bnet,nodes=c('Coin','FirstFlip'),type='joint')
novy=xtabs(~Coin+FirstFlip,sim.orig)
KL.empirical(novy,pravda,unit='log2')
[1] 0.0008620495

Number of samples

#aggregated case list
ftable(sim.orig)#from raw case list
                 FirstFlip head tail
Coin  SecondFlip                    
penny head                   24   86
      tail                   79  326
dime  head                  238  109
      tail                   96   42
#data frame from aggregated case list
as.data.frame(  ftable(sim.orig)  )
   Coin SecondFlip FirstFlip Freq
1 penny       head      head   24
2  dime       head      head  238
3 penny       tail      head   79
4  dime       tail      head   96
5 penny       head      tail   86
6  dime       head      tail  109
7 penny       tail      tail  326
8  dime       tail      tail   42
my.sim(bnet,c('Coin','FirstFlip'),10)
[1] 0.07427909
n.samples=c(10,15,30,50,100,500,1000,5000)
n.samples=c(rep(10,10),rep(20,10),rep(50,10))

kl=sapply(n.samples
          ,FUN=function(x)my.sim(bnet,c('Coin','FirstFlip'),x))
boxplot(kl~n.samples,xlab='Number of Samples',ylab='KL-divergence',main='KL-divergence wrt. # Samples')

plot of chunk unnamed-chunk-28

Learning other (wrong) model

bnet2=gRain::loadHuginNet("..//two_coins_1.net")
sim2=simulate(bnet2,n=10000)
novy.dag<-dag(~TwiceAHead,~Penny:TwiceAHead,~Dime:TwiceAHead)
novy.dag<-bnet2$dag
md=grain(novy.dag,data=sim2,smooth=0)
plot(novy.dag)

plot of chunk unnamed-chunk-29

round(ftable(querygrain(md,md$universe$nodes,type='joint')),2)#*1000
           TwiceAHead  yes   no
Penny Dime                     
head  head            0.15 0.00
      tail            0.00 0.06
tail  head            0.00 0.56
      tail            0.00 0.23
md=grain(novy.dag,data=sim2,smooth=0.001)

querygrain(md,md$universe$nodes,type='conditional')#*1000
, , Penny = head

      TwiceAHead
Dime         yes           no
  head 0.9999993 1.794999e-07
  tail 0.4975509 2.071999e-01

, , Penny = tail

      TwiceAHead
Dime            yes        no
  head 6.839922e-07 0.9999998
  tail 5.024491e-01 0.7928001
ftable(querygrain(md,md$universe$nodes,type='conditional'))#*1000
                Penny         head         tail
Dime TwiceAHead                                
head yes              9.999993e-01 6.839922e-07
     no               1.794999e-07 9.999998e-01
tail yes              4.975509e-01 5.024491e-01
     no               2.071999e-01 7.928001e-01
md=grain(novy.dag,data=sim2,smooth=0.001)

KL divergence

bnet <- loadHuginNet("..//two_coins_2.net")
bnet<-propagate(compile(bnet))
plot(bnet$dag)

plot of chunk unnamed-chunk-31

sim.orig=simulate(bnet,n=10000)
novy.dag<-dag(~Coin,~Coin:FirstFlip,~Coin:SecondFlip)
md=grain(novy.dag,data=sim.orig,smooth=0.2)
plot(novy.dag)

plot of chunk unnamed-chunk-31

kl=sapply(c(10,30,50,100,500,1000,5000),FUN=function(x){
  sim.orig=simulate(bnet,n=x)
  md=gRain::grain(novy.dag,data=sim.orig,smooth=0.002)
  novy=gRain::querygrain(md,nodes=c('SecondFlip','FirstFlip'),type='joint')
  return(KL.empirical(novy,pravda,unit='log2'))
})
plot(kl,ylim=c(0,0.35))

plot of chunk unnamed-chunk-32

Task

#ukol - uceni
preg.net <- loadHuginNet("preg4.net")
plot(preg.net)

plot of chunk unnamed-chunk-33

#simulujte data, naucte sit bez Ho uzlu, zkuste popsat, jestli a jak se 
#puvodni a nova sit lisi, kdyz v puvodni siti Ho nikdy nepozorujeme.

Usefull functions

# DALE UZ JE ARCHIV, NEMELO BY BYT POTREBA
#ukazky prevodu
#table
xtabs(~Coin+FirstFlip,sim.orig)#from case list
       FirstFlip
Coin    head tail
  penny  994 4043
  dime  3456 1507
as.table(ftable(sim.orig))#from aggregated case list
, , FirstFlip = head

       SecondFlip
Coin    head tail
  penny  175  819
  dime  2437 1019

, , FirstFlip = tail

       SecondFlip
Coin    head tail
  penny  826 3217
  dime  1070  437
xtabs(Freq~.,ftable(sim.orig))
, , FirstFlip = head

       SecondFlip
Coin    head tail
  penny  175  819
  dime  2437 1019

, , FirstFlip = tail

       SecondFlip
Coin    head tail
  penny  826 3217
  dime  1070  437
xtabs(Freq~Coin+FirstFlip,ftable(sim.orig))
       FirstFlip
Coin    head tail
  penny  994 4043
  dime  3456 1507
#aggregated case list
ftable(sim.orig)#from raw case list
                 FirstFlip head tail
Coin  SecondFlip                    
penny head                  175  826
      tail                  819 3217
dime  head                 2437 1070
      tail                 1019  437
ftable(xtabs(~.,sim.orig))#from table
                 FirstFlip head tail
Coin  SecondFlip                    
penny head                  175  826
      tail                  819 3217
dime  head                 2437 1070
      tail                 1019  437

dataframe

#aggregated case list from table
#as.data.frame(as.table(  ftable(sim.orig)  ))
as.data.frame(as.table(  ftable(sim.orig)  ))
   Coin SecondFlip FirstFlip Freq
1 penny       head      head  175
2  dime       head      head 2437
3 penny       tail      head  819
4  dime       tail      head 1019
5 penny       head      tail  826
6  dime       head      tail 1070
7 penny       tail      tail 3217
8  dime       tail      tail  437

Carcass

#tuk v porazenych prasatec, tloustka masa mezi zebry; maso/celkova hmotnost s tukem,?kostmi a slachami
data(carcass)
head(carcass)
  Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
1    17     51    12     51    12     61 56.52475
2    17     49    15     48    15     54 57.57958
3    14     38    11     34    11     40 55.88994
4    17     58    12     58    11     58 61.81719
5    14     51    12     48    13     54 62.95964
6    20     40    14     40    14     45 54.57870
ellipse <- function(s,t){
  u<-c(s,t)-center; 
  e <- (-1) * (u %*% sigma.inv %*% u) / 2; 
  exp(e)/(2 * pi * sqrt(det(sigma.inv)))}

Carcass - marginal Meat13, LeanMeat

 #saturovany plny model
Y1=6
Y2=7
n=60
x <- (0:(n-1))*2 - 50
y <- (0:(n-1))*2 - 50

plot(carcass[,c(Y1,Y2)], xlab=names(carcass)[Y1], ylab=names(carcass)[Y2], col = "lightgray",pch='+')
center <- apply(carcass[,c(Y1,Y2)], 2, mean)   ## sample mean vector
sigma <- cov(carcass[,c(Y1,Y2)])               ## sample variance-covariance matrix
sigma.inv = solve(sigma, matrix(c(1,0,0,1),2,2))
z <- mapply(ellipse, as.vector(rep(x,n)), as.vector(outer(rep(0,n), y, `+`)))
#round(log(matrix(z,n,n)))
contour(x,y,matrix(z,n,n), levels=(0:50), col = rainbow(50), add=TRUE, lwd = 1)

plot of chunk unnamed-chunk-38

Carcass - means

round( data.frame(apply(carcass, 2, mean)),2)   ## sample mean vector
         apply.carcass..2..mean.
Fat11                      16.49
Meat11                     51.97
Fat12                      13.96
Meat12                     52.00
Fat13                      12.95
Meat13                     55.69
LeanMeat                   59.38
biv <- kde2d(carcass[,c(Y1)],carcass[,c(Y2)], n = 50 )
persp(biv, phi = 45, theta = 30, shade = .1, border = NA,xlab=names(carcass)[Y1], ylab=names(carcass)[Y2],zlab='density')

plot of chunk unnamed-chunk-40

Covariance, Concentration

S.carc=cov.wt(carcass, method='ML')$cov
round(S.carc,2)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11    11.34   0.74  8.42   2.06  7.66  -0.76    -9.08
Meat11    0.74  32.97  0.67  35.94  2.01  31.97     5.33
Fat12     8.42   0.67  8.91   0.31  6.84  -0.60    -7.95
Meat12    2.06  35.94  0.31  51.79  2.18  41.47     6.03
Fat13     7.66   2.01  6.84   2.18  7.62   0.38    -6.93
Meat13   -0.76  31.97 -0.60  41.47  0.38  41.44     7.23
LeanMeat -9.08   5.33 -7.95   6.03 -6.93   7.23    12.90
K.carc=solve(S.carc)
round(K.carc,2)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11     0.44   0.03 -0.20  -0.07 -0.16   0.04     0.10
Meat11    0.03   0.16 -0.03  -0.06 -0.06  -0.06    -0.03
Fat12    -0.20  -0.03  0.54   0.06 -0.21  -0.05     0.09
Meat12   -0.07  -0.06  0.06   0.14 -0.01  -0.09     0.00
Fat13    -0.16  -0.06 -0.21  -0.01  0.56   0.03     0.07
Meat13    0.04  -0.06 -0.05  -0.09  0.03   0.16    -0.01
LeanMeat  0.10  -0.03  0.09   0.00  0.07  -0.01     0.26

Partial corellation - normalized Concentration

PC.carc=cov2pcor(S.carc)
round(100*PC.carc)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11      100    -11    41     30    32    -16      -29
Meat11     -11    100     9     41    19     35       16
Fat12       41      9   100    -24    38     18      -24
Meat12      30     41   -24    100     2     61        2
Fat13       32     19    38      2   100     -9      -18
Meat13     -16     35    18     61    -9    100        7
LeanMeat   -29     16   -24      2   -18      7      100
round(100*K.carc)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11       44      3   -20     -7   -16      4       10
Meat11       3     16    -3     -6    -6     -6       -3
Fat12      -20     -3    54      6   -21     -5        9
Meat12      -7     -6     6     14    -1     -9        0
Fat13      -16     -6   -21     -1    56      3        7
Meat13       4     -6    -5     -9     3     16       -1
LeanMeat    10     -3     9      0     7     -1       26

Saturated model, Stepwise learning main=AIC, BIC

sat.carc=cmod(~.^., data=carcass)
aic.carc=stepwise(sat.carc)
plot(as(aic.carc,'graphNEL'),'fdp',main='AIC')

plot of chunk unnamed-chunk-44

str(edgeList(as(aic.carc,'graphNEL')))
List of 19
 $ : chr [1:2] "Fat11" "Meat11"
 $ : chr [1:2] "Fat11" "Fat12"
 $ : chr [1:2] "Fat11" "Meat13"
 $ : chr [1:2] "Fat11" "LeanMeat"
 $ : chr [1:2] "Fat11" "Fat13"
 $ : chr [1:2] "Fat11" "Meat12"
 $ : chr [1:2] "Meat11" "Fat12"
 $ : chr [1:2] "Meat11" "Meat13"
 $ : chr [1:2] "Meat11" "LeanMeat"
 $ : chr [1:2] "Meat11" "Fat13"
 $ : chr [1:2] "Meat11" "Meat12"
 $ : chr [1:2] "Fat12" "Meat13"
 $ : chr [1:2] "Fat12" "LeanMeat"
 $ : chr [1:2] "Fat12" "Fat13"
 $ : chr [1:2] "Fat12" "Meat12"
 $ : chr [1:2] "Meat13" "LeanMeat"
 $ : chr [1:2] "Meat13" "Fat13"
 $ : chr [1:2] "Meat13" "Meat12"
 $ : chr [1:2] "LeanMeat" "Fat13"
bic.carc=stepwise(sat.carc,k=log(nrow(carcass)))
plot(as(bic.carc,'graphNEL'),'fdp',main='BIC')

plot of chunk unnamed-chunk-45

str(edgeList(as(bic.carc,'graphNEL')))
List of 17
 $ : chr [1:2] "Fat11" "Meat11"
 $ : chr [1:2] "Fat11" "Fat12"
 $ : chr [1:2] "Fat11" "Meat13"
 $ : chr [1:2] "Fat11" "Meat12"
 $ : chr [1:2] "Fat11" "Fat13"
 $ : chr [1:2] "Fat11" "LeanMeat"
 $ : chr [1:2] "Meat11" "Fat12"
 $ : chr [1:2] "Meat11" "Meat13"
 $ : chr [1:2] "Meat11" "Meat12"
 $ : chr [1:2] "Meat11" "Fat13"
 $ : chr [1:2] "Meat11" "LeanMeat"
 $ : chr [1:2] "Fat12" "Meat13"
 $ : chr [1:2] "Fat12" "Meat12"
 $ : chr [1:2] "Fat12" "Fat13"
 $ : chr [1:2] "Fat12" "LeanMeat"
 $ : chr [1:2] "Meat13" "Meat12"
 $ : chr [1:2] "Fat13" "LeanMeat"

BodyFat data

data(BodyFat)
head(BodyFat)
  Density BodyFat Age Weight Height Neck Chest Abdomen   Hip Thigh Knee
1  1.0708    12.3  23 154.25  67.75 36.2  93.1    85.2  94.5  59.0 37.3
2  1.0853     6.1  22 173.25  72.25 38.5  93.6    83.0  98.7  58.7 37.3
3  1.0414    25.3  22 154.00  66.25 34.0  95.8    87.9  99.2  59.6 38.9
4  1.0751    10.4  26 184.75  72.25 37.4 101.8    86.4 101.2  60.1 37.3
5  1.0340    28.7  24 184.25  71.25 34.4  97.3   100.0 101.9  63.2 42.2
6  1.0502    20.9  24 210.25  74.75 39.0 104.5    94.4 107.8  66.0 42.0
  Ankle Biceps Forearm Wrist
1  21.9   32.0    27.4  17.1
2  23.4   30.5    28.9  18.2
3  24.0   28.8    25.2  16.6
4  22.8   32.4    29.4  18.2
5  24.0   32.2    27.7  17.7
6  25.6   35.7    30.6  18.8
plot(BodyFat$BodyFat,1/BodyFat$Density)

plot of chunk unnamed-chunk-47

BodyFat=BodyFat[-c(31,42,48,76,86,96,159,169,175,182,206),]
plot(BodyFat$BodyFat,1/BodyFat$Density)

plot of chunk unnamed-chunk-47

Covariance, Partial covariance, (Concentration)

BodyFat$Age=sqrt(BodyFat$Age)
BodyFat$Weight=sqrt(BodyFat$Weight)
gRbodyfat=BodyFat[,2:15]

S.body=cov.wt(gRbodyfat, method='ML')$cov
round(S.body)
        BodyFat Age Weight Height Neck Chest Abdomen Hip Thigh Knee Ankle
BodyFat      68   2      5      0   10    48      71  35    22   10     3
Age           2   1      0     -1    0     1       2   0    -1    0     0
Weight        5   0      1      1    2     8      10   7     5    2     1
Height        0  -1      1      7    2     4       5   7     5    3     2
Neck         10   0      2      2    6    15      19  12     8    4     2
Chest        48   1      8      4   15    68      80  47    30   14     7
Abdomen      71   2     10      5   19    80     112  64    40   18     8
Hip          35   0      7      7   12    47      64  49    31   14     7
Thigh        22  -1      5      5    8    30      40  31    25   10     5
Knee         10   0      2      3    4    14      18  14    10    6     2
Ankle         3   0      1      2    2     7       8   7     5    2     2
Biceps       12   0      3      2    5    18      21  15    11    5     2
Forearm       6   0      1      2    3    10      11   8     6    3     2
Wrist         3   0      1      1    2     5       6   4     3    1     1
        Biceps Forearm Wrist
BodyFat     12       6     3
Age          0       0     0
Weight       3       1     1
Height       2       2     1
Neck         5       3     2
Chest       18      10     5
Abdomen     21      11     6
Hip         15       8     4
Thigh       11       6     3
Knee         5       3     1
Ankle        2       2     1
Biceps       9       4     2
Forearm      4       4     1
Wrist        2       1     1
K.body=solve(S.body)
#round(K.body,2)
PC.body=cov2pcor(S.body)
round(PC.body,1)
        BodyFat  Age Weight Height Neck Chest Abdomen  Hip Thigh Knee
BodyFat     1.0  0.1    0.0   -0.1 -0.2  -0.1     0.5 -0.2   0.1  0.0
Age         0.1  1.0   -0.2    0.0  0.1   0.1     0.3 -0.1  -0.3  0.3
Weight      0.0 -0.2    1.0    0.7  0.3   0.5     0.3  0.5   0.2  0.1
Height     -0.1  0.0    0.7    1.0 -0.2  -0.4    -0.3 -0.2  -0.3  0.2
Neck       -0.2  0.1    0.3   -0.2  1.0   0.0     0.1 -0.2   0.1 -0.1
Chest      -0.1  0.1    0.5   -0.4  0.0   1.0     0.3 -0.2  -0.2  0.0
Abdomen     0.5  0.3    0.3   -0.3  0.1   0.3     1.0  0.2   0.0  0.0
Hip        -0.2 -0.1    0.5   -0.2 -0.2  -0.2     0.2  1.0   0.3  0.1
Thigh       0.1 -0.3    0.2   -0.3  0.1  -0.2     0.0  0.3   1.0  0.3
Knee        0.0  0.3    0.1    0.2 -0.1   0.0     0.0  0.1   0.3  1.0
Ankle       0.0 -0.2    0.2   -0.1 -0.2   0.0    -0.1 -0.1   0.1  0.3
Biceps      0.0  0.1    0.2   -0.1  0.0   0.0    -0.1  0.0   0.2  0.0
Forearm     0.1 -0.2    0.2   -0.1  0.1   0.1    -0.2 -0.2   0.0  0.1
Wrist      -0.2  0.4    0.1    0.0  0.2  -0.1     0.1  0.0  -0.1  0.0
        Ankle Biceps Forearm Wrist
BodyFat   0.0    0.0     0.1  -0.2
Age      -0.2    0.1    -0.2   0.4
Weight    0.2    0.2     0.2   0.1
Height   -0.1   -0.1    -0.1   0.0
Neck     -0.2    0.0     0.1   0.2
Chest     0.0    0.0     0.1  -0.1
Abdomen  -0.1   -0.1    -0.2   0.1
Hip      -0.1    0.0    -0.2   0.0
Thigh     0.1    0.2     0.0  -0.1
Knee      0.3    0.0     0.1   0.0
Ankle     1.0   -0.1    -0.1   0.4
Biceps   -0.1    1.0     0.4   0.0
Forearm  -0.1    0.4     1.0   0.3
Wrist     0.4    0.0     0.3   1.0

Model Fit

sat.body=cmod(~.^., data=gRbodyfat)
aic.body=stepwise(sat.body)
plot(as(aic.body,'graphNEL'),'fdp',main='AIC')

plot of chunk unnamed-chunk-50

bic.body=stepwise(sat.body,k=log(nrow(gRbodyfat)))
plot(as(bic.body,'graphNEL'),'fdp',main='BIC')

plot of chunk unnamed-chunk-51

graph::degree(as(bic.body,'graphNEL'))
 Weight   Thigh  Height Abdomen Forearm BodyFat    Neck     Hip     Age 
     12      12      11       9      11      10      10       8      10 
  Chest    Knee   Ankle  Biceps   Wrist 
      5       5      10       4       5 

Direct model specification (cliques, edges), Carcass data

gen.carc=cmod(~Fat11*Fat12*Meat12*Meat13+
                Fat11*Fat12*Fat13*LeanMeat+
                Meat11*Meat12*Meat13+
                Meat11*Fat13*LeanMeat
                ,data=carcass)
gen.carc
Model: A cModel with 7 variables
 -2logL    :       11387.24 mdim :   22 aic :     11431.24 
 ideviance :        2453.99 idf  :   15 bic :     11515.73 
 deviance  :          19.79 df   :    6 
plot(gen.carc,'neato')

plot of chunk unnamed-chunk-52

edge.carc=cmod(edgeList(as(gen.carc,'graphNEL')) ,data=carcass)
edge.carc
Model: A cModel with 7 variables
 -2logL    :       11387.24 mdim :   22 aic :     11431.24 
 ideviance :        2453.99 idf  :   15 bic :     11515.73 
 deviance  :          19.79 df   :    6 

Parameter fitting (Iterative proportional fitting)

Edges: 774 iterations

carcfit1=ggmfit(S.carc, n=nrow(carcass),edgeList(as(gen.carc,'graphNEL')))
carcfit1[c('dev','df','iter')]
$dev
[1] 19.78537

$df
[1] 6

$iter
[1] 774

Cliques: 61 iterations

carcfit2=ggmfit(S.carc, n=nrow(carcass),maxClique(as(gen.carc,'graphNEL'))$maxCliques)
carcfit2[c('dev','df','iter')]
$dev
[1] 19.78537

$df
[1] 6

$iter
[1] 61

Model comparison: Likelihood ration test

comparemodels=function(m1,m2){
  lrt=m2$fitinfo$dev-m1$fitinfo$dev
  dfdiff=m2$fitinfo$dimension[4]-m1$fitinfo$dimension[4]
  names(dfdiff)=NULL
  list('lrt'=lrt,'df'=dfdiff)
}
aic.carc
Model: A cModel with 7 variables
 -2logL    :       11367.70 mdim :   26 aic :     11419.70 
 ideviance :        2473.53 idf  :   19 bic :     11519.56 
 deviance  :           0.25 df   :    2 
bic.carc
Model: A cModel with 7 variables
 -2logL    :       11376.07 mdim :   24 aic :     11424.07 
 ideviance :        2465.16 idf  :   17 bic :     11516.25 
 deviance  :           8.62 df   :    4 
comparemodels(aic.carc,bic.carc)
$lrt
[1] 8.372649

$df
[1] 2

Conditional independence tests

Nonsignificant

ciTest_mvn(list(cov=S.carc,n.obs=nrow(carcass)),
  set=~LeanMeat+Meat13+Meat11+Meat12+Fat11+Fat12+Fat13)
Testing LeanMeat _|_ Meat13 | Meat11 Meat12 Fat11 Fat12 Fat13 
Statistic (DEV):    1.687 df: 1 p-value: 0.1940 method: CHISQ

Significant

ciTest_mvn(list(cov=S.carc,n.obs=nrow(carcass)),
           set=~LeanMeat+Meat11+Meat12+Meat13+Fat11+Fat12+Fat13)
Testing LeanMeat _|_ Meat11 | Meat12 Meat13 Fat11 Fat12 Fat13 
Statistic (DEV):    8.931 df: 1 p-value: 0.0028 method: CHISQ
C.carc=cov2cor(S.carc)
gaussCItest(7,2,c(1,3,4,5,6),list(C=C.carc,n=nrow(carcass)))
[1] 0.003077247

Gaussian graphical model as Linear regression

#regresni koeficienty
-K.carc[7,-7]/K.carc[7,7]
      Fat11      Meat11       Fat12      Meat12       Fat13      Meat13 
-0.37714577  0.12421685 -0.33552335  0.01518311 -0.26893064  0.05413811 
#rozptyl rezidui
1/K.carc[7,7]
[1] 3.794228
r.LeanMeat=residuals(lm(LeanMeat~Meat11+Meat13+Fat11+Fat12+Fat13,
                        data=carcass))
r.Meat12=residuals(lm(Meat12~Meat11+Meat13+Fat11+Fat12+Fat13,
                        data=carcass))
#par(mfcol=c(1,2))
plot(r.Meat12,r.LeanMeat)
abline(lm(r.LeanMeat~r.Meat12),lwd=2)

plot of chunk unnamed-chunk-59

r.LeanMeat3=residuals(lm(LeanMeat~Meat11+Meat12+Fat11+Fat12+Fat13,
                        data=carcass))
r.Meat13=residuals(lm(Meat13~Meat11+Meat12+Fat11+Fat12+Fat13,
                      data=carcass))
plot(r.Meat13,r.LeanMeat3)
abline(lm(r.LeanMeat3~r.Meat13),lwd=2)

plot of chunk unnamed-chunk-60

par(mfcol=c(1,1))

Decomposition of a GGM

#decomposition
K.hat=S.carc
K.hat[]=0
AC=c('Fat11','Fat12','Fat13','Meat11','LeanMeat')
BC=c('Meat11','Meat12','Meat13','Fat11','Fat12')
C=c('Fat11','Fat12','Meat11')
K.hat[AC,AC]=K.hat[AC,AC]+solve(S.carc[AC,AC])
K.hat[BC,BC]=K.hat[BC,BC]+solve(S.carc[BC,BC])
K.hat[C,C]=K.hat[C,C]-solve(S.carc[C,C])
round(100*K.hat,-1)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11       40      0   -20    -10   -20     10       10
Meat11       0     20     0    -10     0    -10      -10
Fat12      -20      0    50     10   -20      0       10
Meat12     -10    -10    10     10     0    -10        0
Fat13      -20      0   -20      0    60      0       10
Meat13      10    -10     0    -10     0     20        0
LeanMeat    10    -10    10      0    10      0       30

Estimated and original sigma

Sigma.hat=solve(K.hat)
round(Sigma.hat,2)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11    11.34   0.74  8.42   2.06  7.66  -0.76    -9.08
Meat11    0.74  32.97  0.67  35.94  2.01  31.97     5.33
Fat12     8.42   0.67  8.91   0.31  6.84  -0.60    -7.95
Meat12    2.06  35.94  0.31  51.79  2.45  41.47     5.41
Fat13     7.66   2.01  6.84   2.45  7.62   0.89    -6.93
Meat13   -0.76  31.97 -0.60  41.47  0.89  41.44     6.43
LeanMeat -9.08   5.33 -7.95   5.41 -6.93   6.43    12.90
round(S.carc,2)
         Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11    11.34   0.74  8.42   2.06  7.66  -0.76    -9.08
Meat11    0.74  32.97  0.67  35.94  2.01  31.97     5.33
Fat12     8.42   0.67  8.91   0.31  6.84  -0.60    -7.95
Meat12    2.06  35.94  0.31  51.79  2.18  41.47     6.03
Fat13     7.66   2.01  6.84   2.18  7.62   0.38    -6.93
Meat13   -0.76  31.97 -0.60  41.47  0.38  41.44     7.23
LeanMeat -9.08   5.33 -7.95   6.03 -6.93   7.23    12.90

Verbose stepwise, Headlong stepwise

#stepwise
test.carc=stepwise(sat.carc,details=1,'test')
STEPWISE:  
 criterion: test 
 direction: backward 
 type     : decomposable 
 search   : all 
 steps    : 1000 
  p.value    0.7089 Edge deleted: Meat12 Fat13
  p.value    0.7414 Edge deleted: LeanMeat Meat12
  p.value    0.0812 Edge deleted: Meat13 Fat13
plot(test.carc,'neato')

plot of chunk unnamed-chunk-63

ind.carc=gRim::cmod(~.^1,data=carcass)
set.seed(123)
forw.carc=stepwise(ind.carc,search='headlong',
                   direction='forward',
                   k=log(nrow(carcass)),details=0)
plot(forw.carc,'neato')

plot of chunk unnamed-chunk-64

forw.carc
Model: A cModel with 7 variables
 -2logL    :       11393.53 mdim :   23 aic :     11439.53 
 ideviance :        2447.70 idf  :   16 bic :     11527.87 
 deviance  :          26.08 df   :    5 

Lasso model fitting

#laso
C.body=cov2cor(S.body)
res.lasso=glasso(C.body,rho=0.1)
AM=res.lasso$wi !=0
diag(AM)=F
g.lasso=as(AM,'graphNEL')
nodes(g.lasso)=names(gRbodyfat)
glasso.body=gRim::cmod(g.lasso,data=gRbodyfat)
plot(glasso.body,'neato')

plot of chunk unnamed-chunk-66

graph::degree(as(glasso.body,'graphNEL'))
 Weight    Knee   Thigh     Hip Abdomen   Ankle  Biceps Forearm  Height 
     11       9      10       9       8       8       8      10       7 
  Wrist    Neck   Chest     Age BodyFat 
      9       8       9       8       6 

Thresholding

#thresholding
threshold=0.1
Z=abs(PC.carc)
Z[Z<threshold]=0
diag(Z)=0
Z[Z>0]=1
g.thresh=as(Z,'graphNEL')
thresh.carc=gRim::cmod(g.thresh,data=carcass)
thresh.carc
Model: A cModel with 7 variables
 -2logL    :       11384.93 mdim :   23 aic :     11430.93 
 ideviance :        2456.30 idf  :   16 bic :     11519.27 
 deviance  :          17.48 df   :    5 
plot(thresh.carc,'neato')

plot of chunk unnamed-chunk-68

SIN - Significant, intermediate, nonsignificant edges

#SIN - hrany urcite pritomne
psin.carc=sinUG(S.carc,n=nrow(carcass))
gsin.carc=as(getgraph(psin.carc,0.3),'graphNEL')
#pdf('sin.pdf')
def.par <- par(no.readonly = TRUE) # save default, for resetting...
w=5
#nf <- layout(matrix(c(1,2),1,2), widths = c(lcm(w),lcm(w)), heights = c(lcm(w),lcm(w)))
#layout.show(nf)
#par(mfcol=c(1,2))
plotUGpvalues(psin.carc)

plot of chunk unnamed-chunk-69

psin.body=sinUG(S.body,n=nrow(gRbodyfat))
plotUGpvalues(psin.body)

plot of chunk unnamed-chunk-70

SIN model

gsin.body=as(getgraph(psin.body,0.3),'graphNEL')
plot(gsin.body,'neato')

plot of chunk unnamed-chunk-71

graph::degree(gsin.body)
BodyFat     Age  Weight  Height    Neck   Chest Abdomen     Hip   Thigh 
      2       5       8       5       2       5       7       6       7 
   Knee   Ankle  Biceps Forearm   Wrist 
      3       4       2       5       5 
sin.body=gRim::cmod(gsin.body, data=gRbodyfat)
sin.body$fitinfo$dev
[1] 244.5741
sin.body$fitinfo$dimension[4]
df 
58 

Edges in all models

Carcass

#summary
commonedges.carc=intersection(as(aic.carc,'graphNEL'),as(bic.carc,'graphNEL'))
othermodels=list(test.carc, forw.carc, thresh.carc, gsin.carc)
othermodels=lapply(othermodels, as, 'graphNEL')

for(ii in 1:length(othermodels))
{
  commonedges.carc=intersection(commonedges.carc, 
                                othermodels[[ii]])
}
plot(commonedges.carc,'fdp')

plot of chunk unnamed-chunk-72

Body fat

commonedges.body=intersection(as(aic.body,'graphNEL'),as(bic.body,'graphNEL'))
commonedges.carc=intersection(commonedges.body,  gsin.body)
plot(commonedges.body,'fdp')

plot of chunk unnamed-chunk-73

Lasso Regression

lasoreg=lars::lars(base::as.matrix(gRbodyfat[,1:13]),
             base::as.vector(gRbodyfat[,14]))
lasoreg

Call:
lars::lars(x = base::as.matrix(gRbodyfat[, 1:13]), y = base::as.vector(gRbodyfat[, 
    14]))
R-squared: 0.751 
Sequence of LASSO moves:
     Neck Weight Ankle Forearm Age Weight Height BodyFat Weight Thigh Hip
Var     5      3    11      13   2     -3      4       1      3     9   8
Step    1      2     3       4   5      6      7       8      9    10  11
     Abdomen Chest Biceps Height Knee Height
Var        7     6     12     -4   10      4
Step      12    13     14     15   16     17
#pdf('lasoreg.pdf')
plot(lasoreg)

plot of chunk unnamed-chunk-75

#dev.off()

Metropolis Hastings

# simple Metropolis Hastings sampling
# Thanks to Florian Hartig https://theoreticalecology.wordpress.com/2010/09/17/metropolis-hastings-mcmc-in-r/
#
require(gRain)
require(Rgraphviz)
require('entropy')

val=function(bnet,inode, ivalue)
  {return(unlist(bnet$universe$levels[inode])[ivalue])
}
likelihood <- function(bnet, my.nodes, par){
  my.states=sapply(1:length(par),FUN=function(x){return(val(bnet,x,par[x]))})
  p.e=pEvidence(setEvidence(bnet,nodes=my.nodes,states=my.states,propagate=TRUE))
  return(p.e)
}
# Prior distribution
prior <- function(param){
  return(1)
}
posterior <- function(bnet, my.nodes, param){
  return (likelihood(bnet, my.nodes, param))
}

######## Metropolis algorithm ################

proposalfunction <- function(bnet, my.nodes, param){
  node=sample.int(length(my.nodes),1)
  val=sample.int(length(unlist(bnet$universe$levels[node])),1)
  param[node]=val
  return(param)
}
run_metropolis_MCMC <- function(bnet,startvalue, iterations){
  my.nodes=bnet$universe$nodes
  chain = array(dim = c(iterations+1,length(startvalue)))
  chain[1,] = startvalue
  for (i in 1:iterations){
    proposal = proposalfunction(bnet, my.nodes,chain[i,])

    probab = posterior(bnet, my.nodes,proposal) / posterior(bnet, my.nodes,chain[i,])
    if (runif(1) < probab){
      chain[i+1,] = proposal
    }else{
      chain[i+1,] = chain[i,]
    }
  }
  return(chain)
}
run_gibbs <- function(bnet,startvalue, iterations){
  my.nodes=bnet$universe$nodes
  chain = array(dim = c(iterations+1,length(startvalue)))
  colnames(chain)=my.nodes
  chain[1,] = startvalue

  for (i in 1:iterations){
    node=sample.int(length(startvalue),size=1)
    e.nodes=my.nodes[-node]
    e.states=chain[i,-node]
    probab=querygrain(setEvidence(bnet,nodes=e.nodes,states=e.states,propagate=TRUE)
                      ,nodes=my.nodes[node])
    probab=unlist(probab)
    #      cum.prob=sapply(1:length(probab),FUN=function(x)sum(probab[1:x]))
    new.states=chain[i,]
    if(runif(1)>probab[1])
      new.states[node]=val(bnet, node,1)
    else
      new.states[node]=val(bnet, node,2)
    #        new.states[node]=val(bnet, node,which(cum.prob>runif(1))[1])
    chain[i+1,] = new.states
  }
  return(chain)
}

MH

#zacatek Metropolis Hastings sampling
startvalue = c(1,1,1)
burnIn = 0

chain = run_metropolis_MCMC(bnet,startvalue, 100)

x1x2=xtabs(~X1+X2,data.frame(chain[-(1:burnIn),]))
KL.empirical(x1x2,pravda,unit='log2')
[1] 0.4678841
acceptance = 1-mean(duplicated(chain[-(1:burnIn),]))
x1x2/  apply(x1x2,1,sum)#1 radky
   X2
X1          1         2
  1 0.1882353 0.8117647
  2 1.0000000 0.0000000
startvalue = c(val(bnet,1,1),val(bnet,2,1),val(bnet,3,1))
chain = run_gibbs(bnet,startvalue, 10000)
burnIn=100
x1x2=xtabs(~Coin+FirstFlip,data.frame(chain[burnIn:nrow(chain),]))
KL.empirical(x1x2,pravda,unit='log2')
[1] 0.09539248