Marta Vomlelova
9.1.2019
require(gRain)
require(Rgraphviz)
require(RBGL) #grafy, ne pravdep., maxClique, ansestralSet
require(xtable)
require(gRim) #mmod, dmod, cmod
#setwd('C:/Users/marta/Desktop/pgm/2016')
#install.packages('ggm','deal','SIN')
require(bnlearn)
require(gRim)
library(xtable)
library(gRbase)
require(MASS)
library(lars)
require('entropy')
library(pcalg)
library(glasso)
library(SIN)
A soubory .net:
loadHuginNet(“monty_hall.net”)
loadHuginNet(“two_coins_2.net”)
loadHuginNet(“two_coins_1.net”)
loadHuginNet(“preg4.net”)
Tables and Bayesian network
yn <- c("yes","no")
a <- cptable(~asia, values=c(1,99),levels=yn)
t.a <- cptable(~tub+asia, values=c(5,95,1,99),levels=yn)
s <- cptable(~smoke, values=c(5,5), levels=yn)
l.s <- cptable(~lung+smoke, values=c(1,9,1,99), levels=yn)
b.s <- cptable(~bronc+smoke, values=c(6,4,3,7), levels=yn)
x.e <- cptable(~xray+either, values=c(98,2,5,95), levels=yn)
d.be <- cptable(~dysp+bronc+either, values=c(9,1,7,3,8,2,1,9), levels=yn)
#e.lt <- cptable(~either+lung+tub,values=c(1,0,1,0,1,0,0,1),levels=yn)
e.lt<- ortable(~either+lung+tub,levels=yn)
plist <- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be))
chestdag <- grain(plist)
chestdag<-compile(chestdag)
par(mfrow=c(1,4)) #4 obrazky dohromady
plot(chestdag,type='dag',main='DAG')
plot(moralize(chestdag$dag),main='Moral')
plot(chestdag$ug,main='Triangulate')
plot(chestdag,type='jt',main='Join Tree')
chestdag$rip
cliques
1 : asia tub
2 : either lung tub
3 : either lung bronc
4 : smoke lung bronc
5 : either dysp bronc
6 : either xray
separators
1 :
2 : tub
3 : either lung
4 : lung bronc
5 : either bronc
6 : either
parents
1 : 0
2 : 1
3 : 2
4 : 3
5 : 3
6 : 5
gRbase::parents('either',chestdag$dag)
[1] "tub" "lung"
gRbase::children('tub',chestdag$dag)
[1] "either"
gRbase::ancestralSet('either',chestdag$dag)
[1] "asia" "tub" "smoke" "lung" "either"
str(edgeList(chestdag$dag))
List of 8
$ : chr [1:2] "asia" "tub"
$ : chr [1:2] "tub" "either"
$ : chr [1:2] "smoke" "lung"
$ : chr [1:2] "smoke" "bronc"
$ : chr [1:2] "lung" "either"
$ : chr [1:2] "bronc" "dysp"
$ : chr [1:2] "either" "xray"
$ : chr [1:2] "either" "dysp"
d-separate
d.separates<-function(a,b,c,dag){
separates(a,b,c,
gRbase::moralize(ancestralGraph(union(union(a,b),c), dag)))
}
d.separates("smoke","xray",c("lung"),chestdag$dag)
[1] TRUE
d.separates("smoke","xray",c("lung","dysp"),chestdag$dag)
[1] FALSE
mor.chest=moralize(chestdag$dag)
closure('bronc',mor.chest)
[1] "bronc" "smoke" "either" "dysp"
is.complete(mor.chest,c('smoke','lung','tub'))
[1] FALSE
is.complete(mor.chest,c('smoke','lung'))
[1] TRUE
str(getCliques(mor.chest))
List of 6
$ : chr [1:3] "either" "tub" "lung"
$ : chr [1:3] "either" "bronc" "dysp"
$ : chr [1:2] "either" "xray"
$ : chr [1:2] "smoke" "lung"
$ : chr [1:2] "smoke" "bronc"
$ : chr [1:2] "asia" "tub"
is.simplicial('lung',chestdag$dag)
[1] TRUE
simplicialNodes(mor.chest)
[1] "asia" "xray" "dysp"
plot(removeNode('asia',mor.chest))
rem=removeNode('dysp',removeNode('xray',removeNode('tub',removeNode('asia',mor.chest))))
simplicialNodes(rem)
character(0)
plot(rem)
is.triangulated(mor.chest)
[1] FALSE
tg=triangulate(mor.chest)
is.triangulated(tg)
[1] TRUE
str(getCliques(mor.chest))
List of 6
$ : chr [1:3] "either" "tub" "lung"
$ : chr [1:3] "either" "bronc" "dysp"
$ : chr [1:2] "either" "xray"
$ : chr [1:2] "smoke" "lung"
$ : chr [1:2] "smoke" "bronc"
$ : chr [1:2] "asia" "tub"
plot(tg)
tg2=removeEdge('bronc','either',mor.chest)
tg2=addEdge('smoke','either',tg2)
tg2=addEdge('smoke','dysp',tg2)
is.triangulated(tg2)
[1] TRUE
#str(getCliques(tg2))
plot(tg2)
is.simplicial('smoke',tg2)
[1] FALSE
rip(tg2)
cliques
1 : asia tub
2 : either tub lung
3 : either smoke lung
4 : either smoke dysp
5 : bronc smoke dysp
6 : either xray
separators
1 :
2 : tub
3 : either lung
4 : either smoke
5 : smoke dysp
6 : either
parents
1 : 0
2 : 1
3 : 2
4 : 3
5 : 4
6 : 4
simplicialNodes(tg)
[1] "asia" "smoke" "xray" "dysp"
simplicialNodes(tg2)
[1] "asia" "bronc" "xray"
m3=compile(grain(plist),root=c('lung','bronc','tub'), propagate=TRUE)
plot(chestdag$ug)
plot(m3)
m3$rip
cliques
1 : asia tub
2 : bronc lung tub either
3 : bronc lung smoke
4 : bronc dysp either
5 : xray either
separators
1 :
2 : tub
3 : bronc lung
4 : bronc either
5 : either
parents
1 : 0
2 : 1
3 : 2
4 : 2
5 : 4
chestdag=propagate(chestdag)
chestdag$equipot
[[1]]
asia
tub yes no
yes 0.0005 0.0099
no 0.0095 0.9801
[[2]]
, , lung = yes
either
tub yes no
yes 0.000572 0
no 0.054428 0
, , lung = no
either
tub yes no
yes 0.009828 0.000000
no 0.000000 0.935172
[[3]]
, , bronc = yes
lung
either yes no
yes 0.0315 0.0043524
no 0.0000 0.4141476
, , bronc = no
lung
either yes no
yes 0.0235 0.0054756
no 0.0000 0.5210244
[[4]]
, , smoke = yes
bronc
lung yes no
yes 0.03 0.02
no 0.27 0.18
, , smoke = no
bronc
lung yes no
yes 0.0015 0.0035
no 0.1485 0.3465
[[5]]
, , dysp = yes
bronc
either yes no
yes 0.03226716 0.02028292
no 0.33131808 0.05210244
, , dysp = no
bronc
either yes no
yes 0.00358524 0.00869268
no 0.08282952 0.46892196
[[6]]
xray
either yes no
yes 0.06353144 0.00129656
no 0.04675860 0.88841340
attr(,"pEvidence")
[1] 1
chestdag$cptlist
chestdag$evidence
chestdag$isPropagated
chestdag$temppot
separates('tub','lung','smoke',mor.chest)
#dal uz jen seznam pro pripad 'mohlo by se hodit'
edges(tg)
summary(tg)
saveHuginNet(chestdag, "test.net")
ttt <- loadHuginNet("monty_hall.net")
ttt$universe
chestdag$universe$nodes
chestdag$cptlist$either
chestdag$isCompiled
str(maxClique(mor.chest))
str(getCliques(mor.chest))
names(chestdag)
getCliques(moralize(chestdag$dag))
Soft evidence
vv=querygrain(chestdag,nodes=c('lung','smoke','xray'),
evidence=list("xray"=c(.9, .1),"asia"=c(1,0)),type='marginal')
vv
$lung
lung
yes no
0.2250155 0.7749845
$smoke
smoke
yes no
0.5735998 0.4264002
Hard evidence
querygrain(chestdag,nodes=c('lung','smoke','xray'),
evidence=list("xray"=c(1, 0),"asia"=c(1,0)),type='marginal')
$lung
lung
yes no
0.3714872 0.6285128
$smoke
smoke
yes no
0.6370074 0.3629926
Only Asia
vv=querygrain(chestdag,nodes=c('lung','smoke','xray'),
evidence=list("asia"=c(1,0)),type='marginal')
vv
$lung
lung
yes no
0.055 0.945
$smoke
smoke
yes no
0.5 0.5
$xray
xray
yes no
0.1450925 0.8549075
xtable(as.matrix(querygrain(chestdag,nodes=c('lung'),type='marginal')$lung))
% latex table generated in R 3.4.1 by xtable 1.8-3 package
% Wed Jan 09 16:54:45 2019
\begin{table}[ht]
\centering
\begin{tabular}{rr}
\hline
& x \\
\hline
yes & 0.06 \\
no & 0.95 \\
\hline
\end{tabular}
\end{table}
as(chestdag,'matrix')
[,1]
universe List,3
data NULL
dag ?
cptlist List,8
isCompiled TRUE
isPropagated TRUE
evidence NULL
control List,1
details 0
rip List,8
ug ?
equipot List,6
temppot List,6
origpot List,6
details 0
stats::simulate(chestdag,nsim=5)
asia tub smoke lung bronc either xray dysp
1 no no no no no no no no
2 no no yes no no no no no
3 no no yes no no no no no
4 no no yes no yes no no no
5 no no no no yes no no yes
xtabs(~lung+bronc,data=simulate(chestdag,nsim=nnn<-10000))/nnn
bronc
lung yes no
yes 0.0302 0.0258
no 0.4206 0.5234
my.sim=function(f.bnet,f.nodes,f.x){
pravda=querygrain(f.bnet,nodes=f.nodes,type='joint')
sim.orig=simulate(f.bnet,n=f.x)
novy=xtabs(paste0('~',paste0(f.nodes,collapse='+'))
,sim.orig)#from case list
return(KL.empirical(novy,pravda,unit='log2'))
}
#tady zaciname
setwd('C:/Users/marta/Desktop/pgm/2016')
bnet <- loadHuginNet("two_coins_2.net")
bnet<-propagate(compile(bnet))
plot(bnet$dag)
set.seed(1)
sim.orig=simulate(bnet,n=1000)
#case list
head(sim.orig)
Coin SecondFlip FirstFlip
1 penny tail tail
2 penny tail tail
3 dime head head
4 penny head tail
5 penny tail head
6 dime tail tail
#table
xtabs(~Coin+FirstFlip,sim.orig)#from case list
FirstFlip
Coin head tail
penny 103 412
dime 334 151
(tmp.tab<-xtabs(~Coin+FirstFlip,sim.orig))/apply(tmp.tab,1,sum)
FirstFlip
Coin head tail
penny 0.2000000 0.8000000
dime 0.6886598 0.3113402
pravda=querygrain(bnet,nodes=c('Coin','FirstFlip'),type='joint')
novy=xtabs(~Coin+FirstFlip,sim.orig)
KL.empirical(novy,pravda,unit='log2')
[1] 0.0008620495
#aggregated case list
ftable(sim.orig)#from raw case list
FirstFlip head tail
Coin SecondFlip
penny head 24 86
tail 79 326
dime head 238 109
tail 96 42
#data frame from aggregated case list
as.data.frame( ftable(sim.orig) )
Coin SecondFlip FirstFlip Freq
1 penny head head 24
2 dime head head 238
3 penny tail head 79
4 dime tail head 96
5 penny head tail 86
6 dime head tail 109
7 penny tail tail 326
8 dime tail tail 42
my.sim(bnet,c('Coin','FirstFlip'),10)
[1] 0.07427909
n.samples=c(10,15,30,50,100,500,1000,5000)
n.samples=c(rep(10,10),rep(20,10),rep(50,10))
kl=sapply(n.samples
,FUN=function(x)my.sim(bnet,c('Coin','FirstFlip'),x))
boxplot(kl~n.samples,xlab='Number of Samples',ylab='KL-divergence',main='KL-divergence wrt. # Samples')
bnet2=gRain::loadHuginNet("..//two_coins_1.net")
sim2=simulate(bnet2,n=10000)
novy.dag<-dag(~TwiceAHead,~Penny:TwiceAHead,~Dime:TwiceAHead)
novy.dag<-bnet2$dag
md=grain(novy.dag,data=sim2,smooth=0)
plot(novy.dag)
round(ftable(querygrain(md,md$universe$nodes,type='joint')),2)#*1000
TwiceAHead yes no
Penny Dime
head head 0.15 0.00
tail 0.00 0.06
tail head 0.00 0.56
tail 0.00 0.23
md=grain(novy.dag,data=sim2,smooth=0.001)
querygrain(md,md$universe$nodes,type='conditional')#*1000
, , Penny = head
TwiceAHead
Dime yes no
head 0.9999993 1.794999e-07
tail 0.4975509 2.071999e-01
, , Penny = tail
TwiceAHead
Dime yes no
head 6.839922e-07 0.9999998
tail 5.024491e-01 0.7928001
ftable(querygrain(md,md$universe$nodes,type='conditional'))#*1000
Penny head tail
Dime TwiceAHead
head yes 9.999993e-01 6.839922e-07
no 1.794999e-07 9.999998e-01
tail yes 4.975509e-01 5.024491e-01
no 2.071999e-01 7.928001e-01
md=grain(novy.dag,data=sim2,smooth=0.001)
bnet <- loadHuginNet("..//two_coins_2.net")
bnet<-propagate(compile(bnet))
plot(bnet$dag)
sim.orig=simulate(bnet,n=10000)
novy.dag<-dag(~Coin,~Coin:FirstFlip,~Coin:SecondFlip)
md=grain(novy.dag,data=sim.orig,smooth=0.2)
plot(novy.dag)
kl=sapply(c(10,30,50,100,500,1000,5000),FUN=function(x){
sim.orig=simulate(bnet,n=x)
md=gRain::grain(novy.dag,data=sim.orig,smooth=0.002)
novy=gRain::querygrain(md,nodes=c('SecondFlip','FirstFlip'),type='joint')
return(KL.empirical(novy,pravda,unit='log2'))
})
plot(kl,ylim=c(0,0.35))
#ukol - uceni
preg.net <- loadHuginNet("preg4.net")
plot(preg.net)
#simulujte data, naucte sit bez Ho uzlu, zkuste popsat, jestli a jak se
#puvodni a nova sit lisi, kdyz v puvodni siti Ho nikdy nepozorujeme.
# DALE UZ JE ARCHIV, NEMELO BY BYT POTREBA
#ukazky prevodu
#table
xtabs(~Coin+FirstFlip,sim.orig)#from case list
FirstFlip
Coin head tail
penny 994 4043
dime 3456 1507
as.table(ftable(sim.orig))#from aggregated case list
, , FirstFlip = head
SecondFlip
Coin head tail
penny 175 819
dime 2437 1019
, , FirstFlip = tail
SecondFlip
Coin head tail
penny 826 3217
dime 1070 437
xtabs(Freq~.,ftable(sim.orig))
, , FirstFlip = head
SecondFlip
Coin head tail
penny 175 819
dime 2437 1019
, , FirstFlip = tail
SecondFlip
Coin head tail
penny 826 3217
dime 1070 437
xtabs(Freq~Coin+FirstFlip,ftable(sim.orig))
FirstFlip
Coin head tail
penny 994 4043
dime 3456 1507
#aggregated case list
ftable(sim.orig)#from raw case list
FirstFlip head tail
Coin SecondFlip
penny head 175 826
tail 819 3217
dime head 2437 1070
tail 1019 437
ftable(xtabs(~.,sim.orig))#from table
FirstFlip head tail
Coin SecondFlip
penny head 175 826
tail 819 3217
dime head 2437 1070
tail 1019 437
dataframe
#aggregated case list from table
#as.data.frame(as.table( ftable(sim.orig) ))
as.data.frame(as.table( ftable(sim.orig) ))
Coin SecondFlip FirstFlip Freq
1 penny head head 175
2 dime head head 2437
3 penny tail head 819
4 dime tail head 1019
5 penny head tail 826
6 dime head tail 1070
7 penny tail tail 3217
8 dime tail tail 437
#tuk v porazenych prasatec, tloustka masa mezi zebry; maso/celkova hmotnost s tukem,?kostmi a slachami
data(carcass)
head(carcass)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
1 17 51 12 51 12 61 56.52475
2 17 49 15 48 15 54 57.57958
3 14 38 11 34 11 40 55.88994
4 17 58 12 58 11 58 61.81719
5 14 51 12 48 13 54 62.95964
6 20 40 14 40 14 45 54.57870
ellipse <- function(s,t){
u<-c(s,t)-center;
e <- (-1) * (u %*% sigma.inv %*% u) / 2;
exp(e)/(2 * pi * sqrt(det(sigma.inv)))}
#saturovany plny model
Y1=6
Y2=7
n=60
x <- (0:(n-1))*2 - 50
y <- (0:(n-1))*2 - 50
plot(carcass[,c(Y1,Y2)], xlab=names(carcass)[Y1], ylab=names(carcass)[Y2], col = "lightgray",pch='+')
center <- apply(carcass[,c(Y1,Y2)], 2, mean) ## sample mean vector
sigma <- cov(carcass[,c(Y1,Y2)]) ## sample variance-covariance matrix
sigma.inv = solve(sigma, matrix(c(1,0,0,1),2,2))
z <- mapply(ellipse, as.vector(rep(x,n)), as.vector(outer(rep(0,n), y, `+`)))
#round(log(matrix(z,n,n)))
contour(x,y,matrix(z,n,n), levels=(0:50), col = rainbow(50), add=TRUE, lwd = 1)
round( data.frame(apply(carcass, 2, mean)),2) ## sample mean vector
apply.carcass..2..mean.
Fat11 16.49
Meat11 51.97
Fat12 13.96
Meat12 52.00
Fat13 12.95
Meat13 55.69
LeanMeat 59.38
biv <- kde2d(carcass[,c(Y1)],carcass[,c(Y2)], n = 50 )
persp(biv, phi = 45, theta = 30, shade = .1, border = NA,xlab=names(carcass)[Y1], ylab=names(carcass)[Y2],zlab='density')
S.carc=cov.wt(carcass, method='ML')$cov
round(S.carc,2)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 11.34 0.74 8.42 2.06 7.66 -0.76 -9.08
Meat11 0.74 32.97 0.67 35.94 2.01 31.97 5.33
Fat12 8.42 0.67 8.91 0.31 6.84 -0.60 -7.95
Meat12 2.06 35.94 0.31 51.79 2.18 41.47 6.03
Fat13 7.66 2.01 6.84 2.18 7.62 0.38 -6.93
Meat13 -0.76 31.97 -0.60 41.47 0.38 41.44 7.23
LeanMeat -9.08 5.33 -7.95 6.03 -6.93 7.23 12.90
K.carc=solve(S.carc)
round(K.carc,2)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 0.44 0.03 -0.20 -0.07 -0.16 0.04 0.10
Meat11 0.03 0.16 -0.03 -0.06 -0.06 -0.06 -0.03
Fat12 -0.20 -0.03 0.54 0.06 -0.21 -0.05 0.09
Meat12 -0.07 -0.06 0.06 0.14 -0.01 -0.09 0.00
Fat13 -0.16 -0.06 -0.21 -0.01 0.56 0.03 0.07
Meat13 0.04 -0.06 -0.05 -0.09 0.03 0.16 -0.01
LeanMeat 0.10 -0.03 0.09 0.00 0.07 -0.01 0.26
PC.carc=cov2pcor(S.carc)
round(100*PC.carc)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 100 -11 41 30 32 -16 -29
Meat11 -11 100 9 41 19 35 16
Fat12 41 9 100 -24 38 18 -24
Meat12 30 41 -24 100 2 61 2
Fat13 32 19 38 2 100 -9 -18
Meat13 -16 35 18 61 -9 100 7
LeanMeat -29 16 -24 2 -18 7 100
round(100*K.carc)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 44 3 -20 -7 -16 4 10
Meat11 3 16 -3 -6 -6 -6 -3
Fat12 -20 -3 54 6 -21 -5 9
Meat12 -7 -6 6 14 -1 -9 0
Fat13 -16 -6 -21 -1 56 3 7
Meat13 4 -6 -5 -9 3 16 -1
LeanMeat 10 -3 9 0 7 -1 26
sat.carc=cmod(~.^., data=carcass)
aic.carc=stepwise(sat.carc)
plot(as(aic.carc,'graphNEL'),'fdp',main='AIC')
str(edgeList(as(aic.carc,'graphNEL')))
List of 19
$ : chr [1:2] "Fat11" "Meat11"
$ : chr [1:2] "Fat11" "Fat12"
$ : chr [1:2] "Fat11" "Meat13"
$ : chr [1:2] "Fat11" "LeanMeat"
$ : chr [1:2] "Fat11" "Fat13"
$ : chr [1:2] "Fat11" "Meat12"
$ : chr [1:2] "Meat11" "Fat12"
$ : chr [1:2] "Meat11" "Meat13"
$ : chr [1:2] "Meat11" "LeanMeat"
$ : chr [1:2] "Meat11" "Fat13"
$ : chr [1:2] "Meat11" "Meat12"
$ : chr [1:2] "Fat12" "Meat13"
$ : chr [1:2] "Fat12" "LeanMeat"
$ : chr [1:2] "Fat12" "Fat13"
$ : chr [1:2] "Fat12" "Meat12"
$ : chr [1:2] "Meat13" "LeanMeat"
$ : chr [1:2] "Meat13" "Fat13"
$ : chr [1:2] "Meat13" "Meat12"
$ : chr [1:2] "LeanMeat" "Fat13"
bic.carc=stepwise(sat.carc,k=log(nrow(carcass)))
plot(as(bic.carc,'graphNEL'),'fdp',main='BIC')
str(edgeList(as(bic.carc,'graphNEL')))
List of 17
$ : chr [1:2] "Fat11" "Meat11"
$ : chr [1:2] "Fat11" "Fat12"
$ : chr [1:2] "Fat11" "Meat13"
$ : chr [1:2] "Fat11" "Meat12"
$ : chr [1:2] "Fat11" "Fat13"
$ : chr [1:2] "Fat11" "LeanMeat"
$ : chr [1:2] "Meat11" "Fat12"
$ : chr [1:2] "Meat11" "Meat13"
$ : chr [1:2] "Meat11" "Meat12"
$ : chr [1:2] "Meat11" "Fat13"
$ : chr [1:2] "Meat11" "LeanMeat"
$ : chr [1:2] "Fat12" "Meat13"
$ : chr [1:2] "Fat12" "Meat12"
$ : chr [1:2] "Fat12" "Fat13"
$ : chr [1:2] "Fat12" "LeanMeat"
$ : chr [1:2] "Meat13" "Meat12"
$ : chr [1:2] "Fat13" "LeanMeat"
data(BodyFat)
head(BodyFat)
Density BodyFat Age Weight Height Neck Chest Abdomen Hip Thigh Knee
1 1.0708 12.3 23 154.25 67.75 36.2 93.1 85.2 94.5 59.0 37.3
2 1.0853 6.1 22 173.25 72.25 38.5 93.6 83.0 98.7 58.7 37.3
3 1.0414 25.3 22 154.00 66.25 34.0 95.8 87.9 99.2 59.6 38.9
4 1.0751 10.4 26 184.75 72.25 37.4 101.8 86.4 101.2 60.1 37.3
5 1.0340 28.7 24 184.25 71.25 34.4 97.3 100.0 101.9 63.2 42.2
6 1.0502 20.9 24 210.25 74.75 39.0 104.5 94.4 107.8 66.0 42.0
Ankle Biceps Forearm Wrist
1 21.9 32.0 27.4 17.1
2 23.4 30.5 28.9 18.2
3 24.0 28.8 25.2 16.6
4 22.8 32.4 29.4 18.2
5 24.0 32.2 27.7 17.7
6 25.6 35.7 30.6 18.8
plot(BodyFat$BodyFat,1/BodyFat$Density)
BodyFat=BodyFat[-c(31,42,48,76,86,96,159,169,175,182,206),]
plot(BodyFat$BodyFat,1/BodyFat$Density)
BodyFat$Age=sqrt(BodyFat$Age)
BodyFat$Weight=sqrt(BodyFat$Weight)
gRbodyfat=BodyFat[,2:15]
S.body=cov.wt(gRbodyfat, method='ML')$cov
round(S.body)
BodyFat Age Weight Height Neck Chest Abdomen Hip Thigh Knee Ankle
BodyFat 68 2 5 0 10 48 71 35 22 10 3
Age 2 1 0 -1 0 1 2 0 -1 0 0
Weight 5 0 1 1 2 8 10 7 5 2 1
Height 0 -1 1 7 2 4 5 7 5 3 2
Neck 10 0 2 2 6 15 19 12 8 4 2
Chest 48 1 8 4 15 68 80 47 30 14 7
Abdomen 71 2 10 5 19 80 112 64 40 18 8
Hip 35 0 7 7 12 47 64 49 31 14 7
Thigh 22 -1 5 5 8 30 40 31 25 10 5
Knee 10 0 2 3 4 14 18 14 10 6 2
Ankle 3 0 1 2 2 7 8 7 5 2 2
Biceps 12 0 3 2 5 18 21 15 11 5 2
Forearm 6 0 1 2 3 10 11 8 6 3 2
Wrist 3 0 1 1 2 5 6 4 3 1 1
Biceps Forearm Wrist
BodyFat 12 6 3
Age 0 0 0
Weight 3 1 1
Height 2 2 1
Neck 5 3 2
Chest 18 10 5
Abdomen 21 11 6
Hip 15 8 4
Thigh 11 6 3
Knee 5 3 1
Ankle 2 2 1
Biceps 9 4 2
Forearm 4 4 1
Wrist 2 1 1
K.body=solve(S.body)
#round(K.body,2)
PC.body=cov2pcor(S.body)
round(PC.body,1)
BodyFat Age Weight Height Neck Chest Abdomen Hip Thigh Knee
BodyFat 1.0 0.1 0.0 -0.1 -0.2 -0.1 0.5 -0.2 0.1 0.0
Age 0.1 1.0 -0.2 0.0 0.1 0.1 0.3 -0.1 -0.3 0.3
Weight 0.0 -0.2 1.0 0.7 0.3 0.5 0.3 0.5 0.2 0.1
Height -0.1 0.0 0.7 1.0 -0.2 -0.4 -0.3 -0.2 -0.3 0.2
Neck -0.2 0.1 0.3 -0.2 1.0 0.0 0.1 -0.2 0.1 -0.1
Chest -0.1 0.1 0.5 -0.4 0.0 1.0 0.3 -0.2 -0.2 0.0
Abdomen 0.5 0.3 0.3 -0.3 0.1 0.3 1.0 0.2 0.0 0.0
Hip -0.2 -0.1 0.5 -0.2 -0.2 -0.2 0.2 1.0 0.3 0.1
Thigh 0.1 -0.3 0.2 -0.3 0.1 -0.2 0.0 0.3 1.0 0.3
Knee 0.0 0.3 0.1 0.2 -0.1 0.0 0.0 0.1 0.3 1.0
Ankle 0.0 -0.2 0.2 -0.1 -0.2 0.0 -0.1 -0.1 0.1 0.3
Biceps 0.0 0.1 0.2 -0.1 0.0 0.0 -0.1 0.0 0.2 0.0
Forearm 0.1 -0.2 0.2 -0.1 0.1 0.1 -0.2 -0.2 0.0 0.1
Wrist -0.2 0.4 0.1 0.0 0.2 -0.1 0.1 0.0 -0.1 0.0
Ankle Biceps Forearm Wrist
BodyFat 0.0 0.0 0.1 -0.2
Age -0.2 0.1 -0.2 0.4
Weight 0.2 0.2 0.2 0.1
Height -0.1 -0.1 -0.1 0.0
Neck -0.2 0.0 0.1 0.2
Chest 0.0 0.0 0.1 -0.1
Abdomen -0.1 -0.1 -0.2 0.1
Hip -0.1 0.0 -0.2 0.0
Thigh 0.1 0.2 0.0 -0.1
Knee 0.3 0.0 0.1 0.0
Ankle 1.0 -0.1 -0.1 0.4
Biceps -0.1 1.0 0.4 0.0
Forearm -0.1 0.4 1.0 0.3
Wrist 0.4 0.0 0.3 1.0
sat.body=cmod(~.^., data=gRbodyfat)
aic.body=stepwise(sat.body)
plot(as(aic.body,'graphNEL'),'fdp',main='AIC')
bic.body=stepwise(sat.body,k=log(nrow(gRbodyfat)))
plot(as(bic.body,'graphNEL'),'fdp',main='BIC')
graph::degree(as(bic.body,'graphNEL'))
Weight Thigh Height Abdomen Forearm BodyFat Neck Hip Age
12 12 11 9 11 10 10 8 10
Chest Knee Ankle Biceps Wrist
5 5 10 4 5
gen.carc=cmod(~Fat11*Fat12*Meat12*Meat13+
Fat11*Fat12*Fat13*LeanMeat+
Meat11*Meat12*Meat13+
Meat11*Fat13*LeanMeat
,data=carcass)
gen.carc
Model: A cModel with 7 variables
-2logL : 11387.24 mdim : 22 aic : 11431.24
ideviance : 2453.99 idf : 15 bic : 11515.73
deviance : 19.79 df : 6
plot(gen.carc,'neato')
edge.carc=cmod(edgeList(as(gen.carc,'graphNEL')) ,data=carcass)
edge.carc
Model: A cModel with 7 variables
-2logL : 11387.24 mdim : 22 aic : 11431.24
ideviance : 2453.99 idf : 15 bic : 11515.73
deviance : 19.79 df : 6
Edges: 774 iterations
carcfit1=ggmfit(S.carc, n=nrow(carcass),edgeList(as(gen.carc,'graphNEL')))
carcfit1[c('dev','df','iter')]
$dev
[1] 19.78537
$df
[1] 6
$iter
[1] 774
Cliques: 61 iterations
carcfit2=ggmfit(S.carc, n=nrow(carcass),maxClique(as(gen.carc,'graphNEL'))$maxCliques)
carcfit2[c('dev','df','iter')]
$dev
[1] 19.78537
$df
[1] 6
$iter
[1] 61
comparemodels=function(m1,m2){
lrt=m2$fitinfo$dev-m1$fitinfo$dev
dfdiff=m2$fitinfo$dimension[4]-m1$fitinfo$dimension[4]
names(dfdiff)=NULL
list('lrt'=lrt,'df'=dfdiff)
}
aic.carc
Model: A cModel with 7 variables
-2logL : 11367.70 mdim : 26 aic : 11419.70
ideviance : 2473.53 idf : 19 bic : 11519.56
deviance : 0.25 df : 2
bic.carc
Model: A cModel with 7 variables
-2logL : 11376.07 mdim : 24 aic : 11424.07
ideviance : 2465.16 idf : 17 bic : 11516.25
deviance : 8.62 df : 4
comparemodels(aic.carc,bic.carc)
$lrt
[1] 8.372649
$df
[1] 2
Nonsignificant
ciTest_mvn(list(cov=S.carc,n.obs=nrow(carcass)),
set=~LeanMeat+Meat13+Meat11+Meat12+Fat11+Fat12+Fat13)
Testing LeanMeat _|_ Meat13 | Meat11 Meat12 Fat11 Fat12 Fat13
Statistic (DEV): 1.687 df: 1 p-value: 0.1940 method: CHISQ
Significant
ciTest_mvn(list(cov=S.carc,n.obs=nrow(carcass)),
set=~LeanMeat+Meat11+Meat12+Meat13+Fat11+Fat12+Fat13)
Testing LeanMeat _|_ Meat11 | Meat12 Meat13 Fat11 Fat12 Fat13
Statistic (DEV): 8.931 df: 1 p-value: 0.0028 method: CHISQ
C.carc=cov2cor(S.carc)
gaussCItest(7,2,c(1,3,4,5,6),list(C=C.carc,n=nrow(carcass)))
[1] 0.003077247
#regresni koeficienty
-K.carc[7,-7]/K.carc[7,7]
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13
-0.37714577 0.12421685 -0.33552335 0.01518311 -0.26893064 0.05413811
#rozptyl rezidui
1/K.carc[7,7]
[1] 3.794228
r.LeanMeat=residuals(lm(LeanMeat~Meat11+Meat13+Fat11+Fat12+Fat13,
data=carcass))
r.Meat12=residuals(lm(Meat12~Meat11+Meat13+Fat11+Fat12+Fat13,
data=carcass))
#par(mfcol=c(1,2))
plot(r.Meat12,r.LeanMeat)
abline(lm(r.LeanMeat~r.Meat12),lwd=2)
r.LeanMeat3=residuals(lm(LeanMeat~Meat11+Meat12+Fat11+Fat12+Fat13,
data=carcass))
r.Meat13=residuals(lm(Meat13~Meat11+Meat12+Fat11+Fat12+Fat13,
data=carcass))
plot(r.Meat13,r.LeanMeat3)
abline(lm(r.LeanMeat3~r.Meat13),lwd=2)
par(mfcol=c(1,1))
#decomposition
K.hat=S.carc
K.hat[]=0
AC=c('Fat11','Fat12','Fat13','Meat11','LeanMeat')
BC=c('Meat11','Meat12','Meat13','Fat11','Fat12')
C=c('Fat11','Fat12','Meat11')
K.hat[AC,AC]=K.hat[AC,AC]+solve(S.carc[AC,AC])
K.hat[BC,BC]=K.hat[BC,BC]+solve(S.carc[BC,BC])
K.hat[C,C]=K.hat[C,C]-solve(S.carc[C,C])
round(100*K.hat,-1)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 40 0 -20 -10 -20 10 10
Meat11 0 20 0 -10 0 -10 -10
Fat12 -20 0 50 10 -20 0 10
Meat12 -10 -10 10 10 0 -10 0
Fat13 -20 0 -20 0 60 0 10
Meat13 10 -10 0 -10 0 20 0
LeanMeat 10 -10 10 0 10 0 30
Estimated and original sigma
Sigma.hat=solve(K.hat)
round(Sigma.hat,2)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 11.34 0.74 8.42 2.06 7.66 -0.76 -9.08
Meat11 0.74 32.97 0.67 35.94 2.01 31.97 5.33
Fat12 8.42 0.67 8.91 0.31 6.84 -0.60 -7.95
Meat12 2.06 35.94 0.31 51.79 2.45 41.47 5.41
Fat13 7.66 2.01 6.84 2.45 7.62 0.89 -6.93
Meat13 -0.76 31.97 -0.60 41.47 0.89 41.44 6.43
LeanMeat -9.08 5.33 -7.95 5.41 -6.93 6.43 12.90
round(S.carc,2)
Fat11 Meat11 Fat12 Meat12 Fat13 Meat13 LeanMeat
Fat11 11.34 0.74 8.42 2.06 7.66 -0.76 -9.08
Meat11 0.74 32.97 0.67 35.94 2.01 31.97 5.33
Fat12 8.42 0.67 8.91 0.31 6.84 -0.60 -7.95
Meat12 2.06 35.94 0.31 51.79 2.18 41.47 6.03
Fat13 7.66 2.01 6.84 2.18 7.62 0.38 -6.93
Meat13 -0.76 31.97 -0.60 41.47 0.38 41.44 7.23
LeanMeat -9.08 5.33 -7.95 6.03 -6.93 7.23 12.90
#stepwise
test.carc=stepwise(sat.carc,details=1,'test')
STEPWISE:
criterion: test
direction: backward
type : decomposable
search : all
steps : 1000
p.value 0.7089 Edge deleted: Meat12 Fat13
p.value 0.7414 Edge deleted: LeanMeat Meat12
p.value 0.0812 Edge deleted: Meat13 Fat13
plot(test.carc,'neato')
ind.carc=gRim::cmod(~.^1,data=carcass)
set.seed(123)
forw.carc=stepwise(ind.carc,search='headlong',
direction='forward',
k=log(nrow(carcass)),details=0)
plot(forw.carc,'neato')
forw.carc
Model: A cModel with 7 variables
-2logL : 11393.53 mdim : 23 aic : 11439.53
ideviance : 2447.70 idf : 16 bic : 11527.87
deviance : 26.08 df : 5
#laso
C.body=cov2cor(S.body)
res.lasso=glasso(C.body,rho=0.1)
AM=res.lasso$wi !=0
diag(AM)=F
g.lasso=as(AM,'graphNEL')
nodes(g.lasso)=names(gRbodyfat)
glasso.body=gRim::cmod(g.lasso,data=gRbodyfat)
plot(glasso.body,'neato')
graph::degree(as(glasso.body,'graphNEL'))
Weight Knee Thigh Hip Abdomen Ankle Biceps Forearm Height
11 9 10 9 8 8 8 10 7
Wrist Neck Chest Age BodyFat
9 8 9 8 6
#thresholding
threshold=0.1
Z=abs(PC.carc)
Z[Z<threshold]=0
diag(Z)=0
Z[Z>0]=1
g.thresh=as(Z,'graphNEL')
thresh.carc=gRim::cmod(g.thresh,data=carcass)
thresh.carc
Model: A cModel with 7 variables
-2logL : 11384.93 mdim : 23 aic : 11430.93
ideviance : 2456.30 idf : 16 bic : 11519.27
deviance : 17.48 df : 5
plot(thresh.carc,'neato')
#SIN - hrany urcite pritomne
psin.carc=sinUG(S.carc,n=nrow(carcass))
gsin.carc=as(getgraph(psin.carc,0.3),'graphNEL')
#pdf('sin.pdf')
def.par <- par(no.readonly = TRUE) # save default, for resetting...
w=5
#nf <- layout(matrix(c(1,2),1,2), widths = c(lcm(w),lcm(w)), heights = c(lcm(w),lcm(w)))
#layout.show(nf)
#par(mfcol=c(1,2))
plotUGpvalues(psin.carc)
psin.body=sinUG(S.body,n=nrow(gRbodyfat))
plotUGpvalues(psin.body)
gsin.body=as(getgraph(psin.body,0.3),'graphNEL')
plot(gsin.body,'neato')
graph::degree(gsin.body)
BodyFat Age Weight Height Neck Chest Abdomen Hip Thigh
2 5 8 5 2 5 7 6 7
Knee Ankle Biceps Forearm Wrist
3 4 2 5 5
sin.body=gRim::cmod(gsin.body, data=gRbodyfat)
sin.body$fitinfo$dev
[1] 244.5741
sin.body$fitinfo$dimension[4]
df
58
Carcass
#summary
commonedges.carc=intersection(as(aic.carc,'graphNEL'),as(bic.carc,'graphNEL'))
othermodels=list(test.carc, forw.carc, thresh.carc, gsin.carc)
othermodels=lapply(othermodels, as, 'graphNEL')
for(ii in 1:length(othermodels))
{
commonedges.carc=intersection(commonedges.carc,
othermodels[[ii]])
}
plot(commonedges.carc,'fdp')
Body fat
commonedges.body=intersection(as(aic.body,'graphNEL'),as(bic.body,'graphNEL'))
commonedges.carc=intersection(commonedges.body, gsin.body)
plot(commonedges.body,'fdp')
lasoreg=lars::lars(base::as.matrix(gRbodyfat[,1:13]),
base::as.vector(gRbodyfat[,14]))
lasoreg
Call:
lars::lars(x = base::as.matrix(gRbodyfat[, 1:13]), y = base::as.vector(gRbodyfat[,
14]))
R-squared: 0.751
Sequence of LASSO moves:
Neck Weight Ankle Forearm Age Weight Height BodyFat Weight Thigh Hip
Var 5 3 11 13 2 -3 4 1 3 9 8
Step 1 2 3 4 5 6 7 8 9 10 11
Abdomen Chest Biceps Height Knee Height
Var 7 6 12 -4 10 4
Step 12 13 14 15 16 17
#pdf('lasoreg.pdf')
plot(lasoreg)
#dev.off()
# simple Metropolis Hastings sampling
# Thanks to Florian Hartig https://theoreticalecology.wordpress.com/2010/09/17/metropolis-hastings-mcmc-in-r/
#
require(gRain)
require(Rgraphviz)
require('entropy')
val=function(bnet,inode, ivalue)
{return(unlist(bnet$universe$levels[inode])[ivalue])
}
likelihood <- function(bnet, my.nodes, par){
my.states=sapply(1:length(par),FUN=function(x){return(val(bnet,x,par[x]))})
p.e=pEvidence(setEvidence(bnet,nodes=my.nodes,states=my.states,propagate=TRUE))
return(p.e)
}
# Prior distribution
prior <- function(param){
return(1)
}
posterior <- function(bnet, my.nodes, param){
return (likelihood(bnet, my.nodes, param))
}
######## Metropolis algorithm ################
proposalfunction <- function(bnet, my.nodes, param){
node=sample.int(length(my.nodes),1)
val=sample.int(length(unlist(bnet$universe$levels[node])),1)
param[node]=val
return(param)
}
run_metropolis_MCMC <- function(bnet,startvalue, iterations){
my.nodes=bnet$universe$nodes
chain = array(dim = c(iterations+1,length(startvalue)))
chain[1,] = startvalue
for (i in 1:iterations){
proposal = proposalfunction(bnet, my.nodes,chain[i,])
probab = posterior(bnet, my.nodes,proposal) / posterior(bnet, my.nodes,chain[i,])
if (runif(1) < probab){
chain[i+1,] = proposal
}else{
chain[i+1,] = chain[i,]
}
}
return(chain)
}
run_gibbs <- function(bnet,startvalue, iterations){
my.nodes=bnet$universe$nodes
chain = array(dim = c(iterations+1,length(startvalue)))
colnames(chain)=my.nodes
chain[1,] = startvalue
for (i in 1:iterations){
node=sample.int(length(startvalue),size=1)
e.nodes=my.nodes[-node]
e.states=chain[i,-node]
probab=querygrain(setEvidence(bnet,nodes=e.nodes,states=e.states,propagate=TRUE)
,nodes=my.nodes[node])
probab=unlist(probab)
# cum.prob=sapply(1:length(probab),FUN=function(x)sum(probab[1:x]))
new.states=chain[i,]
if(runif(1)>probab[1])
new.states[node]=val(bnet, node,1)
else
new.states[node]=val(bnet, node,2)
# new.states[node]=val(bnet, node,which(cum.prob>runif(1))[1])
chain[i+1,] = new.states
}
return(chain)
}
#zacatek Metropolis Hastings sampling
startvalue = c(1,1,1)
burnIn = 0
chain = run_metropolis_MCMC(bnet,startvalue, 100)
x1x2=xtabs(~X1+X2,data.frame(chain[-(1:burnIn),]))
KL.empirical(x1x2,pravda,unit='log2')
[1] 0.4678841
acceptance = 1-mean(duplicated(chain[-(1:burnIn),]))
x1x2/ apply(x1x2,1,sum)#1 radky
X2
X1 1 2
1 0.1882353 0.8117647
2 1.0000000 0.0000000
startvalue = c(val(bnet,1,1),val(bnet,2,1),val(bnet,3,1))
chain = run_gibbs(bnet,startvalue, 10000)
burnIn=100
x1x2=xtabs(~Coin+FirstFlip,data.frame(chain[burnIn:nrow(chain),]))
KL.empirical(x1x2,pravda,unit='log2')
[1] 0.09539248