# Scratch check: bind a value, echo it, and list the workspace contents.
a <- 5
a
## [1] 5
ls()
## [1] "a"
# Restore the objects saved in the .RData file into the workspace, then print
# a per-column summary of `mratings`.
# NOTE(review): `mratings` is presumably one of the objects stored in this
# file -- confirm. The absolute path is machine-specific.
load(file = "C:/Users/Marta/Desktop/tirpitz/R/fratings3000.RData")
summary(object = mratings)
## USERID MOVIEID RATING TS
## Min. : 195 Min. : 1 Min. :1.000 Min. :1999-12-30
## 1st Qu.: 663373 1st Qu.: 4686 1st Qu.:3.000 1st Qu.:2004-07-05
## Median :1347602 Median : 9189 Median :4.000 Median :2005-02-18
## Mean :1331206 Mean : 9135 Mean :3.633 Mean :2004-11-22
## 3rd Qu.:2000221 3rd Qu.:13748 3rd Qu.:4.000 3rd Qu.:2005-07-25
## Max. :2648758 Max. :17770 Max. :5.000 Max. :2005-12-31
##
## K MAVG CNT BAYESAVG
## Min. : 1.0 Min. :1.000 Min. : 1.0 Min. :2.820
## 1st Qu.: 42.0 1st Qu.:3.389 1st Qu.: 105.0 1st Qu.:3.472
## Median :105.0 Median :3.640 Median : 306.0 Median :3.637
## Mean :142.7 Mean :3.632 Mean : 392.7 Mean :3.657
## 3rd Qu.:212.0 3rd Qu.:3.890 3rd Qu.: 630.0 3rd Qu.:3.833
## Max. :598.0 Max. :5.000 Max. :1398.0 Max. :4.532
## NA's :118 NA's :118 NA's :118
## TT IMDBRATING GENRE1 YEAR
## Length:471074 Min. :1.90 Length:471074 Length:471074
## Class :character 1st Qu.:6.40 Class :character Class :character
## Mode :character Median :7.00 Mode :character Mode :character
## Mean :6.95
## 3rd Qu.:7.60
## Max. :9.20
## NA's :60932
## USHIFT USHIFTB UGENRE GENREMATCH
## Min. :-2.02088 Min. :-1.10638 Length:471074 Mode :logical
## 1st Qu.:-0.30507 1st Qu.:-0.23972 Class :array FALSE:258414
## Median :-0.03721 Median :-0.02774 Mode :character TRUE :149315
## Mean :-0.02477 Mean :-0.02363 NA's :63345
## 3rd Qu.: 0.23167 3rd Qu.: 0.17260
## Max. : 1.43803 Max. : 1.06262
##
# Put the columns of `mratings` on the search path.
# NOTE(review): attach() is generally discouraged; it is kept here because
# other statements in this script may refer to bare column names.
attach(mratings)
#min(mratings$USERID)

# Median of the RATING column.
with(mratings, median(RATING))
## [1] 4

# Histogram of RATING with the default title and axis label.
with(mratings, hist(RATING))

# Histogram of IMDBRATING with a custom title.
with(mratings, hist(IMDBRATING, main = "FLIX - IMDBRATING"))

# Scatter plot of MAVG (y) against CNT (x).
with(mratings, plot(CNT, MAVG))

# Pearson correlation test between the RATING and MAVG columns.
with(mratings, cor.test(RATING, MAVG, method = "pearson"))
##
## Pearson's product-moment correlation
##
## data: RATING and MAVG
## t = 277.26, df = 470950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3721412 0.3770517
## sample estimates:
## cor
## 0.3745991
# Correlation test between RATING and IMDBRATING; "pearson" is cor.test's
# default method and is spelled out here for readability.
with(mratings, cor.test(RATING, IMDBRATING, method = "pearson"))
##
## Pearson's product-moment correlation
##
## data: RATING and IMDBRATING
## t = 173, df = 410140, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2579317 0.2636362
## sample estimates:
## cor
## 0.2607862
# IMDBRATING distribution within each RATING value.
boxplot(
  IMDBRATING ~ RATING,
  data = mratings,
  xlab = "RATING",
  ylab = "IMDBRATING"
)

# MAVG distribution within each RATING value, overlaid with the least-squares
# line of MAVG on RATING.
boxplot(
  MAVG ~ RATING,
  data = mratings,
  xlab = "RATING",
  ylab = "MAVG",
  main = "MAVG vs. RATING"
)
linModel <- lm(MAVG ~ RATING, data = mratings)
# Draw the fitted line (intercept, slope) on top of the boxplot.
abline(coef = coef(linModel), col = "blue")
# Print the fitted slope in the bottom margin (side 1).
mtext(text = coef(linModel)[2], side = 1, col = "blue")

# RATING distribution per GENRE1 value. Axis labels are drawn perpendicular to
# the axis (las = 2) and shrunk so the genre names fit. `cex.axis` is the
# graphical parameter that scales axis annotation; the previous `text.cex`
# is not a recognised parameter and had no effect on the label size.
boxplot(
  RATING ~ GENRE1,
  data = mratings,
  ylab = "RATING",
  xlab = "",
  main = "RATING vs. GENRE",
  las = 2,
  cex.axis = 0.5
)

# One IMDBRATING histogram per RATING value 1..5, laid out on a 2 x 3 grid.
# par() returns the previous settings, which are restored after the loop so
# the grid layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(2, 3))
for (i in 1:5) {
  hist(
    # Column is taken from the data frame explicitly rather than from the
    # search path, so this does not depend on attach(mratings).
    mratings[mratings$RATING == i, "IMDBRATING"],
    xlab = "IMDBRATING",
    main = paste0("RATING==", i)
  )
}
par(old_par)

# Perspective plot of the IMDBRATING x RATING contingency table.
# The axis vectors are the sorted distinct values of each column; sort() drops
# NA by default, and table() drops NA by default, so they line up with the
# table's rows and columns.
imdb_range <- sort(unique(mratings$IMDBRATING))
rating_range <- sort(unique(mratings$RATING))
tab_rat <- with(mratings, table(IMDBRATING, RATING))
#table(IMDBRATING)
#mratings[RATING==4 & IMDBRATING]
#contour(imdb_range,rating_range,tab_rat,nlevels=45,add=T)
#image(imdb_range,rating_range,tab_rat)
# theta / phi set the viewing direction (azimuth / colatitude).
persp(imdb_range, rating_range, tab_rat, theta = 40, phi = 20)

# Number of rows (ratings) per USERID, then the distribution of those counts.
ucounts <- with(mratings, tapply(RATING, USERID, FUN = length))
hist(
  ucounts,
  main = "# Rated Movies per User",
  xlab = "#ratings",
  ylab = "#users with the #ratings"
)
