Why are we learning about mixed models?
Case study: Regression for several individuals
Assumptions of the General Linear Model
Assumptions of the General Linear Mixed Model
Four models, in order of increasing assumptions:
\(y_i \sim N(\mu_i, \sigma^2)\), \(\mu_i = \beta_0 + \beta_1 x_i\)
\(y_{ij} \sim N(\mu_{ij}, \sigma^2)\), \(\mu_{ij} = \beta_{0i} + \beta_1 x_{ij}\)
\(y_{ij}|b_{0i} \sim N(\mu_{ij}, \sigma^2)\), \(\mu_{ij} = \beta_{0} + b_{0i} + \beta_1 x_{ij}\), \(b_{0i} \sim N(0, \sigma^2_0)\)
\(y_{ij}|b_{0i},b_{1i} \sim N(\mu_{ij}, \sigma^2)\), \(\mu_{ij} = \beta_{0} + b_{0i} + (\beta_1 + b_{1i}) x_{ij}\), \(\begin{bmatrix}b_{0i} \\ b_{1i} \end{bmatrix} \sim N \left( \begin{bmatrix}0 \\ 0 \end{bmatrix}, \begin{bmatrix}\sigma^2_{0} & \sigma_{01} \\ \sigma_{01} & \sigma^2_{1} \end{bmatrix} \right)\).
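To make the random-intercept model concrete, here is a minimal simulation sketch (all parameter values are made up for illustration): each individual draws a single intercept deviation \(b_{0i}\), which is shared by all of that individual's observations.
# Simulate from the random-intercept model with invented parameters.
set.seed(1)
n_id <- 50; n_obs <- 5                 # individuals; observations per individual
beta0 <- 2; beta1 <- 0.8               # fixed intercept and slope (made up)
sigma <- 1; sigma0 <- 2                # residual and between-individual SDs
b0 <- rnorm(n_id, 0, sigma0)           # b_0i ~ N(0, sigma0^2), one per individual
sim <- expand.grid(i = seq_len(n_id), j = seq_len(n_obs))
sim$x <- runif(nrow(sim), 0, 10)
sim$y <- beta0 + b0[sim$i] + beta1 * sim$x + rnorm(nrow(sim), 0, sigma)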
What are the implications of modeling these effects as random rather than as fixed?
What is the minimum information we would need to fit each of these models?
Data can be found here.
library(tidyverse)
ggplot2::theme_set(
  theme_classic() +
    theme(aspect.ratio = 1))
# Read the student data, give every student an ID from the row names, and
# stack the two previous-term grades (G1, G2) into a single prev_grade
# column, so each student contributes one row per term.
dd <- read.csv("data/student_data.csv") %>%
  rownames_to_column("student_ID") %>%
  pivot_longer(cols = c(G1, G2), values_to = "prev_grade", names_to = "term")
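A quick sanity check on the reshape: each student should now contribute two rows, one per previous term.
head(dd)                    # one row per student-by-term combination
n_distinct(dd$student_ID)   # number of distinct students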
# Final grade (G3) against previous grades, separately for each school.
dd %>%
  ggplot(aes(prev_grade, G3)) +
  geom_point() +
  facet_wrap(~school)
library(nlme)
# m1: one prev_grade slope per school, shared by all students
m1 <- lm(G3 ~ school + prev_grade:school, data = dd)
# m2: adds a fixed intercept for every student
m2 <- lm(G3 ~ school + prev_grade:school + student_ID, data = dd)
# m3: a separate prev_grade slope for every student within each school
m3 <- lm(G3 ~ school + prev_grade:school:student_ID, data = dd)
# First few coefficients from m3; note the inestimable NA terms.
coef(m3)[1:10]
## (Intercept) schoolMS
## 2.0328301 4.7690565
## schoolGP:prev_grade:student_ID1 schoolMS:prev_grade:student_ID1
## 0.7153913 NA
## schoolGP:prev_grade:student_ID10 schoolMS:prev_grade:student_ID10
## 0.8932255 NA
## schoolGP:prev_grade:student_ID100 schoolMS:prev_grade:student_ID100
## 0.7344209 NA
## schoolGP:prev_grade:student_ID101 schoolMS:prev_grade:student_ID101
## 0.4238814 NA
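The NAs are a symptom of rank deficiency: every student attends exactly one school, so for each student the slope term for the other school has no supporting data and cannot be estimated. A quick check:
dd %>%
  group_by(student_ID) %>%
  summarise(n_schools = n_distinct(school)) %>%
  count(n_schools)   # every student should fall in n_schools == 1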
# m4: random intercept per student, the mixed-model analogue of m2
m4 <- lme(G3 ~ prev_grade:school, random = ~ 1 | student_ID, data = dd)
m4
## Linear mixed-effects model fit by REML
## Data: dd
## Log-restricted-likelihood: 11599.37
## Fixed: G3 ~ prev_grade:school
## (Intercept) prev_grade:schoolGP prev_grade:schoolMS
## 8.241470e+00 1.440638e-16 5.131282e-16
##
## Random effects:
## Formula: ~1 | student_ID
## (Intercept) Residual
## StdDev: 5.069187 1.203865e-15
##
## Number of Observations: 790
## Number of Groups: 395
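Something is clearly off here: the residual standard deviation is numerically zero and the fixed slopes vanish. The reason is in the reshape: pivot_longer duplicated G3 across each student's two rows, so a per-student intercept reproduces the response exactly and leaves nothing for the slope or the residual. The variance components can be read off directly:
VarCorr(m4)   # between-student variance absorbs everything; residual is ~0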
# m5: random prev_grade slope per student instead of a random intercept
m5 <- lme(G3 ~ school + prev_grade:school, random = ~ 0 + prev_grade | student_ID, data = dd)
m5
## Linear mixed-effects model fit by REML
## Data: dd
## Log-restricted-likelihood: -1703.618
## Fixed: G3 ~ school + prev_grade:school
## (Intercept) schoolMS schoolGP:prev_grade schoolMS:prev_grade
## 1.1265131 -0.5328518 0.8166789 0.8526936
##
## Random effects:
## Formula: ~0 + prev_grade | student_ID
## prev_grade Residual
## StdDev: 0.3015872 0.9551379
##
## Number of Observations: 790
## Number of Groups: 395
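m4 and m5 differ in their fixed effects, so their REML criteria above are not directly comparable. One option, sketched here, is to refit both by maximum likelihood and compare information criteria:
m4_ml <- update(m4, method = "ML")
m5_ml <- update(m5, method = "ML")
AIC(m4_ml, m5_ml)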
# Store each model's fitted values for plotting.
dd$p1 <- predict(m1, dd)
dd$p2 <- predict(m2, dd)
dd$p3 <- predict(m3, dd)
dd$p4 <- predict(m4, dd)
dd$p5 <- predict(m5, dd)
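For the lme fits (m4, m5), predict() includes the estimated student-level random effects by default; population-level predictions from the fixed effects alone use level = 0. A brief sketch (the new column name is just illustrative):
dd$p5_pop <- predict(m5, dd, level = 0)   # fixed effects only, no student deviations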
# Overlay each model's fitted values on the raw data, faceted by school.
dd %>%
  ggplot(aes(x = prev_grade)) +
  facet_wrap(~school) +
  geom_point(aes(y = G3)) +
  geom_point(aes(y = p2), color = 'lightblue') +
  geom_point(aes(y = p3), color = 'orange') +
  geom_point(aes(y = p4), color = 'gold') +
  geom_point(aes(y = p5), color = 'lightgreen') +
  geom_point(aes(y = p1), color = 'tomato')
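One implication of modeling the per-student slopes as random, returning to the question raised earlier: the estimated deviations are shrunk toward the population slope rather than fit freely as in m3. A minimal sketch of how to inspect them with nlme:
re5 <- ranef(m5)            # per-student deviations from the population slope
summary(re5$prev_grade)     # centered at zero and pulled toward it (shrinkage)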