Necessary steps to define the statistical model:
In general, the GLM can model any distribution from the exponential family:
library(tidyverse)
library(ggpubr)
n <- 2000
set.seed(42)
snorm <- rnorm(n, mean = 1, sd = sqrt(1))
spois <- rpois(n, lambda = 1)
sbinom <- rbinom(n, size = 12, prob = .3)
sbeta <- rbeta(n, shape1 = 2, shape2 = 4)
st <- rt(n, df = 10, ncp = 1)
sgamma <- rgamma(n, shape = 10, rate = 2)
data.frame(normal = snorm,
poisson = spois,
binomial = sbinom,
beta = sbeta,
t = st,
gamma = sgamma) %>%
pivot_longer(cols = everything()) %>%
mutate(name = factor(name,
levels = c("normal", "t", "gamma", "beta",
"poisson", "binomial"))) %>%
ggplot(aes(value))+
geom_histogram()+
theme_pubclean()+
facet_wrap(~name, scales = "free_x")+
theme(aspect.ratio = 1)
library(agridat)
data(crowder.seeds)
dd <- crowder.seeds %>%
unite("trt", c(gen, extract), remove = FALSE)
str(dd)
## 'data.frame': 21 obs. of 6 variables:
## $ plate : Factor w/ 21 levels "P1","P10","P11",..: 1 12 15 16 17 18 19 20 21 2 ...
## $ trt : chr "O75_bean" "O75_bean" "O75_bean" "O75_bean" ...
## $ gen : Factor w/ 2 levels "O73","O75": 2 2 2 2 2 1 1 1 1 1 ...
## $ extract: Factor w/ 2 levels "bean","cucumber": 1 1 1 1 1 1 1 1 1 1 ...
## $ germ : int 10 23 23 26 17 8 10 8 23 0 ...
## $ n : int 39 62 81 51 39 16 30 28 45 4 ...
dd %>%
ggplot(aes(trt, germ, group = plate))+
geom_linerange(aes(xmin=trt, xmax =trt, ymin = germ, ymax = n, group = plate), alpha = .85,
position = position_dodge(width = .25))+
geom_point(aes(fill = germ/n), shape =21, size =2.5, position = position_dodge(width = .25), alpha = .85)+
theme_classic()+
scale_fill_viridis_c()+
labs(y = "Germination (y, n)",
x = "Genotype",
fill = "Germination (%)")+
theme(legend.position = "bottom")
library(mgcv)
m1 <- gam(cbind(germ, n-germ) ~ extract*gen + s(plate, bs = "re"),
family = binomial(link = "logit"),
data = dd)
summary(m1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## cbind(germ, n - germ) ~ extract * gen + s(plate, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.46649 0.24364 -1.915 0.0555 .
## extractcucumber 0.51297 0.33625 1.526 0.1271
## genO75 -0.07568 0.31012 -0.244 0.8072
## extractcucumber:genO75 0.82630 0.43249 1.911 0.0561 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(plate) 7.055 17 14.6 0.02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.794 Deviance explained = 88%
## UBRE = 0.61564 Scale est. = 1 n = 21