Necessary steps to define the statistical model:
In general, the GLM can model any distribution from the exponential family:
library(tidyverse)
library(ggpubr)
n <- 2000
set.seed(42)
snorm <- rnorm(n, mean = 1, sd = sqrt(1))
spois <- rpois(n, lambda = 1)
sbinom <- rbinom(n, size = 12, prob = .3)
sbeta <- rbeta(n, shape1 = 2, shape2 = 4)
st <- rt(n, df = 10, ncp = 1)
sgamma <- rgamma(n, shape = 10, rate = 2)
data.frame(normal = snorm,
poisson = spois,
binomial = sbinom,
beta = sbeta,
t = st,
gamma = sgamma) %>%
pivot_longer(cols = everything()) %>%
mutate(name = factor(name,
levels = c("normal", "t", "gamma", "beta",
"poisson", "binomial"))) %>%
ggplot(aes(value))+
geom_histogram()+
theme_pubclean()+
facet_wrap(~name, scales = "free_x")+
theme(aspect.ratio = 1)
library(agridat)
data(beall.webworms)
dd <- beall.webworms
str(dd)
## 'data.frame': 1300 obs. of 7 variables:
## $ row : int 1 2 3 4 5 6 7 8 9 10 ...
## $ col : int 1 1 1 1 1 1 1 1 1 1 ...
## $ y : int 1 0 1 3 6 0 2 2 1 3 ...
## $ block: Factor w/ 13 levels "B1","B10","B11",..: 1 1 1 1 1 6 6 6 6 6 ...
## $ trt : Factor w/ 4 levels "T1","T2","T3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ spray: Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ lead : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
unique(dd %>%
dplyr::select(trt, spray, lead))
## trt spray lead
## 1 T1 N N
## 326 T2 Y N
## 651 T3 N Y
## 976 T4 Y Y
dd %>%
ggplot(aes(y , trt))+
geom_jitter(alpha = .25, width = 0.1)+
geom_boxplot(aes(color = trt, fill = trt), alpha = .4, outliers = FALSE)+
theme_classic()+
labs(y = "Treatment",
x = "Webworm Counts",
fill = "Treatment",
color = "Treatment")+
scale_fill_manual(values = c("#FC4C02", "#B47978", "#008E97", "#99D17B"))+
scale_color_manual(values = c("#FC4C02", "#B47978", "#008E97", "#99D17B"))+
scale_x_continuous(breaks = 0:9)+
theme(legend.position = "bottom")
library(mgcv)
m1 <- gam(y ~ spray*lead,
family = poisson(link = "log"),
data = dd)
# summary(m1)
anova(m1)
##
## Family: poisson
## Link function: log
##
## Formula:
## y ~ spray * lead
##
## Parametric Terms:
## df Chi.sq p-value
## spray 1 125.53 < 2e-16
## lead 1 42.41 7.41e-11
## spray:lead 1 4.47 0.0345