Recall Figure 2 in Hooten et al. (2015).
\[\hat{\beta}_{OLS}=(\mathbf{X}^\top\mathbf{X})^{-1}\mathbf{X}^\top\mathbf{y}\]
library(glmnet)
This data example was used in Zou and Hastie (2005) and is available for easy download here.
dd <- read.csv("data/prostate.csv")
names(dd)
## [1] "lcavol" "lweight" "age" "lbph" "svi" "lcp" "gleason"
## [8] "pgg45" "lpsa" "train"
y <- dd$lpsa # log prostate-specific antigen
X <- model.matrix(lpsa ~ ., dd[,-10])[, -1]
m_ridge <- glmnet(X, y, family = c("gaussian"), alpha = 0, standardize = FALSE)
par(mfrow = c(1, 2))
plot(m_ridge)
plot(m_ridge, xvar = "lambda", label = TRUE)
fit_lasso <- glmnet(X, y, family = c("gaussian"), alpha = 1, standardize = FALSE)
par(mfrow = c(1, 2))
plot(fit_lasso)
plot(fit_lasso, xvar = "lambda", label = TRUE)
fit_elasticnet <- glmnet(X, y, family = c("gaussian"), alpha = .5, standardize = FALSE)
par(mfrow = c(1, 2))
plot(fit_elasticnet)
plot(fit_elasticnet, xvar = "lambda", label = TRUE)
pca <- prcomp(dd)
pca
## Standard deviations (1, .., p=10):
## [1] 28.3128508 7.1567817 1.5647572 1.3808222 0.8129917 0.5710866
## [7] 0.4881765 0.4136752 0.3298806 0.2713449
##
## Rotation (n x k) = (10 x 10):
## PC1 PC2 PC3 PC4 PC5
## lcavol 0.018209408 0.016873128 -0.60226447 -0.096028751 -0.189062877
## lweight 0.001733282 0.020076745 -0.09288546 0.109978129 -0.076095426
## age 0.077590211 0.993864995 0.01728889 -0.072237987 0.007021371
## lbph 0.004375509 0.071633091 -0.06293385 0.970367573 0.157836437
## svi 0.006704749 -0.001112326 -0.13664589 -0.051679105 0.049232261
## lcp 0.031208032 -0.012011062 -0.54156574 -0.127311476 0.775311057
## gleason 0.019215746 0.004958514 -0.04232931 -0.007612331 -0.007549060
## pgg45 0.995959332 -0.077956766 0.03836989 0.005570729 -0.012100079
## lpsa 0.017321304 0.008215582 -0.55417421 0.109018476 -0.572842885
## train 0.001694013 0.009922213 0.04529459 -0.031603567 -0.040244907
## PC6 PC7 PC8 PC9 PC10
## lcavol 0.671290542 0.358944315 0.094572994 0.011688492 -0.056055777
## lweight -0.154894134 0.021776013 0.179714119 0.946642705 -0.142072328
## age -0.014059758 -0.011933186 0.009168881 -0.013345716 0.006583614
## lbph 0.094407842 0.064132049 -0.037880421 -0.092203422 -0.037432071
## svi -0.197564040 0.007911475 0.005382256 -0.179720175 -0.951191208
## lcp -0.226551635 -0.070686058 -0.060086870 0.025118571 0.166461277
## gleason 0.384150965 -0.659546669 -0.610006091 0.172544720 -0.115093780
## pgg45 -0.001450991 0.011953094 0.013798218 -0.000957456 0.001330172
## lpsa -0.490045822 -0.233226076 -0.085297103 -0.146235623 0.171142213
## train -0.195422946 0.609779686 -0.757645677 0.104707457 0.014718193
# factoextra::fviz_pca_biplot(pca, label = "var", repel = TRUE)