## R commands for STAT 485
## Intermediate Statistical Techniques for Machine Learning and Big Data
## Version: 2024.01.26
## Reference: http://homepages.math.uic.edu/~jyang06/stat485/stat485.html

## R package: ElemStatLearn, "ElemStatLearn_2015.6.26.2.tar"
## Source: https://cran.r-project.org/src/contrib/Archive/ElemStatLearn/
## Data Sets, Functions and Examples from the Book (ESL):
## "The Elements of Statistical Learning, Data Mining, Inference, and Prediction"
## by Trevor Hastie, Robert Tibshirani and Jerome Friedman,
## Second Edition, 2017 (corrected 12th printing)
## Source: https://hastie.su.domains/ElemStatLearn/download.html

## Reference: Section 3.8.4 in the ESL book

## Purpose: fit group-lasso models to the birth weight data with "grpreg",
## first as a linear regression on the `bwt` response, then as a logistic
## regression on the `low` response, choosing the tuning parameter by
## 10-fold cross-validation in each case.

## install the package "grpreg"
# install.packages("grpreg")

# Raw birth weight data ("MASS") ----
library(MASS)
data(birthwt)
dim(birthwt)  # 189 10
str(birthwt)

# Grouped birth weight data ("grpreg") ----
library(grpreg)
data(Birthwt)
X <- Birthwt$X          # Matrix of predictors
dim(X)                  # 189 16
group <- Birthwt$group  # Vector describing how the columns of X are grouped

# Group lasso: Linear regression ----
y <- Birthwt$bwt
fit <- grpreg(X, y, group, penalty = "grLasso")
plot(fit, label = TRUE)               # plot coefficients with predictor labels
plot(fit, norm = TRUE, label = TRUE)  # plot coefficients with group labels

# choose tuning parameter
set.seed(325)  # fixed seed so the CV fold assignment is reproducible
cvfit <- cv.grpreg(X, y, group, nfolds = 10, penalty = "grLasso")
# NOTE(review): `norm`/`label` are passed exactly as in the original call;
# confirm plot() for cv.grpreg objects actually uses them -- TODO confirm.
plot(cvfit, norm = TRUE, label = TRUE)
summary(cvfit)
coef(cvfit)  ## Beta at minimum CVE

# Group lasso: Logistic regression ----
y.low <- Birthwt$low
fit.logit <- grpreg(X, y.low, group, penalty = "grLasso", family = "binomial")
plot(fit.logit, norm = TRUE, label = TRUE)

# choose tuning parameter
set.seed(598)  # fixed seed so the CV fold assignment is reproducible
cvfit.logit <- cv.grpreg(
  X, y.low, group,
  nfolds = 10, penalty = "grLasso", family = "binomial"
)
# NOTE(review): same `norm`/`label` caveat as for the linear CV plot.
plot(cvfit.logit, norm = TRUE, label = TRUE)
# Summarise the logistic CV fit. The original script called summary()/coef()
# on `cvfit` here, which re-reported the linear-regression results.
summary(cvfit.logit)
coef(cvfit.logit)  ## Beta at minimum CVE