## R code for multinomial logistic models ## Example: Trauma clinical trial ## Reference: Chuang-Stein, C. and Agresti, A. (1997). ## Tutorial in Biostatistics-A Review of tests for detecting a monotone dose-response relationship with ordinal response data, ## Statistics in Medicine, 16, 2599-2618. ## Five ordered response categories: death, vegetative state, major disability, minor disability, and good recovery ## Often called the Glasgow Outcome Scale (GOS) in the literature on critical care ## Treatment groups: placebo, low dose, medium dose, and high dose trauma <- read.table("trauma.txt", header=TRUE) trauma library(VGAM) fit.cumpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative(parallel=TRUE), data=trauma) fit.cumnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative, data=trauma) fit.crpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=sratio(parallel=T), trauma) fit.crnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=sratio, trauma) fit.acpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=acat(parallel=T), trauma) fit.acnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=acat(reverse=T), trauma) fit.nompo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=multinomial(parallel=TRUE), trauma) fit.nomnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=multinomial, trauma) # model selection output=rbind( c(AIC(fit.cumpo1),AIC(fit.cumnpo1),AIC(fit.crpo1),AIC(fit.crnpo1),AIC(fit.acpo1),AIC(fit.acnpo1),AIC(fit.nompo1),AIC(fit.nomnpo1)), c(BIC(fit.cumpo1),BIC(fit.cumnpo1),BIC(fit.crpo1),BIC(fit.crnpo1),BIC(fit.acpo1),BIC(fit.acnpo1),BIC(fit.nompo1),BIC(fit.nomnpo1)) ) rownames(output)=c("AIC","BIC") colnames(output)=c("Cumulative po", "Cumulative npo", "Continuation-ratio po", "Continuation-ratio npo", "Adjacent-categories po", "Adjacent-categories npo", "Baseline-category po", "Baseline-category npo") output #best: cumnpo # Cumulative po Cumulative npo Continuation-ratio po Continuation-ratio npo # AIC 107.7456 99.41481 108.9825 101.36385 # BIC 104.6771 94.50517 105.9140 96.45421 # Adjacent-categories po Adjacent-categories npo Baseline-category po Baseline-category npo # AIC 107.6684 101.54164 214.3567 101.54164 # BIC 104.5999 96.63199 213.1293 96.63199 fit.cumnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative, data=trauma) AIC(fit.cumnpo1) #[1] 99.41481 BIC(fit.cumnpo1) #[1] 94.50517 round(coef(fit.cumnpo1),3) # (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4 # -0.865 -0.094 0.706 1.909 # dose:1 dose:2 dose:3 dose:4 # -0.113 -0.269 -0.182 -0.119 temp=coef(fit.cumnpo1) dim(temp)=c(4,2) rownames(temp)=c("death", "vegetative state", "major disability", "minor disability") colnames(temp)=c("Intercept", "Dose") round(temp,3) ## recover the raw data from the summarized data x <- c() y <- c() for(i in 1:4) for(j in 1:5) { x=c(x, rep(i, trauma[i,j+1])); y=c(y, rep(j, trauma[i,j+1])); } table(x,y) ## generate 5-fold partition set.seed(333) par.label <- c(rep(1:5, floor(802/5)),1,2) par.label <- sample(par.label, size=802, replace=F) error.count <- 0 for(ipar in 1:5) { x.train=x[par.label != ipar]; y.train=y[par.label != ipar]; x.test=x[par.label==ipar]; y.test=y[par.label==ipar]; data.train = trauma; data.train[1:4,2:6]=table(x.train, y.train)[1:4,1:5]; fit.cumpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative(parallel=TRUE), data=data.train); data.test = trauma; data.test[1:4,2:6]=table(x.test, y.test)[1:4,1:5]; pred.test = predictvglm(fit.cumpo1, newdata=data.test, type="response") fittedlabel <- apply(pred.test, 1, which.max); data.temp=data.test[,2:6]; for(i in 1:4) error.count = error.count + sum(data.temp[i,-fittedlabel[i]]); } ## In order to perform 5-fold CV ## you may use (1) error count (how many wrong predictions) ## (2) cross-entropy loss ## check training errors pred=apply(fit.cumpo1@fitted.values,1,which.max) x=trauma[,2:6] sum(trauma) sum(x[cbind(1:4,pred)]) #228 predictvglm(fit.cumpo1, newdata=trauma, type="response")