## R code for multinomial logistic models

## Example: Trauma clinical trial
## Reference: Chuang-Stein, C. and Agresti, A. (1997). 
## Tutorial in Biostatistics-A Review of tests for detecting a monotone dose-response relationship with ordinal response data, 
## Statistics in Medicine, 16, 2599-2618.

## Five ordered response categories: death, vegetative state, major disability, minor disability, and good recovery
## Often called the Glasgow Outcome Scale (GOS) in the literature on critical care
## Treatment groups:  placebo, low dose, medium dose, and high dose
trauma <- read.table("trauma.txt", header=TRUE)
trauma

library(VGAM) 
fit.cumpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative(parallel=TRUE), data=trauma)
fit.cumnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative, data=trauma)
fit.crpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=sratio(parallel=T), trauma)
fit.crnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=sratio, trauma)
fit.acpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=acat(parallel=T), trauma)
fit.acnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=acat(reverse=T), trauma)
fit.nompo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=multinomial(parallel=TRUE), trauma)
fit.nomnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose, family=multinomial, trauma)
# model selection
output=rbind(
c(AIC(fit.cumpo1),AIC(fit.cumnpo1),AIC(fit.crpo1),AIC(fit.crnpo1),AIC(fit.acpo1),AIC(fit.acnpo1),AIC(fit.nompo1),AIC(fit.nomnpo1)),
c(BIC(fit.cumpo1),BIC(fit.cumnpo1),BIC(fit.crpo1),BIC(fit.crnpo1),BIC(fit.acpo1),BIC(fit.acnpo1),BIC(fit.nompo1),BIC(fit.nomnpo1))
)
rownames(output)=c("AIC","BIC")
colnames(output)=c("Cumulative po", "Cumulative npo", "Continuation-ratio po", "Continuation-ratio npo", "Adjacent-categories po", "Adjacent-categories npo", "Baseline-category po", "Baseline-category npo")
output
#best: cumnpo
# Cumulative po Cumulative npo Continuation-ratio po Continuation-ratio npo
# AIC      107.7456       99.41481              108.9825              101.36385
# BIC      104.6771       94.50517              105.9140               96.45421
# Adjacent-categories po Adjacent-categories npo Baseline-category po Baseline-category npo
# AIC               107.6684               101.54164             214.3567             101.54164
# BIC               104.5999                96.63199             213.1293              96.63199


fit.cumnpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative, data=trauma)
AIC(fit.cumnpo1)
#[1] 99.41481
BIC(fit.cumnpo1)
#[1] 94.50517
round(coef(fit.cumnpo1),3)
# (Intercept):1 (Intercept):2 (Intercept):3 (Intercept):4 
# -0.865        -0.094         0.706         1.909 
# dose:1        dose:2        dose:3        dose:4 
# -0.113        -0.269        -0.182        -0.119 
temp=coef(fit.cumnpo1)
dim(temp)=c(4,2)
rownames(temp)=c("death", "vegetative state", "major disability", "minor disability")
colnames(temp)=c("Intercept", "Dose")
round(temp,3)


## recover the raw data from the summarized data
x <- c()
y <- c()
for(i in 1:4) for(j in 1:5) {
  x=c(x, rep(i, trauma[i,j+1]));
  y=c(y, rep(j, trauma[i,j+1]));
}
table(x,y)

## generate 5-fold partition
set.seed(333)
par.label <- c(rep(1:5, floor(802/5)),1,2)
par.label <- sample(par.label, size=802, replace=F)

error.count <- 0
for(ipar in 1:5) {
  x.train=x[par.label != ipar];
  y.train=y[par.label != ipar];
  x.test=x[par.label==ipar];
  y.test=y[par.label==ipar];
  
  data.train = trauma;
  data.train[1:4,2:6]=table(x.train, y.train)[1:4,1:5];
  fit.cumpo1 <- vglm(cbind(y1,y2,y3,y4,y5) ~ dose,family=cumulative(parallel=TRUE), data=data.train);

  data.test = trauma;
  data.test[1:4,2:6]=table(x.test, y.test)[1:4,1:5];
  pred.test = predictvglm(fit.cumpo1, newdata=data.test, type="response")
  fittedlabel <- apply(pred.test, 1, which.max);
  data.temp=data.test[,2:6];
  for(i in 1:4) error.count = error.count + sum(data.temp[i,-fittedlabel[i]]);
  
}

## In order to perform 5-fold CV
## you may use (1) error count (how many wrong predictions)
##             (2) cross-entropy loss

## check training errors
pred=apply(fit.cumpo1@fitted.values,1,which.max)
x=trauma[,2:6]
sum(trauma)
sum(x[cbind(1:4,pred)]) #228

predictvglm(fit.cumpo1, newdata=trauma, type="response")