#Ed401D week 2, dichotomous outcome # get and set up the respiratory data ####################################part a > library(HSAUR2) Loading required package: tools > data(respiratory) > head(respiratory) centre treatment gender age status month subject 1 1 placebo female 46 poor 0 1 112 1 placebo female 46 poor 1 1 223 1 placebo female 46 poor 2 1 334 1 placebo female 46 poor 3 1 445 1 placebo female 46 poor 4 1 2 1 placebo female 28 poor 0 2 > #Data manip from HSAUR #The baseline status, i.e., the status for month == 0, needs to > # enter the models as an explanatory variable (HSAUR) > #rearrange the data.frame respiratory in order to create a new variable baseline > resp <- subset(respiratory, month > "0") > resp$baseline <- rep(subset(respiratory, month == "0")$status, rep(4, 111)) > resp$nstat <- as.numeric(resp$status == "good") > #new variable nstat is simply a dummy coding for a poor respiratory status > resp$month <- resp$month[, drop = TRUE] > head(resp) centre treatment gender age status month subject baseline nstat 112 1 placebo female 46 poor 1 1 poor 0 223 1 placebo female 46 poor 2 1 poor 0 334 1 placebo female 46 poor 3 1 poor 0 445 1 placebo female 46 poor 4 1 poor 0 113 1 placebo female 28 poor 1 2 poor 0 224 1 placebo female 28 poor 2 2 poor 0 > str(resp) 'data.frame': 444 obs. of 9 variables: $ centre : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ... $ treatment: Factor w/ 2 levels "placebo","treatment": 1 1 1 1 1 1 1 1 2 2 ... $ gender : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ... $ age : num 46 46 46 46 28 28 28 28 23 23 ... $ status : Factor w/ 2 levels "poor","good": 1 1 1 1 1 1 1 1 2 2 ... $ month : Ord.factor w/ 4 levels "1"<"2"<"3"<"4": 1 2 3 4 1 2 3 4 1 2 ... $ subject : Factor w/ 111 levels "1","2","3","4",..: 1 1 1 1 2 2 2 2 3 3 ... $ baseline : Factor w/ 2 levels "poor","good": 1 1 1 1 1 1 1 1 2 2 ... $ nstat : num 0 0 0 0 0 0 0 0 1 1 ... > resp$monthN = as.numeric(resp$month) # we could use this for trends but we don't > head(resp) centre treatment gender age status month subject baseline nstat monthN 112 1 placebo female 46 poor 1 1 poor 0 1 223 1 placebo female 46 poor 2 1 poor 0 2 334 1 placebo female 46 poor 3 1 poor 0 3 445 1 placebo female 46 poor 4 1 poor 0 4 113 1 placebo female 28 poor 1 2 poor 0 1 224 1 placebo female 28 poor 2 2 poor 0 2 > library(lme4) Loading required package: lattice Loading required package: Matrix > resp$statusN = as.numeric(resp$status) - 1 # we need to do this to get lmList to work correctly > resp[1:32,] centre treatment gender age status month subject baseline nstat monthN statusN 112 1 placebo female 46 poor 1 1 poor 0 1 0 223 1 placebo female 46 poor 2 1 poor 0 2 0 334 1 placebo female 46 poor 3 1 poor 0 3 0 445 1 placebo female 46 poor 4 1 poor 0 4 0 113 1 placebo female 28 poor 1 2 poor 0 1 0 224 1 placebo female 28 poor 2 2 poor 0 2 0 335 1 placebo female 28 poor 3 2 poor 0 3 0 446 1 placebo female 28 poor 4 2 poor 0 4 0 114 1 treatment female 23 good 1 3 good 1 1 1 225 1 treatment female 23 good 2 3 good 1 2 1 336 1 treatment female 23 good 3 3 good 1 3 1 447 1 treatment female 23 good 4 3 good 1 4 1 115 1 placebo female 44 good 1 4 good 1 1 1 226 1 placebo female 44 good 2 4 good 1 2 1 337 1 placebo female 44 good 3 4 good 1 3 1 448 1 placebo female 44 poor 4 4 good 0 4 0 116 1 placebo male 13 good 1 5 good 1 1 1 227 1 placebo male 13 good 2 5 good 1 2 1 338 1 placebo male 13 good 3 5 good 1 3 1 449 1 placebo male 13 good 4 5 good 1 4 1 117 1 treatment female 34 poor 1 6 poor 0 1 0 228 1 treatment female 34 poor 2 6 poor 0 2 0 339 1 treatment female 34 poor 3 6 poor 0 3 0 450 1 treatment female 34 poor 4 6 poor 0 4 0 118 1 placebo female 43 good 1 7 poor 1 1 1 229 1 placebo female 43 poor 2 7 poor 0 2 0 340 1 placebo female 43 good 3 7 poor 1 3 1 451 1 placebo female 43 good 4 7 poor 1 4 1 119 1 treatment female 28 poor 1 8 poor 0 1 0 230 1 treatment female 28 poor 2 8 poor 0 2 0 341 1 treatment female 28 poor 3 8 poor 0 3 0 452 1 treatment female 28 poor 4 8 poor 0 4 0 > respList_1 = lmList(statusN ~ 1 | subject, family = binomial, data = resp) > coef(respList_1)[1:8,] #in log-odds metric [1] -23.566069 -23.566069 23.566069 1.098612 23.566069 -23.566069 1.098612 -23.566069 > exp(coef(respList_1))[1:8,] # in odds metric [1] 5.826215e-11 5.826215e-11 1.716381e+10 3.000000e+00 1.716381e+10 5.826215e-11 3.000000e+00 5.826215e-11 > resp[1:16,] # first four cases; these match lmList results centre treatment gender age status month subject baseline nstat monthN statusN 112 1 placebo female 46 poor 1 1 poor 0 1 0 223 1 placebo female 46 poor 2 1 poor 0 2 0 334 1 placebo female 46 poor 3 1 poor 0 3 0 445 1 placebo female 46 poor 4 1 poor 0 4 0 113 1 placebo female 28 poor 1 2 poor 0 1 0 224 1 placebo female 28 poor 2 2 poor 0 2 0 335 1 placebo female 28 poor 3 2 poor 0 3 0 446 1 placebo female 28 poor 4 2 poor 0 4 0 114 1 treatment female 23 good 1 3 good 1 1 1 225 1 treatment female 23 good 2 3 good 1 2 1 336 1 treatment female 23 good 3 3 good 1 3 1 447 1 treatment female 23 good 4 3 good 1 4 1 115 1 placebo female 44 good 1 4 good 1 1 1 226 1 placebo female 44 good 2 4 good 1 2 1 337 1 placebo female 44 good 3 4 good 1 3 1 448 1 placebo female 44 poor 4 4 good 0 4 0 > attach(resp) > table(statusN, subject) subject statusN 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 0 4 4 0 1 0 4 1 4 0 2 0 1 3 4 0 3 4 1 4 2 2 0 0 3 4 4 1 2 2 4 4 0 1 2 3 4 3 4 4 4 3 3 0 4 1 4 1 0 0 4 3 4 0 3 0 4 2 4 3 1 0 4 1 0 3 0 2 2 4 4 1 0 0 3 2 2 0 0 4 3 2 1 0 1 0 0 0 1 1 4 0 3 0 subject statusN 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 0 4 0 0 1 1 3 3 1 0 1 4 1 0 1 2 3 0 4 0 0 4 0 0 2 0 4 3 3 0 0 2 0 0 2 1 0 1 0 0 0 0 2 2 1 0 4 4 3 3 1 1 3 4 3 0 3 4 3 2 1 4 0 4 4 0 4 4 2 4 0 1 1 4 4 2 4 4 2 3 4 3 4 4 4 4 2 2 subject statusN 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 0 1 4 0 4 1 0 3 0 0 1 4 1 3 0 3 3 4 0 0 0 0 0 1 3 0 4 0 3 4 1 4 4 3 0 3 1 4 1 1 0 4 4 4 4 4 #a. try to do lmList on these data to get odds(good) for each of the each 111 subjects. Investigate effectiveness of treatment. ###SFYS might do lmList for treatment and placebo and then compare to two distributions > respList_1P = lmList(statusN ~ 1 | subject, family = binomial, subset = treatment == "placebo" , data = resp) > respList_1T = lmList(statusN ~ 1 | subject, family = binomial, subset = treatment == "treatment" , data = resp) ##compare odds(good) > fivenum(exp(coef(respList_1P))[,1]) [1] 5.826215e-11 5.826215e-11 1.000000e+00 3.000000e+00 1.716381e+10 > fivenum(exp(coef(respList_1T))[,1]) [1] 5.826215e-11 3.333333e-01 3.000000e+00 1.716381e+10 1.716381e+10 > boxplot(coef(respList_1T)[,1], coef(respList_1P)[,1]) # in log-odds metric > fivenum(coef(respList_1P)[,1]) [1] -2.356607e+01 -2.356607e+01 -1.110223e-16 1.098612e+00 2.356607e+01 > fivenum(coef(respList_1T)[,1]) [1] -23.566069 -1.098612 1.098612 23.566069 23.566069 # b Use lmer analyses to compare treament and placebo. Obtain a confidence interval for effectiveness of treament. # Investigate gender differences in response to the intervention (i.e. the treatment) # In terms of our modeling: Level 1 is simply the mean (no trend) param alph_0. Level 2 says alph_0 depent on treatment indicator. > resp_lmera = glmer(status ~ treatment + (1| subject), family = binomial, data = resp) > summary(resp_lmera) Generalized linear mixed model fit by maximum likelihood ['glmerMod'] Family: binomial ( logit ) Formula: status ~ treatment + (1 | subject) Data: resp AIC BIC logLik deviance 482.6303 494.9178 -238.3152 476.6303 Random effects: Groups Name Variance Std.Dev. subject (Intercept) 6.4 2.53 Number of obs: 444, groups: subject, 111 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.4714 0.3875 -1.217 0.223775 treatmenttreatment 2.0533 0.5660 3.628 0.000286 *** # only slightly larger standard error than large model in HSAUR --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Correlation of Fixed Effects: (Intr) trtmnttrtmn -0.685 > exp(fixef(resp_lmera)) (Intercept) treatmenttreatment 0.624135 7.793621 # not far from the 8.8 from the class ex > confint(resp_lmera, method = "boot", nsim = 1000, boot.type = "perc") Computing bootstrap confidence intervals ... 2.5 % 97.5 % sd_(Intercept)|subject 1.7460690 3.2564314 (Intercept) -1.3960714 0.3816819 treatmenttreatment 0.9364879 3.6391380 > exp(.9365) [1] 2.551037 # endpoints of 95% CI for increase in odds of 'good' going from placebo to treatment > exp(3.639) [1] 38.05376 > # gender issues # one way to look at thios is to make the Level 2 model a 2x2 factorial design, where the genderXtreament interaction is the differential effectiveness for males and females > resp_lmeraG = glmer(status ~ gender*treatment + (1| subject), family = binomial, data = resp) > summary(resp_lmeraG) Generalized linear mixed model fit by maximum likelihood ['glmerMod'] Family: binomial ( logit ) Formula: status ~ gender * treatment + (1 | subject) Data: resp AIC BIC logLik deviance 484.3427 504.8218 -237.1713 474.3427 Random effects: Groups Name Variance Std.Dev. subject (Intercept) 6.324 2.515 Number of obs: 444, groups: subject, 111 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.2486 0.4587 -0.542 0.58792 gendermale -0.7533 0.8496 -0.887 0.37528 treatmenttreatment 1.6449 0.6297 2.612 0.00899 ** gendermale:treatmenttreatment 2.5456 1.7194 1.480 0.13875 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Correlation of Fixed Effects: (Intr) gndrml trtmnt gendermale -0.540 trtmnttrtmn -0.729 0.393 gndrml:trtm 0.267 -0.494 -0.366 ### so looks like there is an indication that males respond better to treatment but not significant ## preferable perhaps to simply straify on gender and do the treatment comparison within each gender. > resp_lmeraM = glmer(status ~ treatment + (1| subject), family = binomial, subset = gender == "male" , data = resp) > summary(resp_lmeraM) Generalized linear mixed model fit by maximum likelihood ['glmerMod'] Family: binomial ( logit ) Formula: status ~ treatment + (1 | subject) Data: resp Subset: gender == "male" AIC BIC logLik deviance 98.2350 105.8003 -46.1175 92.2350 Random effects: Groups Name Variance Std.Dev. subject (Intercept) 6.775 2.603 Number of obs: 92, groups: subject, 23 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -1.0278 0.7369 -1.395 0.16307 treatmenttreatment 4.2948 1.6510 2.601 0.00929 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Correlation of Fixed Effects: (Intr) trtmnttrtmn -0.446 > resp_lmeraF = glmer(status ~ treatment + (1| subject), family = binomial, subset = gender == "female" , data = resp) > summary(resp_lmeraF) Generalized linear mixed model fit by maximum likelihood ['glmerMod'] Family: binomial ( logit ) Formula: status ~ treatment + (1 | subject) Data: resp Subset: gender == "female" AIC BIC logLik deviance 388.0958 399.6867 -191.0479 382.0958 Random effects: Groups Name Variance Std.Dev. subject (Intercept) 6.228 2.496 Number of obs: 352, groups: subject, 88 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.2474 0.4557 -0.543 0.58728 treatmenttreatment 1.6365 0.6256 2.616 0.00889 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Correlation of Fixed Effects: (Intr) trtmnttrtmn -0.729 ### you can see the effect of treatment is significant for both males and females, though perhaps stronger for males. ###############part c ## Extend the lmer model in part b by adding the age and baseline measurements to the level 2 model. Compare with part b results. > resp_lmerb = glmer(status ~ treatment + baseline + age + (1| subject), family = binomial, data = resp) > summary(resp_lmerb) Generalized linear mixed model fit by maximum likelihood ['glmerMod'] Family: binomial ( logit ) Formula: status ~ treatment + baseline + age + (1 | subject) Data: resp AIC BIC logLik deviance 443.1055 463.5846 -216.5527 433.1055 Random effects: Groups Name Variance Std.Dev. subject (Intercept) 4.09 2.022 Number of obs: 444, groups: subject, 111 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -1.59513 0.77049 -2.070 0.0384 * treatmenttreatment 2.13684 0.51282 4.167 3.09e-05 *** baselinegood 3.39489 0.52020 6.526 6.75e-11 *** age -0.01387 0.01886 -0.735 0.4622 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Correlation of Fixed Effects: (Intr) trtmnt bslngd trtmnttrtmn -0.419 baselinegod -0.373 0.206 age -0.832 0.070 0.045 > exp(fixef(resp_lmerb)) (Intercept) treatmenttreatment baselinegood age 0.2028818 8.4726311 29.8113932 0.9862289 > confint(resp_lmerb, method = "boot", nsim = 1000, boot.type = "perc") Computing bootstrap confidence intervals ... 2.5 % 97.5 % sd_(Intercept)|subject 1.31101566 2.75257231 (Intercept) -3.24127917 -0.10195345 treatmenttreatment 1.08187999 3.45206820 baselinegood 2.29674612 4.81058305 age -0.05647499 0.02243799 > exp(1.0819) [1] 2.95028 > exp(3.4521) [1] 31.56661 # so extended part c model CI is a little narrower (2.95, 31.57) than (2.55, 38.05) from part b > # model comparison is more dramatic than CI comparison > anova(resp_lmera, resp_lmerb) Data: resp Models: resp_lmera: status ~ treatment + (1 | subject) resp_lmerb: status ~ treatment + baseline + age + (1 | subject) Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) resp_lmera 3 482.63 494.92 -238.31 476.63 resp_lmerb 5 443.11 463.58 -216.55 433.11 43.525 2 3.537e-10 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1