#Ed401D week 2, dichotomous outcome

# get and set up the respiratory data

####################################part a
> library(HSAUR2)
Loading required package: tools
> data(respiratory)
> head(respiratory)
    centre treatment gender age status month subject
1        1   placebo female  46   poor     0       1
112      1   placebo female  46   poor     1       1
223      1   placebo female  46   poor     2       1
334      1   placebo female  46   poor     3       1
445      1   placebo female  46   poor     4       1
2        1   placebo female  28   poor     0       2

>  #Data manip from HSAUR #The baseline status, i.e., the status for month == 0, needs to  
>  #  enter the models as an explanatory variable (HSAUR)
>  #rearrange the data.frame respiratory in order to create a new variable baseline
>  resp <- subset(respiratory, month > "0")
>  resp$baseline <- rep(subset(respiratory, month == "0")$status, rep(4, 111))
>  resp$nstat <- as.numeric(resp$status == "good")
>  #new variable nstat is simply a dummy coding for a poor respiratory status
>  resp$month <- resp$month[, drop = TRUE]
> head(resp)
    centre treatment gender age status month subject baseline nstat
112      1   placebo female  46   poor     1       1     poor     0
223      1   placebo female  46   poor     2       1     poor     0
334      1   placebo female  46   poor     3       1     poor     0
445      1   placebo female  46   poor     4       1     poor     0
113      1   placebo female  28   poor     1       2     poor     0
224      1   placebo female  28   poor     2       2     poor     0

> str(resp)
'data.frame':   444 obs. of  9 variables:
 $ centre   : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
 $ treatment: Factor w/ 2 levels "placebo","treatment": 1 1 1 1 1 1 1 1 2 2 ...
 $ gender   : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ...
 $ age      : num  46 46 46 46 28 28 28 28 23 23 ...
 $ status   : Factor w/ 2 levels "poor","good": 1 1 1 1 1 1 1 1 2 2 ...
 $ month    : Ord.factor w/ 4 levels "1"<"2"<"3"<"4": 1 2 3 4 1 2 3 4 1 2 ...
 $ subject  : Factor w/ 111 levels "1","2","3","4",..: 1 1 1 1 2 2 2 2 3 3 ...
 $ baseline : Factor w/ 2 levels "poor","good": 1 1 1 1 1 1 1 1 2 2 ...
 $ nstat    : num  0 0 0 0 0 0 0 0 1 1 ...
> resp$monthN = as.numeric(resp$month) # we could use this for trends but we don't

> head(resp)
    centre treatment gender age status month subject baseline nstat monthN
112      1   placebo female  46   poor     1       1     poor     0      1
223      1   placebo female  46   poor     2       1     poor     0      2
334      1   placebo female  46   poor     3       1     poor     0      3
445      1   placebo female  46   poor     4       1     poor     0      4
113      1   placebo female  28   poor     1       2     poor     0      1
224      1   placebo female  28   poor     2       2     poor     0      2
> library(lme4)
Loading required package: lattice
Loading required package: Matrix



> resp$statusN = as.numeric(resp$status) - 1 # we need to do this to get lmList to work correctly
> resp[1:32,]
    centre treatment gender age status month subject baseline nstat monthN statusN
112      1   placebo female  46   poor     1       1     poor     0      1       0
223      1   placebo female  46   poor     2       1     poor     0      2       0
334      1   placebo female  46   poor     3       1     poor     0      3       0
445      1   placebo female  46   poor     4       1     poor     0      4       0
113      1   placebo female  28   poor     1       2     poor     0      1       0
224      1   placebo female  28   poor     2       2     poor     0      2       0
335      1   placebo female  28   poor     3       2     poor     0      3       0
446      1   placebo female  28   poor     4       2     poor     0      4       0
114      1 treatment female  23   good     1       3     good     1      1       1
225      1 treatment female  23   good     2       3     good     1      2       1
336      1 treatment female  23   good     3       3     good     1      3       1
447      1 treatment female  23   good     4       3     good     1      4       1
115      1   placebo female  44   good     1       4     good     1      1       1
226      1   placebo female  44   good     2       4     good     1      2       1
337      1   placebo female  44   good     3       4     good     1      3       1
448      1   placebo female  44   poor     4       4     good     0      4       0
116      1   placebo   male  13   good     1       5     good     1      1       1
227      1   placebo   male  13   good     2       5     good     1      2       1
338      1   placebo   male  13   good     3       5     good     1      3       1
449      1   placebo   male  13   good     4       5     good     1      4       1
117      1 treatment female  34   poor     1       6     poor     0      1       0
228      1 treatment female  34   poor     2       6     poor     0      2       0
339      1 treatment female  34   poor     3       6     poor     0      3       0
450      1 treatment female  34   poor     4       6     poor     0      4       0
118      1   placebo female  43   good     1       7     poor     1      1       1
229      1   placebo female  43   poor     2       7     poor     0      2       0
340      1   placebo female  43   good     3       7     poor     1      3       1
451      1   placebo female  43   good     4       7     poor     1      4       1
119      1 treatment female  28   poor     1       8     poor     0      1       0
230      1 treatment female  28   poor     2       8     poor     0      2       0
341      1 treatment female  28   poor     3       8     poor     0      3       0
452      1 treatment female  28   poor     4       8     poor     0      4       0

> respList_1 = lmList(statusN ~ 1 | subject, family = binomial, data = resp)
> coef(respList_1)[1:8,]  #in log-odds metric
[1] -23.566069 -23.566069  23.566069   1.098612  23.566069 -23.566069   1.098612 -23.566069
> exp(coef(respList_1))[1:8,]  # in odds metric
[1] 5.826215e-11 5.826215e-11 1.716381e+10 3.000000e+00 1.716381e+10 5.826215e-11 3.000000e+00 5.826215e-11

> resp[1:16,] # first four cases; these match lmList results
    centre treatment gender age status month subject baseline nstat monthN statusN
112      1   placebo female  46   poor     1       1     poor     0      1       0
223      1   placebo female  46   poor     2       1     poor     0      2       0
334      1   placebo female  46   poor     3       1     poor     0      3       0
445      1   placebo female  46   poor     4       1     poor     0      4       0
113      1   placebo female  28   poor     1       2     poor     0      1       0
224      1   placebo female  28   poor     2       2     poor     0      2       0
335      1   placebo female  28   poor     3       2     poor     0      3       0
446      1   placebo female  28   poor     4       2     poor     0      4       0
114      1 treatment female  23   good     1       3     good     1      1       1
225      1 treatment female  23   good     2       3     good     1      2       1
336      1 treatment female  23   good     3       3     good     1      3       1
447      1 treatment female  23   good     4       3     good     1      4       1
115      1   placebo female  44   good     1       4     good     1      1       1
226      1   placebo female  44   good     2       4     good     1      2       1
337      1   placebo female  44   good     3       4     good     1      3       1
448      1   placebo female  44   poor     4       4     good     0      4       0

> attach(resp)
> table(statusN, subject)
       subject
statusN 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
      0 4 4 0 1 0 4 1 4 0  2  0  1  3  4  0  3  4  1  4  2  2  0  0  3  4  4  1  2  2  4  4  0  1  2  3  4  3  4  4  4  3  3  0  4  1  4
      1 0 0 4 3 4 0 3 0 4  2  4  3  1  0  4  1  0  3  0  2  2  4  4  1  0  0  3  2  2  0  0  4  3  2  1  0  1  0  0  0  1  1  4  0  3  0
       subject
statusN 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
      0  4  0  0  1  1  3  3  1  0  1  4  1  0  1  2  3  0  4  0  0  4  0  0  2  0  4  3  3  0  0  2  0  0  2  1  0  1  0  0  0  0  2  2
      1  0  4  4  3  3  1  1  3  4  3  0  3  4  3  2  1  4  0  4  4  0  4  4  2  4  0  1  1  4  4  2  4  4  2  3  4  3  4  4  4  4  2  2
       subject
statusN 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
      0  1  4  0  4  1  0  3  0  0  1   4   1   3   0   3   3   4   0   0   0   0   0
      1  3  0  4  0  3  4  1  4  4  3   0   3   1   4   1   1   0   4   4   4   4   4

#a. try to do lmList on these data to get odds(good) for each of the each 111 subjects. Investigate effectiveness of treatment.
 
###SFYS might do lmList for treatment and placebo and then compare to two distributions
> respList_1P = lmList(statusN ~ 1 | subject, family = binomial, subset = treatment == "placebo" , data = resp)
> respList_1T = lmList(statusN ~ 1 | subject, family = binomial, subset = treatment == "treatment" , data = resp)

##compare odds(good)
> fivenum(exp(coef(respList_1P))[,1])
[1] 5.826215e-11 5.826215e-11 1.000000e+00 3.000000e+00 1.716381e+10
> fivenum(exp(coef(respList_1T))[,1])
[1] 5.826215e-11 3.333333e-01 3.000000e+00 1.716381e+10 1.716381e+10

> boxplot(coef(respList_1T)[,1], coef(respList_1P)[,1]) # in log-odds metric
> fivenum(coef(respList_1P)[,1])
[1] -2.356607e+01 -2.356607e+01 -1.110223e-16  1.098612e+00  2.356607e+01
> fivenum(coef(respList_1T)[,1])
[1] -23.566069  -1.098612   1.098612  23.566069  23.566069

# b Use lmer analyses to compare treament and placebo. Obtain a confidence interval for effectiveness of treament. 
#      Investigate gender differences in response to the intervention (i.e. the treatment)
   
# In terms of our modeling: Level 1 is simply the mean (no trend) param alph_0. Level 2 says alph_0 depent on treatment indicator.
> resp_lmera = glmer(status ~ treatment + (1| subject), family = binomial, data = resp)
> summary(resp_lmera)
Generalized linear mixed model fit by maximum likelihood ['glmerMod']
 Family: binomial ( logit )
Formula: status ~ treatment + (1 | subject) 
   Data: resp 

      AIC       BIC    logLik  deviance 
 482.6303  494.9178 -238.3152  476.6303 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 6.4      2.53    
Number of obs: 444, groups: subject, 111

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -0.4714     0.3875  -1.217 0.223775    
treatmenttreatment   2.0533     0.5660   3.628 0.000286 *** # only slightly larger standard error than large model in HSAUR
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr)
trtmnttrtmn -0.685

> exp(fixef(resp_lmera))
       (Intercept) treatmenttreatment 
          0.624135           7.793621  # not far from the 8.8 from the class ex

> confint(resp_lmera, method = "boot", nsim = 1000, boot.type = "perc")
Computing bootstrap confidence intervals ...
                            2.5 %    97.5 %
sd_(Intercept)|subject  1.7460690 3.2564314
(Intercept)            -1.3960714 0.3816819
treatmenttreatment      0.9364879 3.6391380
> exp(.9365)
[1] 2.551037    # endpoints of 95% CI for increase in odds of 'good' going from placebo to treatment
> exp(3.639)
[1] 38.05376

> # gender issues
# one way to look at thios is to make the Level 2 model a 2x2 factorial design, where the genderXtreament interaction is
the differential effectiveness for males and females

> resp_lmeraG = glmer(status ~ gender*treatment + (1| subject), family = binomial, data = resp)
> summary(resp_lmeraG)
Generalized linear mixed model fit by maximum likelihood ['glmerMod']
 Family: binomial ( logit )
Formula: status ~ gender * treatment + (1 | subject) 
   Data: resp 

      AIC       BIC    logLik  deviance 
 484.3427  504.8218 -237.1713  474.3427 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 6.324    2.515   
Number of obs: 444, groups: subject, 111

Fixed effects:
                              Estimate Std. Error z value Pr(>|z|)   
(Intercept)                    -0.2486     0.4587  -0.542  0.58792   
gendermale                     -0.7533     0.8496  -0.887  0.37528   
treatmenttreatment              1.6449     0.6297   2.612  0.00899 **
gendermale:treatmenttreatment   2.5456     1.7194   1.480  0.13875   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) gndrml trtmnt
gendermale  -0.540              
trtmnttrtmn -0.729  0.393       
gndrml:trtm  0.267 -0.494 -0.366
### so looks like there is an indication that males respond better to treatment but not significant

## preferable perhaps to simply straify on gender and do the treatment comparison within each gender.
> resp_lmeraM = glmer(status ~ treatment + (1| subject), family = binomial, subset = gender == "male" , data = resp)
> summary(resp_lmeraM)
Generalized linear mixed model fit by maximum likelihood ['glmerMod']
 Family: binomial ( logit )
Formula: status ~ treatment + (1 | subject) 
   Data: resp 
 Subset: gender == "male" 

     AIC      BIC   logLik deviance 
 98.2350 105.8003 -46.1175  92.2350 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 6.775    2.603   
Number of obs: 92, groups: subject, 23

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)   
(Intercept)         -1.0278     0.7369  -1.395  0.16307   
treatmenttreatment   4.2948     1.6510   2.601  0.00929 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr)
trtmnttrtmn -0.446

> resp_lmeraF = glmer(status ~ treatment + (1| subject), family = binomial, subset = gender == "female" , data = resp)
> summary(resp_lmeraF)
Generalized linear mixed model fit by maximum likelihood ['glmerMod']
 Family: binomial ( logit )
Formula: status ~ treatment + (1 | subject) 
   Data: resp 
 Subset: gender == "female" 

      AIC       BIC    logLik  deviance 
 388.0958  399.6867 -191.0479  382.0958 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 6.228    2.496   
Number of obs: 352, groups: subject, 88

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)   
(Intercept)         -0.2474     0.4557  -0.543  0.58728   
treatmenttreatment   1.6365     0.6256   2.616  0.00889 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr)
trtmnttrtmn -0.729
### you can see the effect of treatment is significant for both males and females, though perhaps stronger for males.

###############part c
## Extend the lmer model in part b by adding the age and baseline measurements to the level 2 model. Compare with part b results.

> resp_lmerb = glmer(status ~ treatment + baseline + age + (1| subject), family = binomial, data = resp)
> summary(resp_lmerb)
Generalized linear mixed model fit by maximum likelihood ['glmerMod']
 Family: binomial ( logit )
Formula: status ~ treatment + baseline + age + (1 | subject) 
   Data: resp 

      AIC       BIC    logLik  deviance 
 443.1055  463.5846 -216.5527  433.1055 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 4.09     2.022   
Number of obs: 444, groups: subject, 111

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -1.59513    0.77049  -2.070   0.0384 *  
treatmenttreatment  2.13684    0.51282   4.167 3.09e-05 ***
baselinegood        3.39489    0.52020   6.526 6.75e-11 ***
age                -0.01387    0.01886  -0.735   0.4622    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmnt bslngd
trtmnttrtmn -0.419              
baselinegod -0.373  0.206       
age         -0.832  0.070  0.045
> exp(fixef(resp_lmerb))
       (Intercept) treatmenttreatment       baselinegood                age 
         0.2028818          8.4726311         29.8113932          0.9862289 

> confint(resp_lmerb, method = "boot", nsim = 1000, boot.type = "perc")
Computing bootstrap confidence intervals ...
                             2.5 %      97.5 %
sd_(Intercept)|subject  1.31101566  2.75257231
(Intercept)            -3.24127917 -0.10195345
treatmenttreatment      1.08187999  3.45206820
baselinegood            2.29674612  4.81058305
age                    -0.05647499  0.02243799
> exp(1.0819)
[1] 2.95028
> exp(3.4521)
[1] 31.56661
# so extended part c model CI is a little narrower (2.95, 31.57) than (2.55, 38.05) from part b


> # model comparison is more dramatic than CI comparison
> anova(resp_lmera, resp_lmerb)
Data: resp
Models:
resp_lmera: status ~ treatment + (1 | subject)
resp_lmerb: status ~ treatment + baseline + age + (1 | subject)
           Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
resp_lmera  3 482.63 494.92 -238.31   476.63                             
resp_lmerb  5 443.11 463.58 -216.55   433.11 43.525      2  3.537e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1