Ordinal association

Job satisfaction (response) against age (predictor)

job_satis = matrix(c(34, 53, 88,
                     80, 174, 304,
                     29, 75, 172), 3, 3, byrow=TRUE) # Table 2.8
rownames(job_satis) = c('<30', '30-50', '>50')
colnames(job_satis) = c(1,2,3)
job_satis
A matrix: 3 × 3 of type dbl
123
<3034 53 88
30-5080174304
>5029 75172

Happiness and Political Ideology

happiness = matrix(c(13, 29, 15,
                     23, 59, 47,
                     14, 67, 54), 3, 3, byrow=TRUE) # Table 3.7
rownames(happiness) = c('Liberal', 'Moderate', 'Conservative')
colnames(happiness) = c('Not too happy', 'Pretty happy', 'Very happy')
happiness
sum(happiness)
A matrix: 3 × 3 of type dbl
Not too happyPretty happyVery happy
Liberal132915
Moderate235947
Conservative146754
321

Kruskal \(\gamma\)

install.packages('MESS', repos='http://cloud.r-project.org')
The downloaded binary packages are in
	/var/folders/r1/yzxk5h4j5r327ymmt5msy3n00000gr/T//RtmpfZdZnA/downloaded_packages
library(MESS)
gkgamma(happiness)
	Goodman-Kruskal's gamma for ordinal categorical data

data:  happiness
Z = 2.3386, p-value = 0.01936
95 percent confidence interval:
 0.03229489 0.33750511
sample estimates:
Goodman-Kruskal's gamma 
                 0.1849 
  • How to compute \(p\)-value?


Bootstrap to find SE

gamma = function(data_table) {
    N_c = 0
    N_d = 0
    for (i in 1:nrow(data_table)) {
        for (j in 1:ncol(data_table)) {
            for (h in 1:nrow(data_table)) {
                for (k in 1:ncol(data_table)) {
                    if ((h > i) & (k > j)) {
                       N_c = N_c + data_table[i, j] * data_table[h, k]
                    }

                }
            }
            for (h in 1:nrow(data_table)) {
                for (k in 1:ncol(data_table)) {
                    if ((h > i) & (k < j)) {
                       N_d = N_d + data_table[i, j] * data_table[h, k]
                    }
                }
            }
            
        }
    }
    return((N_c-N_d)/(N_c+N_d))
}
gamma(happiness)
0.1849

  • To bootstrap, we’ll make a flat file representation

flat_df = function(data_table) {
    flat_ = matrix(NA, sum(data_table), 2)
    idx = 1
    for(i in 1:nrow(data_table)) {
        for (j in 1:ncol(data_table)) {
            for (k in 1:data_table[i,j]) {
            flat_[idx, 1] = i
            flat_[idx, 2] = j
            idx = idx + 1
            }
        }
    }
    flat_ = data.frame(X=flat_[,1], Y=flat_[,2])
    return(flat_)
}

df_job_satis = flat_df(job_satis)
table(df_job_satis)
job_satis
   Y
X     1   2   3
  1  34  53  88
  2  80 174 304
  3  29  75 172
A matrix: 3 × 3 of type dbl
123
<3034 53 88
30-5080174304
>5029 75172
df_happiness = flat_df(happiness)
table(df_happiness)
happiness
   Y
X    1  2  3
  1 13 29 15
  2 23 59 47
  3 14 67 54
A matrix: 3 × 3 of type dbl
Not too happyPretty happyVery happy
Liberal132915
Moderate235947
Conservative146754

  • Equivalently we could sample multinomial from observed frequencies.

  • Resample rows with replacement from our flatfile

B = 2000
df_happiness = flat_df(happiness)
gamma_star = numeric(B)
for (b in 1:B) {
    idx_star = sample(1:nrow(df_happiness), nrow(df_happiness), replace=TRUE)
    df_star = df_happiness[idx_star,]
    gamma_star[b] = gamma(table(df_star))
}
sd(gamma_star)
0.0774743588459348

Compare p-values

Z = gamma(happiness) / sd(gamma_star)
Z
P = 2 * (1 - pnorm(abs(Z)))
P
gkgamma(happiness)
2.38659606551493
0.0170051639094564
	Goodman-Kruskal's gamma for ordinal categorical data

data:  happiness
Z = 2.3386, p-value = 0.01936
95 percent confidence interval:
 0.03229489 0.33750511
sample estimates:
Goodman-Kruskal's gamma 
                 0.1849 

A Pearson correlation coefficient

cor(df_happiness)
cor(df_happiness)[1,2]^2
A matrix: 2 × 2 of type dbl
XY
X1.00000000.1352282
Y0.13522821.0000000
0.0182866684706643
summary(lm(Y ~ X, data=df_happiness))
Call:
lm(formula = Y ~ X, data = df_happiness)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3017 -0.3017 -0.1748  0.6983  0.9522 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.92093    0.12288  15.633   <2e-16 ***
X            0.12692    0.05207   2.438   0.0153 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6849 on 319 degrees of freedom
Multiple R-squared:  0.01829,	Adjusted R-squared:  0.01521 
F-statistic: 5.942 on 1 and 319 DF,  p-value: 0.01533
M = 319*0.13522^2
M
5.8327390396
(gamma(happiness) / sd(gamma_star))^2
5.69584077993136
chisq.test(happiness)
	Pearson's Chi-squared test

data:  happiness
X-squared = 7.0681, df = 4, p-value = 0.1323