Ordinal association¶

Job satisfaction (response) against age (predictor)¶

job_satis = matrix(c(34, 53, 88,
                     80, 174, 304,
                     29, 75, 172), 3, 3, byrow=TRUE) # Table 2.8
rownames(job_satis) = c('<30', '30-50', '>50')
colnames(job_satis) = c(1,2,3)
job_satis

A matrix: 3 × 3 of type dbl
	1	2	3
<30	34	53	88
30-50	80	174	304
>50	29	75	172

Happiness and Political Ideology¶

happiness = matrix(c(13, 29, 15,
                     23, 59, 47,
                     14, 67, 54), 3, 3, byrow=TRUE) # Table 3.7
rownames(happiness) = c('Liberal', 'Moderate', 'Conservative')
colnames(happiness) = c('Not too happy', 'Pretty happy', 'Very happy')
happiness
sum(happiness)

A matrix: 3 × 3 of type dbl
	Not too happy	Pretty happy	Very happy
Liberal	13	29	15
Moderate	23	59	47
Conservative	14	67	54

321

Kruskal \(\gamma\)¶

install.packages('MESS', repos='http://cloud.r-project.org')

The downloaded binary packages are in
	/var/folders/r1/yzxk5h4j5r327ymmt5msy3n00000gr/T//RtmpfZdZnA/downloaded_packages

library(MESS)
gkgamma(happiness)

	Goodman-Kruskal's gamma for ordinal categorical data

data:  happiness
Z = 2.3386, p-value = 0.01936
95 percent confidence interval:
 0.03229489 0.33750511
sample estimates:
Goodman-Kruskal's gamma 
                 0.1849 

How to compute \(p\)-value?

Bootstrap to find SE¶

gamma = function(data_table) {
    N_c = 0
    N_d = 0
    for (i in 1:nrow(data_table)) {
        for (j in 1:ncol(data_table)) {
            for (h in 1:nrow(data_table)) {
                for (k in 1:ncol(data_table)) {
                    if ((h > i) & (k > j)) {
                       N_c = N_c + data_table[i, j] * data_table[h, k]
                    }

                }
            }
            for (h in 1:nrow(data_table)) {
                for (k in 1:ncol(data_table)) {
                    if ((h > i) & (k < j)) {
                       N_d = N_d + data_table[i, j] * data_table[h, k]
                    }
                }
            }
            
        }
    }
    return((N_c-N_d)/(N_c+N_d))
}
gamma(happiness)

0.1849

To bootstrap, we’ll make a flat file representation

flat_df = function(data_table) {
    flat_ = matrix(NA, sum(data_table), 2)
    idx = 1
    for(i in 1:nrow(data_table)) {
        for (j in 1:ncol(data_table)) {
            for (k in 1:data_table[i,j]) {
            flat_[idx, 1] = i
            flat_[idx, 2] = j
            idx = idx + 1
            }
        }
    }
    flat_ = data.frame(X=flat_[,1], Y=flat_[,2])
    return(flat_)
}

df_job_satis = flat_df(job_satis)
table(df_job_satis)
job_satis

   Y
X     1   2   3
  1  34  53  88
  2  80 174 304
  3  29  75 172

A matrix: 3 × 3 of type dbl
	1	2	3
<30	34	53	88
30-50	80	174	304
>50	29	75	172

df_happiness = flat_df(happiness)
table(df_happiness)
happiness

   Y
X    1  2  3
  1 13 29 15
  2 23 59 47
  3 14 67 54

A matrix: 3 × 3 of type dbl
	Not too happy	Pretty happy	Very happy
Liberal	13	29	15
Moderate	23	59	47
Conservative	14	67	54

Equivalently we could sample multinomial from observed frequencies.
Resample rows with replacement from our flatfile

B = 2000
df_happiness = flat_df(happiness)
gamma_star = numeric(B)
for (b in 1:B) {
    idx_star = sample(1:nrow(df_happiness), nrow(df_happiness), replace=TRUE)
    df_star = df_happiness[idx_star,]
    gamma_star[b] = gamma(table(df_star))
}

sd(gamma_star)

0.0774743588459348

Compare p-values¶

Z = gamma(happiness) / sd(gamma_star)
Z
P = 2 * (1 - pnorm(abs(Z)))
P
gkgamma(happiness)

2.38659606551493

0.0170051639094564

	Goodman-Kruskal's gamma for ordinal categorical data

data:  happiness
Z = 2.3386, p-value = 0.01936
95 percent confidence interval:
 0.03229489 0.33750511
sample estimates:
Goodman-Kruskal's gamma 
                 0.1849 

A Pearson correlation coefficient¶

cor(df_happiness)
cor(df_happiness)[1,2]^2

A matrix: 2 × 2 of type dbl
	X	Y
X	1.0000000	0.1352282
Y	0.1352282	1.0000000

0.0182866684706643

summary(lm(Y ~ X, data=df_happiness))

Call:
lm(formula = Y ~ X, data = df_happiness)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3017 -0.3017 -0.1748  0.6983  0.9522 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.92093    0.12288  15.633   <2e-16 ***
X            0.12692    0.05207   2.438   0.0153 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6849 on 319 degrees of freedom
Multiple R-squared:  0.01829,	Adjusted R-squared:  0.01521 
F-statistic: 5.942 on 1 and 319 DF,  p-value: 0.01533

M = 319*0.13522^2
M

5.8327390396

(gamma(happiness) / sd(gamma_star))^2

5.69584077993136

chisq.test(happiness)

	Pearson's Chi-squared test

data:  happiness
X-squared = 7.0681, df = 4, p-value = 0.1323

STATS 305B

Ordinal association¶

Job satisfaction (response) against age (predictor)¶

Happiness and Political Ideology¶

Kruskal \(\gamma\)¶

Bootstrap to find SE¶

Compare p-values¶

A Pearson correlation coefficient¶