Coding for Multi-Group Analyses
Import data and change variable names.
tbl.2 <- read_xlsx(here("data", "BEER.xlsx"))
colnames(tbl.2) <- c("bubb", 'beer', 'salt')
View data table
ks <- function(t, c) {
kable(t, caption = c, format = 'html', escape = F, digits = 3) %>%
kable_styling(full_width = F, position = 'left', fixed_thead = T)
}
ks(tbl.2[1:10,], 'Beer Data (first 10 observations)')
Beer Data (first 10 observations)
bubb
|
beer
|
salt
|
12
|
guinness
|
no
|
21
|
guinness
|
no
|
5
|
guinness
|
no
|
10
|
guinness
|
no
|
20
|
guinness
|
no
|
13
|
guinness
|
no
|
24
|
heineken
|
no
|
17
|
heineken
|
no
|
26
|
heineken
|
no
|
16
|
heineken
|
no
|
Dummy Coding
Dummy code variables
tbl.2 <- tbl.2 %>%
mutate(beer.f = factor(beer),
salt.f = factor(salt))
# rename columns for each single-column matrix in 'a' with its name in a
reCol <- function(a) {
n <- names(a)
for (i in 1:length(a)) {
colnames(a[[i]]) <- c(str_sub(n[i], end = 4L))
}
a
}
tbl.2 %>%
select_if(is.factor) %>%
map(contrasts) %>%
reCol() %>%
ks("Dummy Codes")
Dummy Codes
|
beer
|
guinness
|
0
|
heineken
|
1
|
|
|
Analyze dummy-coded data
fit.2.a <- lm(bubb ~ beer.f + salt.f +
beer.f * salt.f, tbl.2)
fit.2.a %>%
tidy() %>%
ks('Regression: Interaction of Beer and Salt')
Regression: Interaction of Beer and Salt
term
|
estimate
|
std.error
|
statistic
|
p.value
|
(Intercept)
|
13.500
|
2.332
|
5.789
|
0.000
|
beer.fheineken
|
9.667
|
3.298
|
2.931
|
0.008
|
salt.fyes
|
7.167
|
3.298
|
2.173
|
0.042
|
beer.fheineken:salt.fyes
|
16.833
|
4.664
|
3.609
|
0.002
|
tbl.2.e <- tibble(beer.f = factor(c("guinness", "guinness",
"heineken", "heineken")),
salt.f = factor(c("no", 'yes', 'no', 'yes')))
tbl.2.b <- predict(fit.2.a, newdata = tbl.2.e) %>%
set_names(c("irish", "irish & salty",
"dutch", "dutch & salty")) %>%
enframe(name = "combo", value = 'bubbles')
tbl.2.b %>%
ks('Predicted Number of Bubbles')
Predicted Number of Bubbles
combo
|
bubbles
|
irish
|
13.500
|
irish & salty
|
20.667
|
dutch
|
23.167
|
dutch & salty
|
47.167
|
1:2 %>%
map_dbl(~ tbl.2.b$bubbles[.*2] / tbl.2.b$bubbles[.]) %>%
set_names(c('guinness','heineken')) %>%
enframe("beer", "salt multiplier") %>%
ks('Effect of Salt by Beer')
Effect of Salt by Beer
beer
|
salt multiplier
|
guinness
|
1.531
|
heineken
|
2.282
|
Contrast Coding
Contrast code variables
tbl.2 <- tbl.2 %>%
mutate_if(~ is.factor(.) && contrasts(.)[1] != -.5, list(c = ~ C(., (contr.sum(2)/(-2))))) %>%
rename_at(vars(contains('_c')),
list(~ paste(gsub('.f_c', '', .), 'c', sep = '.')))
tbl.2 %>%
select_at(vars(ends_with('.c'))) %>%
map(contrasts) %>%
reCol %>%
ks('Contrast Code')
Contrast Code
|
beer
|
guinness
|
-0.5
|
heineken
|
0.5
|
|
|
Analyze contrast-coded data
fit.2.b <- lm(bubb ~ beer.c + salt.c +
beer.c * salt.c, tbl.2)
fit.2.b %>%
tidy() %>%
ks('Regression: Interaction of Contrast-Coded Beer and Salt')
Regression: Interaction of Contrast-Coded Beer and Salt
term
|
estimate
|
std.error
|
statistic
|
p.value
|
(Intercept)
|
26.125
|
1.166
|
22.407
|
0.000
|
beer.c1
|
18.083
|
2.332
|
7.755
|
0.000
|
salt.c1
|
15.583
|
2.332
|
6.683
|
0.000
|
beer.c1:salt.c1
|
16.833
|
4.664
|
3.609
|
0.002
|
mean(tbl.2$bubb)
## [1] 26.125
tbl.2.c <- tbl.2.e %>%
mutate_all(~ C(., (contr.sum(2)/(-2)))) %>%
rename_all(~ paste(gsub('.f', '', .), 'c', sep = '.'))
tbl.2.d <- predict(object = fit.2.b, newdata = tbl.2.c) %>%
set_names(c("irish", "irish & salty",
"dutch", "dutch & salty")) %>%
enframe(name = "combo", value = 'bubbles')
## Warning: contrasts dropped from factor beer.c
## Warning: contrasts dropped from factor salt.c
tbl.2.d %>%
ks('Predicted Number of Bubbles (Contrast Codes)')
Predicted Number of Bubbles (Contrast Codes)
combo
|
bubbles
|
irish
|
13.500
|
irish & salty
|
20.667
|
dutch
|
23.167
|
dutch & salty
|
47.167
|
1:2 %>%
map_dbl(~ tbl.2.d$bubbles[.*2] / tbl.2.d$bubbles[.]) %>%
set_names(c('guinness','heineken')) %>%
enframe("beer", "salt multiplier") %>%
ks('Effect of Salt by Beer (Contrast Codes)')
Effect of Salt by Beer (Contrast Codes)
beer
|
salt multiplier
|
guinness
|
1.531
|
heineken
|
2.282
|
Probing Interactions
Import data and change variable names
tbl.3 <- read_xlsx(here('data', 'wfint.xlsx')) %>%
rename_all(tolower)
View data
ks(tbl.3[1:10,], 'Work and Family Data (First 10 Observations)')
Work and Family Data (First 10 Observations)
anx
|
wacon
|
warel
|
wasec
|
facon
|
farel
|
fasec
|
gen
|
wicon
|
wirel
|
wisec
|
ficon
|
firel
|
fisec
|
4.50
|
4.25
|
6.25
|
3.25
|
4.50
|
6.75
|
6.75
|
1
|
7.00
|
9.25
|
6.00
|
8.00
|
10.00
|
9.50
|
2.50
|
3.25
|
3.25
|
4.25
|
6.00
|
5.25
|
5.50
|
0
|
6.75
|
5.75
|
7.75
|
8.75
|
8.00
|
7.25
|
1.50
|
5.25
|
5.50
|
6.00
|
5.50
|
6.25
|
6.50
|
0
|
7.50
|
8.25
|
9.50
|
7.50
|
9.00
|
9.75
|
3.50
|
3.00
|
4.00
|
2.00
|
5.75
|
5.25
|
5.25
|
1
|
7.25
|
6.25
|
9.00
|
7.75
|
9.50
|
10.00
|
1.75
|
5.50
|
1.75
|
3.50
|
7.00
|
1.00
|
1.00
|
0
|
7.50
|
1.25
|
4.50
|
10.00
|
1.00
|
1.00
|
1.75
|
3.75
|
5.00
|
5.50
|
5.75
|
3.25
|
2.50
|
0
|
6.75
|
6.50
|
8.75
|
9.50
|
9.75
|
9.00
|
2.75
|
4.50
|
4.50
|
4.25
|
5.00
|
6.00
|
6.25
|
0
|
7.25
|
6.75
|
9.25
|
7.25
|
9.00
|
9.25
|
4.00
|
6.75
|
2.75
|
7.00
|
4.50
|
4.75
|
5.00
|
1
|
9.75
|
4.00
|
10.00
|
6.50
|
9.00
|
9.00
|
5.25
|
3.75
|
5.50
|
4.00
|
5.00
|
5.00
|
5.00
|
1
|
8.00
|
9.00
|
9.00
|
7.25
|
9.00
|
9.25
|
2.25
|
4.75
|
4.25
|
3.00
|
5.75
|
6.00
|
5.50
|
1
|
8.25
|
7.00
|
8.75
|
6.75
|
9.25
|
8.75
|
a. Actual Predictors
Analysis of ‘actual’ predictors, i.e., actual amount of control, relationship quality, and security at work and with the family
tbl.3 %>%
names() %>%
grep('wa|fa', ., value = TRUE) %>%
map(~ paste('anx ~', .) %>% as.formula()) %>%
map_df(~ lm(., tbl.3) %>% tidy()) %>%
ks('Regression Output for Anxiety and Actual Control, Relationship Quality, and Security')
Regression Output for Anxiety and Actual Control, Relationship Quality, and Security
term
|
estimate
|
std.error
|
statistic
|
p.value
|
(Intercept)
|
3.231
|
0.112
|
28.967
|
0
|
wacon
|
-0.109
|
0.024
|
-4.591
|
0
|
(Intercept)
|
3.817
|
0.130
|
29.416
|
0
|
warel
|
-0.219
|
0.026
|
-8.530
|
0
|
(Intercept)
|
3.362
|
0.092
|
36.612
|
0
|
wasec
|
-0.130
|
0.018
|
-7.161
|
0
|
(Intercept)
|
3.832
|
0.149
|
25.663
|
0
|
facon
|
-0.206
|
0.028
|
-7.472
|
0
|
(Intercept)
|
3.900
|
0.153
|
25.482
|
0
|
farel
|
-0.207
|
0.027
|
-7.736
|
0
|
(Intercept)
|
3.984
|
0.140
|
28.461
|
0
|
fasec
|
-0.226
|
0.025
|
-9.098
|
0
|
b. Moderation by Gender
Analysis of gender as a moderating variable of effects on anxiety. Continuing to use ‘actual’ predictors.
Significant interactions are automatically highlighted with green text.
tbl.3.b <- tbl.3 %>%
names() %>%
grep('wa|fa', ., value = TRUE) %>%
map(~ paste('anx ~', ., '+ gen +', ., ' * gen') %>% as.formula()) %>%
map_df(~ lm(., tbl.3) %>% tidy())
tbl.3.b %>%
mutate(estimate = cell_spec(round(estimate, 3), "html",
color = ifelse(grepl(':', term) &
(p.value < .05),
"green", "black"))) %>%
ks('Regression Output for Moderation by Gender')
Regression Output for Moderation by Gender
term
|
estimate
|
std.error
|
statistic
|
p.value
|
(Intercept)
|
3.135
|
0.195
|
16.058
|
0.000
|
wacon
|
-0.111
|
0.041
|
-2.720
|
0.007
|
gen
|
0.126
|
0.238
|
0.531
|
0.596
|
wacon:gen
|
0.008
|
0.050
|
0.155
|
0.877
|
(Intercept)
|
3.553
|
0.222
|
16.016
|
0.000
|
warel
|
-0.195
|
0.045
|
-4.304
|
0.000
|
gen
|
0.469
|
0.273
|
1.716
|
0.086
|
warel:gen
|
-0.05
|
0.055
|
-0.909
|
0.364
|
(Intercept)
|
3.22
|
0.152
|
21.162
|
0.000
|
wasec
|
-0.129
|
0.031
|
-4.155
|
0.000
|
gen
|
0.256
|
0.191
|
1.344
|
0.179
|
wasec:gen
|
-0.009
|
0.038
|
-0.238
|
0.812
|
(Intercept)
|
3.284
|
0.251
|
13.083
|
0.000
|
facon
|
-0.128
|
0.047
|
-2.693
|
0.007
|
gen
|
0.902
|
0.312
|
2.894
|
0.004
|
facon:gen
|
-0.131
|
0.058
|
-2.248
|
0.025
|
(Intercept)
|
3.138
|
0.248
|
12.650
|
0.000
|
farel
|
-0.093
|
0.044
|
-2.126
|
0.034
|
gen
|
1.251
|
0.314
|
3.985
|
0.000
|
farel:gen
|
-0.188
|
0.055
|
-3.400
|
0.001
|
(Intercept)
|
3.323
|
0.233
|
14.286
|
0.000
|
fasec
|
-0.128
|
0.041
|
-3.085
|
0.002
|
gen
|
1.033
|
0.290
|
3.558
|
0.000
|
fasec:gen
|
-0.155
|
0.052
|
-2.993
|
0.003
|
Compute the intercepts and slopes
fun.3.b <- function(i, t) {
v.e <- t[["estimate"]]
v.t <- t[["term"]]
s.m <- v.e[i]
i.m <- v.e[i - 1]
s.w <- s.m + v.e[i + 2]
i.w <- i.m + v.e[i + 1]
result <- list(term = v.t[[i]],
intercept0 = i.m, slope0 = s.m,
intercept1 = i.w, slope1 = s.w)
result
}
tbl.3.b.2 <- seq(2, 6 * 4, 4) %>%
map_df(~ fun.3.b(., tbl.3.b))
tbl.3.b.2 %>%
kable(col.names = c("attribute",
"intercept", "slope",
"intercept", "slope"),
digits = 3) %>%
kable_styling("striped", position = "left", full_width = F) %>%
add_header_above(c(" " = 1, "Men" = 2, "Women" = 2))
|
Men
|
Women
|
attribute
|
intercept
|
slope
|
intercept
|
slope
|
wacon
|
3.135
|
-0.111
|
3.261
|
-0.103
|
warel
|
3.553
|
-0.195
|
4.022
|
-0.245
|
wasec
|
3.220
|
-0.129
|
3.476
|
-0.139
|
facon
|
3.284
|
-0.128
|
4.187
|
-0.258
|
farel
|
3.138
|
-0.093
|
4.389
|
-0.281
|
fasec
|
3.323
|
-0.128
|
4.356
|
-0.283
|
Plot the simple intercepts and slopes
vec.3.b <- c('need', 'intercept', 'slope')
tbl.3.b.3 <- tbl.3.b.2 %>%
{list(men = select(., 1:3), women = select(., c(1, 4:5)))} %>%
map(~ set_colnames(., vec.3.b)) %>%
bind_rows() %>%
add_column(gen = factor(c(rep("male", 6), rep('female', 6))))
tbl.3.b.3 %>%
filter(grepl('fa', need)) %>%
mutate(new = paste(need, ifelse(gen == 'male',
'-m', '-f'), sep = '')) %>%
ggplot(aes(x = 1, y = intercept,
xend = 7, yend = intercept + slope * 7,
color = gen)) +
geom_segment() +
# geom_dl(aes(label = new, color = gen),
# method = list(dl.trans(x = x - 1.5), 'last.bumpup')) +
scale_x_continuous(expand = expand_scale(.25)) +
scale_y_continuous(expand = expand_scale(.1)) +
labs(x = "predictor", y = "anxiety", title = "Plot of Simple Slopes for Family-Related Predictors") +
scale_color_discrete(name = "gender")
## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
c. Moderation by Importance
Analysis of importance as a moderating variable of effects on anxiety.
Significant interactions are automatically highlighted with green text.
tbl.3.c.a <- tbl.3 %>%
names() %>%
grep('wa|fa', ., value = TRUE) %>%
map2(grep('wi|fi', names(tbl.3), value = TRUE),
~ paste('anx ~', .x, '+', .y, '+',
.x, '*', .y) %>% as.formula()) %>%
map_df(~ lm(., tbl.3) %>% tidy())
tbl.3.c.a %>%
mutate(estimate = cell_spec(round(estimate, 3), "html",
color = ifelse(grepl(':', term) &
(p.value < .05),
"green", "black"))) %>%
ks('Regression Output for Moderation by Importance')
Regression Output for Moderation by Importance
term
|
estimate
|
std.error
|
statistic
|
p.value
|
(Intercept)
|
2.034
|
0.500
|
4.067
|
0.000
|
wacon
|
0.032
|
0.113
|
0.283
|
0.777
|
wicon
|
0.185
|
0.068
|
2.735
|
0.006
|
wacon:wicon
|
-0.023
|
0.015
|
-1.588
|
0.112
|
(Intercept)
|
2.979
|
0.521
|
5.720
|
0.000
|
warel
|
-0.085
|
0.110
|
-0.771
|
0.441
|
wirel
|
0.126
|
0.071
|
1.762
|
0.078
|
warel:wirel
|
-0.02
|
0.015
|
-1.396
|
0.163
|
(Intercept)
|
2.721
|
0.431
|
6.315
|
0.000
|
wasec
|
-0.057
|
0.094
|
-0.606
|
0.545
|
wisec
|
0.077
|
0.049
|
1.587
|
0.113
|
wasec:wisec
|
-0.009
|
0.010
|
-0.858
|
0.391
|
(Intercept)
|
2.341
|
0.645
|
3.628
|
0.000
|
facon
|
0.039
|
0.130
|
0.301
|
0.763
|
ficon
|
0.208
|
0.081
|
2.558
|
0.011
|
facon:ficon
|
-0.034
|
0.016
|
-2.163
|
0.031
|
(Intercept)
|
2.668
|
0.620
|
4.304
|
0.000
|
farel
|
-0.052
|
0.134
|
-0.386
|
0.699
|
firel
|
0.168
|
0.072
|
2.339
|
0.019
|
farel:firel
|
-0.022
|
0.015
|
-1.528
|
0.127
|
(Intercept)
|
2.598
|
0.540
|
4.808
|
0.000
|
fasec
|
-0.019
|
0.116
|
-0.163
|
0.871
|
fisec
|
0.182
|
0.063
|
2.871
|
0.004
|
fasec:fisec
|
-0.028
|
0.013
|
-2.130
|
0.033
|
Compute the intercepts and slopes
fun.3.c <- function(i, t, data) {
v.e <- t[["estimate"]] # vector of coefficient estimates
v.t <- t[["term"]] # vector of terms
m.t <- v.t[[i + 1]] # name of moderator
m <- data[[m.t]] # vector of moderator data
s.m <- sd(m) # standard deviation of moderator
x.m <- mean(m) # mean of moderator
hi <- x.m + s.m # +1 SD level of moderator
med <- x.m # mean level of moderator
lo <- x.m - s.m # -1 SD level of moderator
int <- v.e[i - 1] # intercept estimate
me <- v.e[i] # main effect estimate
mo <- v.e[i + 1] # main effect of moderator
pr <- v.e[i + 2] # coeffient of product term estimate
s.h <- me + pr * hi # +1 SD: slope
i.h <- int + mo * hi # +1 SD: intercept
s.m <- me + pr * med # mean: slope
i.m <- int + mo * med # mean: int
s.l <- me + pr * lo # -1 SD: slope
i.l <- int + mo * lo # -1 SD: int
list(term = v.t[[i]], # result
int.low = i.l, slo.low = s.l,
int.med = i.m, slo.med = s.m,
int.hi = i.h, slo.hi = s.h)
}
tbl.3.c.2 <- seq(2, 6 * 4, 4) %>%
map_df(~ fun.3.c(., tbl.3.c.a, tbl.3))
headers <- c('Intercept', 'Slope')
tbl.3.c.2 %>%
kable(col.names = c("Attribute", rep(headers, 3)), digits = 3) %>%
kable_styling("striped", position = "left", full_width = F) %>%
add_header_above(c(" " = 1, "Low (-1 SD)" = 2, "Medium (Mean)" = 2, 'High (+1 SD)' = 2))
|
Low (-1 SD)
|
Medium (Mean)
|
High (+1 SD)
|
Attribute
|
Intercept
|
Slope
|
Intercept
|
Slope
|
Intercept
|
Slope
|
wacon
|
3.099
|
-0.102
|
3.375
|
-0.137
|
3.651
|
-0.172
|
warel
|
3.688
|
-0.199
|
3.889
|
-0.232
|
4.089
|
-0.264
|
wasec
|
3.245
|
-0.117
|
3.381
|
-0.133
|
3.518
|
-0.149
|
facon
|
3.639
|
-0.173
|
3.971
|
-0.227
|
4.303
|
-0.281
|
farel
|
3.949
|
-0.222
|
4.162
|
-0.251
|
4.376
|
-0.279
|
fasec
|
3.933
|
-0.221
|
4.186
|
-0.259
|
4.439
|
-0.298
|
Plot the simple intercepts and slopes
vec.3.c <- c('need', 'intercept', 'slope')
tbl.3.c.3 <- tbl.3.c.2 %>%
{list(low = select(., 1:3),
medium = select(., c(1, 4:5)),
high = select(., c(1, 6:7)))} %>%
map(~ set_colnames(., vec.3.b)) %>%
bind_rows() %>%
add_column(lvl = ordered(c(rep("low", 6),
rep('medium', 6),
rep('high', 6)),
levels = c('low', 'medium', 'high')))
tbl.3.c.3 %>%
filter(grepl('facon|fasec', need)) %>%
mutate(new = paste(need, ifelse(lvl == 'low', '-l',
ifelse(lvl == 'medium', '-m',
'-h')), sep = '')) %>%
ggplot(aes(x = 1, y = intercept,
xend = 7, yend = intercept + slope * 7,
color = lvl)) +
geom_segment() +
# geom_dl(aes(label = new, color = lvl),
# method = list(dl.trans(x = x - 1.5), 'last.bumpup')) +
scale_x_continuous(expand = expand_scale(.25)) +
scale_y_continuous(expand = expand_scale(.1)) +
labs(x = "predictor", y = "anxiety", title = "Plot of Simple Slopes for Significant Interactions with Importance") +
scale_color_discrete(name = "Level")
## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
ggsave("imp_simple.png", last_plot(), width = 6, height = 4.5, dpi = 600)