In this first document, I am going to create mock data, produce descriptive statistics, and analyze the reliabilities of the measures.
Before anything else, I need to import the packages I will be using.
# import packages
library(tidyverse)
library(flextable)
library(psych)
library(lavaan)
library(semTools)
The first step I will take is to initialize a fake dataset that I expect to look like our actual dataset once we have it. I’ll start with the manipulations and checks.
# set sample size
n <- 400
# create a dataframe with first 5 variables:
# participant id, the two manipulations, and their manipulation checks
mydata <- tibble(
  id = seq_len(n),
  # manipulations: each participant is assigned 0 or 1 with probability .5
  interdep = rbinom(n, 1, .5),
  disclose = rbinom(n, 1, .5),
  # manipulation checks: answers 1-4, with answers 1 and 2 weighted most heavily
  intcheck = sample.int(4, n, TRUE, c(.4, .4, .1, .1)),
  discheck = sample.int(4, n, TRUE, c(.4, .4, .1, .1))
)
Now, I will add the measures (items).
# keep values inside the limits of the response scale
#   x: numeric vector of responses
#   lower, upper: smallest and largest values allowed (default -2 and 2)
#   returns x with anything outside the limits pulled back to the nearest limit
bound <- function(x, lower = -2, upper = 2) {
  pmin(pmax(x, lower), upper)
}
# define a function to randomly sample along Likert scale
#   n: number of simulated respondents
#   mode: scale position (1-5) that should be the most likely response
#   returns a data frame with n rows and 4 item columns, each bounded to -2..2
likert <- function(n, mode) {
  # weight the 5 scale points so the weight halves with each step from the mode
  weights <- 2^(mode - abs(mode - 1:5))
  # draw one underlying score per respondent using the normalized weights
  seed <- sample(-2:2, n, TRUE, weights / sum(weights))
  # each of the 4 items is the underlying score plus noise of -1, 0, or 1,
  # kept inside the scale by bound()
  map_dfc(
    1:4,
    ~ bound(seed + sample(-1:1, n, TRUE))
  )
}
# add a column of fake data for each item
# (one call to likert() per scale, with modes 5, 1, and 4; 4 items per scale)
mydata <- mydata %>%
  full_join(
    map_dfc(c(5, 1, 4), ~ likert(n, .x)) %>%
      add_column(id = seq_len(n)),
    by = "id"
  )
# name columns for items: aff1-aff4, cog1-cog4, lik1-lik4
names(mydata)[6:17] <- paste0(rep(c("aff", "cog", "lik"), each = 4), 1:4)
Let’s see how that looks. I’m going to create a simple function to format a table.
# turn dataframe into html table
#   data: the data frame to display
#   returns a flextable with white text in all parts, sized to fit its contents
formatAsTable <- function(data) {
  data %>%
    flextable() %>%
    color(color = "white", part = "all") %>%
    autofit()
}
I have a feeling this function will be useful in future documents, so I will save it to an RDS file that I can load again later.
# save the formatting function to format.rds so it can be read back in later
saveRDS(formatAsTable, "format.rds")
Now let’s look at the data.
# show the first rows of the mock data as a formatted table
mydata %>%
  head() %>%
  formatAsTable()
id | interdep | disclose | intcheck | discheck | aff1 | aff2 | aff3 | aff4 | cog1 | cog2 | cog3 | cog4 | lik1 | lik2 | lik3 | lik4 |
1 | 0 | 1 | 2 | 1 | 1 | 1 | 0 | 1 | -2 | -1 | -1 | -2 | 2 | 1 | 2 | 2 |
2 | 1 | 1 | 4 | 2 | 1 | 0 | 0 | 2 | -1 | -1 | 0 | 1 | 0 | 0 | 0 | 0 |
3 | 1 | 1 | 1 | 3 | 2 | 2 | 1 | 2 | -2 | -2 | -1 | -2 | 1 | -1 | 1 | 0 |
4 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 1 | -1 | -2 | -2 | -2 | 1 | 1 | 2 | 2 |
5 | 0 | 1 | 1 | 2 | 2 | 2 | 1 | 2 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | -1 |
6 | 0 | 1 | 2 | 1 | 1 | 2 | 0 | 0 | -1 | -1 | -2 | -1 | 0 | 0 | -1 | 1 |
We are in business now.
Before moving on, I will export the mock data to a file so I can access it from other pages.
# write the mock data to mock.csv under ../github/thesis
write_csv(mydata, file.path("..", "github", "thesis", "mock.csv"))
I’m now going to examine some descriptive statistics for the data, including the means and standard deviations of each variable. The manipulation check variables cannot be interpreted as continuous, even conceptually, so I will exclude them for the moment.
# subset without id or checks (columns 1, 4, and 5)
cont.data <- mydata[-c(1, 4, 5)]
# descriptive statistics for each remaining variable,
# with the vars column replaced by the variable names
cont.data %>%
  describe(fast = TRUE) %>%
  mutate(vars = names(cont.data)) %>%
  formatAsTable()
vars | n | mean | sd | min | max | range | se |
interdep | 400 | 0.4675 | 0.4995675 | 0 | 1 | 1 | 0.02497837 |
disclose | 400 | 0.5425 | 0.4988144 | 0 | 1 | 1 | 0.02494072 |
aff1 | 400 | 0.9225 | 1.1832133 | -2 | 2 | 4 | 0.05916067 |
aff2 | 400 | 0.9350 | 1.1485089 | -2 | 2 | 4 | 0.05742544 |
aff3 | 400 | 0.9400 | 1.1596387 | -2 | 2 | 4 | 0.05798193 |
aff4 | 400 | 0.9800 | 1.1326107 | -2 | 2 | 4 | 0.05663054 |
cog1 | 400 | -1.0025 | 1.1183114 | -2 | 2 | 4 | 0.05591557 |
cog2 | 400 | -1.0400 | 1.1164749 | -2 | 2 | 4 | 0.05582375 |
cog3 | 400 | -1.0550 | 1.1090762 | -2 | 2 | 4 | 0.05545381 |
cog4 | 400 | -1.0025 | 1.1405024 | -2 | 2 | 4 | 0.05702512 |
lik1 | 400 | 0.6125 | 1.2231451 | -2 | 2 | 4 | 0.06115726 |
lik2 | 400 | 0.6425 | 1.1696641 | -2 | 2 | 4 | 0.05848320 |
lik3 | 400 | 0.6325 | 1.2066830 | -2 | 2 | 4 | 0.06033415 |
lik4 | 400 | 0.5700 | 1.1889634 | -2 | 2 | 4 | 0.05944817 |
Now we can quickly check the frequencies for the manipulation checks.
The correct answers are 1 and 2 for experimental and control conditions, respectively. I will add a new variable that represents whether the participants responded correctly.
# check whether a manipulation check was answered correctly
#   iv: condition code of the manipulation (0 or 1)
#   check: answer given on the manipulation check
#   returns TRUE where check is 1 and iv is 1, or check is 2 and iv is 0;
#   any other answer is FALSE
isCorrect <- function(iv, check) {
  iv == 2 - check
}
# flag whether each participant answered each manipulation check correctly
mydata <- mydata %>%
  mutate(
    intcorrect = isCorrect(interdep, intcheck),
    discorrect = isCorrect(disclose, discheck)
  )
I’ll just quickly throw the code in a function to make the tables of correct answers by manipulation.
# count the participants who answered a manipulation check correctly
#   ...: columns of mydata to count by (a manipulation and its "correct" flag)
#   returns one row per condition with the count n, plus a row id for joining
correctCount <- function(...) {
  mydata %>%
    count(...) %>%
    # keep only the rows that count correct answers, then drop the flag column
    filter(if_any(ends_with("correct"))) %>%
    select(!ends_with("correct")) %>%
    # row id used to line two of these tables up side by side;
    # this requires exactly two rows to remain
    add_column(id = 1:2)
}
Finally, we can look at the tables of correct answers.
# put the two tables of correct answers side by side, matched on row id
correctCount(interdep, intcorrect) %>%
  inner_join(
    correctCount(disclose, discorrect),
    by = "id"
  ) %>%
  select(!id) %>%
  rename(n.int = n.x, n.dis = n.y) %>%
  formatAsTable()
interdep | n.int | disclose | n.dis |
0 | 93 | 0 | 71 |
1 | 68 | 1 | 81 |
How many people got both answers correct?
# number of participants who answered both manipulation checks correctly
total <- sum(mydata$intcorrect & mydata$discorrect)
62 participants aced the test.
What are the reliabilities of the measures? Using the lavaan package, I will run a confirmatory factor analysis of the twelve items. I’m planning to report McDonald’s \(\omega\) in addition to Cronbach’s \(\alpha\) because \(\omega\) performs better and is preferable especially where there is skew.
# measurement model: three factors with four items each
cfa.model <- "aff =~ aff1 + aff2 + aff3 + aff4
cog =~ cog1 + cog2 + cog3 + cog4
lik =~ lik1 + lik2 + lik3 + lik4"
# fit the model to the mock data with effect coding turned on
cfa.fit <- cfa(cfa.model, mydata, effect.coding = TRUE)
# reliability coefficients for each factor, one row per statistic
cfa.fit %>%
  reliability() %>%
  as_tibble(rownames = "stat") %>%
  formatAsTable()
stat | aff | cog | lik |
alpha | 0.8982218 | 0.8879687 | 0.8695482 |
omega | 0.8987000 | 0.8887170 | 0.8697517 |
omega2 | 0.8987000 | 0.8887170 | 0.8697517 |
omega3 | 0.8986993 | 0.8888123 | 0.8695897 |
avevar | 0.6895819 | 0.6668051 | 0.6256197 |
As long as I have the CFA model, I might as well look at the loadings and fit statistics.
# loadings: keep only the "=~" rows of the parameter estimates
cfa.fit %>%
  parameterEstimates() %>%
  filter(op == "=~") %>%
  formatAsTable()
lhs | op | rhs | est | se | z | pvalue | ci.lower | ci.upper |
aff | =~ | aff1 | 1.0309627 | 0.03237122 | 31.84812 | 0 | 0.9675162 | 1.0944091 |
aff | =~ | aff2 | 1.0073297 | 0.03151490 | 31.96361 | 0 | 0.9455617 | 1.0690978 |
aff | =~ | aff3 | 1.0290495 | 0.03133055 | 32.84493 | 0 | 0.9676427 | 1.0904562 |
aff | =~ | aff4 | 0.9326581 | 0.03345824 | 27.87529 | 0 | 0.8670812 | 0.9982351 |
cog | =~ | cog1 | 0.9834341 | 0.03454821 | 28.46556 | 0 | 0.9157209 | 1.0511474 |
cog | =~ | cog2 | 0.9872944 | 0.03432629 | 28.76205 | 0 | 0.9200161 | 1.0545727 |
cog | =~ | cog3 | 0.9490811 | 0.03527643 | 26.90411 | 0 | 0.8799405 | 1.0182216 |
cog | =~ | cog4 | 1.0801904 | 0.03280941 | 32.92318 | 0 | 1.0158851 | 1.1444957 |
lik | =~ | lik1 | 0.9962834 | 0.03917536 | 25.43138 | 0 | 0.9195011 | 1.0730657 |
lik | =~ | lik2 | 1.0470204 | 0.03576455 | 29.27537 | 0 | 0.9769232 | 1.1171177 |
lik | =~ | lik3 | 0.9578832 | 0.03961143 | 24.18199 | 0 | 0.8802462 | 1.0355202 |
lik | =~ | lik4 | 0.9988130 | 0.03756747 | 26.58718 | 0 | 0.9251822 | 1.0724439 |
# fit statistics
# names of the fit measures to report
m <- c("chisq", "df", "pvalue", "rmsea", "tli")
cfa.fit %>%
  fitMeasures(fit.measures = m) %>%
  round(3) %>%
  as_tibble(rownames = "stat") %>%
  formatAsTable()
stat | value |
chisq | 46.016 |
df | 51.000 |
pvalue | 0.671 |
rmsea | 0.000 |
tli | 1.002 |
How do the variables relate to each other?
# extend the measurement model with a one-indicator factor per manipulation
cor.model <- cfa.model %>%
  paste(
    "intfac =~ interdep",
    "disfac =~ disclose",
    sep = "\n"
  )
cor.fit <- cfa(cor.model, mydata, effect.coding = TRUE)
# correlations among the latent variables, rounded to 3 decimals
cor.fit %>%
  lavInspect("cor.lv") %>%
  round(3) %>%
  as_tibble(rownames = "var") %>%
  formatAsTable()
var | aff | cog | lik | intfac | disfac |
aff | 1.000 | -0.025 | -0.028 | 0.004 | -0.023 |
cog | -0.025 | 1.000 | 0.023 | -0.021 | -0.043 |
lik | -0.028 | 0.023 | 1.000 | -0.062 | -0.020 |
intfac | 0.004 | -0.021 | -0.062 | 1.000 | 0.036 |
disfac | -0.023 | -0.043 | -0.020 | 0.036 | 1.000 |
Output document:
# render prework.Rmd, writing the output under ../github/thesis
rmarkdown::render(
  "prework.Rmd",
  output_dir = file.path("..", "github", "thesis")
)