Analytical Procedure Pt. 1

In this first document, I am going to create mock data, produce descriptive statistics, and analyze the reliabilities of the measures.

Before anything else, I need to import the packages I will be using.

# import packages
library(tidyverse)

library(flextable)

library(psych)
library(lavaan)
library(semTools)

Creating mock data

The first step I will take is to initialize a fake dataset that I expect to look like our actual dataset once we have it. I’ll start with the manipulations and checks.

# set sample size
n <- 400

# fix the RNG seed so the mock data, and every table derived from it, comes
# out the same on each render
set.seed(2023)

# create a dataframe with first 5 variables
#   id                 participant identifier, 1..n
#   interdep, disclose manipulation indicators, 0/1 with equal probability
#   intcheck, discheck manipulation-check responses on a 1-4 scale; options
#                      1 and 2 are drawn with probability .4 each, 3 and 4
#                      with probability .1 each
mydata <- tibble(
  id = seq_len(n),
  interdep = rbinom(n, 1, .5),
  disclose = rbinom(n, 1, .5),
  intcheck = sample.int(4, n, replace = TRUE, prob = c(.4, .4, .1, .1)),
  discheck = sample.int(4, n, replace = TRUE, prob = c(.4, .4, .1, .1))
)

Now, I will add the measures (items).

# clamp every element of x to the closed interval [lower, upper]
#
# x:     numeric vector
# lower: smallest value allowed (default -2, the bottom of the item coding)
# upper: largest value allowed (default 2, the top of the item coding)
#
# returns a vector the same length as x; NA elements stay NA
bound <- function(x, lower = -2, upper = 2) {
  pmin(pmax(x, lower), upper)
}

# define a function to randomly sample along Likert scale
#
# n:    number of simulated respondents (rows)
# mode: scale point (1-5) that should be the most likely response
#
# returns a 4-column data frame of item responses coded -2..2; the four
# columns share one underlying draw per respondent, so they correlate
likert <- function(n, mode) {
  # the weight halves with every step away from the mode
  weights <- 2^(mode - abs(mode - 1:5))
  # one underlying response per respondent, drawn with those weights
  seed <- sample(-2:2, n, replace = TRUE, prob = weights / sum(weights))
  # four noisy copies of it (jitter of -1, 0 or +1), clamped by bound()
  map_dfc(
    1:4,
    ~ bound(seed + sample(-1:1, n, replace = TRUE))
  )
}

# add a column of fake data for each item: three 4-item scales whose most
# likely responses sit at scale points 5, 1 and 4 respectively.
# The row count comes from n rather than a repeated literal, so the item
# columns always line up with the ids already in mydata.
mydata <- mydata %>%
  full_join(
    map_dfc(c(5, 1, 4), ~ likert(n, .)) %>%
      add_column(id = seq_len(n)),
    by = "id"
  )

# name columns for items: aff1..aff4, cog1..cog4, lik1..lik4
names(mydata)[6:17] <- paste0(rep(c("aff", "cog", "lik"), each = 4), 1:4)

Let’s see how that looks. I’m going to create a simple function to format a table.

# turn dataframe into html table
#
# data: a data frame
#
# returns a flextable with white text on every part of the table and
# autofit() applied
formatAsTable <- function(data) {
  tbl <- flextable(data)
  tbl <- color(tbl, color = "white", part = "all")
  autofit(tbl)
}

I have a feeling this function will be useful in future documents, so I will save it to an RDS file that I can load again later.

# serialise the helper itself so other documents can readRDS() it
saveRDS(formatAsTable, "format.rds")

Now let’s look at the data.

# show the first rows of the mock data as a formatted table
formatAsTable(head(mydata))

id	interdep	disclose	intcheck	discheck	aff1	aff2	aff3	aff4	cog1	cog2	cog3	cog4	lik1	lik2	lik3	lik4
1	0	1	2	1	1	1	0	1	-2	-1	-1	-2	2	1	2	2
2	1	1	4	2	1	0	0	2	-1	-1	0	1	0	0	0	0
3	1	1	1	3	2	2	1	2	-2	-2	-1	-2	1	-1	1	0
4	1	1	1	1	2	2	2	1	-1	-2	-2	-2	1	1	2	2
5	0	1	1	2	2	2	1	2	1	0	0	1	1	0	0	-1
6	0	1	2	1	1	2	0	0	-1	-1	-2	-1	0	0	-1	1

We are in business now.

Before moving on, I will export the mock data to a file so I can access it from other pages.

# write the mock data to ../github/thesis/mock.csv (path is relative to the
# working directory) so the other pages can read it back
write_csv(mydata, file.path("..", "github", "thesis", "mock.csv"))

Descriptive statistics

I’m now going to examine some descriptive statistics for the data, including the means and standard deviations of each variable. The manipulation check variables cannot be interpreted as continuous, even conceptually, so I will exclude them for the moment.

# subset without id or checks; the columns are dropped by name so the
# selection does not depend on their position in mydata
cont.data <- mydata[setdiff(names(mydata), c("id", "intcheck", "discheck"))]

# descriptive statistics per remaining variable, with the vars column
# replaced by the actual variable names
cont.data %>%
  describe(fast = TRUE) %>%
  mutate(vars = names(cont.data)) %>%
  formatAsTable

vars	n	mean	sd	min	max	range	se
interdep	400	0.4675	0.4995675	0	1	1	0.02497837
disclose	400	0.5425	0.4988144	0	1	1	0.02494072
aff1	400	0.9225	1.1832133	-2	2	4	0.05916067
aff2	400	0.9350	1.1485089	-2	2	4	0.05742544
aff3	400	0.9400	1.1596387	-2	2	4	0.05798193
aff4	400	0.9800	1.1326107	-2	2	4	0.05663054
cog1	400	-1.0025	1.1183114	-2	2	4	0.05591557
cog2	400	-1.0400	1.1164749	-2	2	4	0.05582375
cog3	400	-1.0550	1.1090762	-2	2	4	0.05545381
cog4	400	-1.0025	1.1405024	-2	2	4	0.05702512
lik1	400	0.6125	1.2231451	-2	2	4	0.06115726
lik2	400	0.6425	1.1696641	-2	2	4	0.05848320
lik3	400	0.6325	1.2066830	-2	2	4	0.06033415
lik4	400	0.5700	1.1889634	-2	2	4	0.05944817

Manipulation checks

Now we can quickly check the frequencies for the manipulation checks.

The correct answers are 1 and 2 for experimental and control conditions, respectively. I will add a new variable that represents whether the participants responded correctly.

# TRUE where a manipulation-check response matches the assigned condition
#
# iv:    condition indicator (0/1)
# check: manipulation-check response; 1 is the correct answer when iv is 1,
#        2 is the correct answer when iv is 0
isCorrect <- function(iv, check) {
  # map the response onto the condition it is correct for: 1 -> 1, 2 -> 0
  # (any other response maps outside 0/1 and so never matches)
  implied_iv <- 2 - check
  iv == implied_iv
}

# flag, for each manipulation, whether the participant's check response
# matches the condition they were assigned to
mydata <- mutate(
  mydata,
  intcorrect = isCorrect(interdep, intcheck),
  discorrect = isCorrect(disclose, discheck)
)

I’ll just quickly throw the code in a function to make the tables of correct answers by manipulation.

# count the participants who answered a manipulation check correctly,
# separately for each level of the manipulation
#
# ...:  the manipulation column and its matching *correct column, forwarded
#       to count()
# data: data frame to tabulate; defaults to the global mydata, so existing
#       calls keep working unchanged
#
# returns one row per condition (the condition column and n) plus a running
# row id that serves as a join key
correctCount <- function(..., data = mydata) {
  data %>%
    count(...) %>%
    # keep only the rows where the *correct flag is TRUE
    filter(if_any(ends_with("correct"))) %>%
    select(!ends_with("correct")) %>%
    # number whatever rows remain instead of assuming there are exactly two
    mutate(id = row_number())
}

Finally, we can look at the tables of correct answers.

# put the two per-manipulation counts side by side, matched on row id,
# then drop the id and label each count column by manipulation
inner_join(
  correctCount(interdep, intcorrect),
  correctCount(disclose, discorrect),
  by = "id"
) %>%
  select(!id) %>%
  rename(n.int = n.x, n.dis = n.y) %>%
  formatAsTable()

interdep	n.int	disclose	n.dis
0	93	0	71
1	68	1	81

How many people got both answers correct?

# number of participants who passed both manipulation checks
total <- with(mydata, sum(intcorrect & discorrect))

62 participants aced the test.

Factor Model

What are the reliabilities of the measures? Using the lavaan package, I will run a confirmatory factor analysis of the twelve items. I plan to report McDonald’s \(\omega\) in addition to Cronbach’s \(\alpha\), because \(\omega\) performs better and is preferable, especially where the item distributions are skewed.

# measurement model: three factors with four indicators each
cfa.model <- 'aff =~ aff1 + aff2 + aff3 + aff4
              cog =~ cog1 + cog2 + cog3 + cog4
              lik =~ lik1 + lik2 + lik3 + lik4'

# fit with effect coding turned on
cfa.fit <- cfa(cfa.model, data = mydata, effect.coding = TRUE)

# reliability coefficients per factor, one row per statistic
# NOTE(review): recent semTools releases appear to deprecate reliability()
# in favour of compRelSEM() -- confirm against the installed version
cfa.fit %>%
  reliability %>%
  as_tibble(rownames = "stat") %>%
  formatAsTable

stat	aff	cog	lik
alpha	0.8982218	0.8879687	0.8695482
omega	0.8987000	0.8887170	0.8697517
omega2	0.8987000	0.8887170	0.8697517
omega3	0.8986993	0.8888123	0.8695897
avevar	0.6895819	0.6668051	0.6256197

As long as I have the CFA model, I might as well look at the loadings and fit statistics.

# loadings: keep only the "=~" (factor-indicator) rows of the estimates
formatAsTable(
  filter(parameterEstimates(cfa.fit), op == "=~")
)

lhs	op	rhs	est	se	z	ci.lower	ci.upper
aff	=~	aff1	1.0309627	0.03237122	31.84812	0.9675162	1.0944091
aff	=~	aff2	1.0073297	0.03151490	31.96361	0.9455617	1.0690978
aff	=~	aff3	1.0290495	0.03133055	32.84493	0.9676427	1.0904562
aff	=~	aff4	0.9326581	0.03345824	27.87529	0.8670812	0.9982351
cog	=~	cog1	0.9834341	0.03454821	28.46556	0.9157209	1.0511474
cog	=~	cog2	0.9872944	0.03432629	28.76205	0.9200161	1.0545727
cog	=~	cog3	0.9490811	0.03527643	26.90411	0.8799405	1.0182216
cog	=~	cog4	1.0801904	0.03280941	32.92318	1.0158851	1.1444957
lik	=~	lik1	0.9962834	0.03917536	25.43138	0.9195011	1.0730657
lik	=~	lik2	1.0470204	0.03576455	29.27537	0.9769232	1.1171177
lik	=~	lik3	0.9578832	0.03961143	24.18199	0.8802462	1.0355202
lik	=~	lik4	0.9988130	0.03756747	26.58718	0.9251822	1.0724439

# fit statistics: the selected measures, rounded to three decimals, shown
# as a two-column stat / value table
m <- c("chisq", "df", "pvalue", "rmsea", "tli")
formatAsTable(
  as_tibble(
    round(fitMeasures(cfa.fit, fit.measures = m), 3),
    rownames = "stat"
  )
)

stat	value
chisq	46.016
df	51.000
pvalue	0.671
rmsea	0.000
tli	1.002

Intercorrelations

How do the variables relate to each other?

# extend the measurement model with one single-indicator factor per
# manipulation, so both manipulations appear in the latent correlation
# matrix alongside the three scales
cor.model <- paste(
  cfa.model,
  "intfac =~ interdep",
  "disfac =~ disclose",
  sep = "\n"
)

cor.fit <- cfa(cor.model, data = mydata, effect.coding = TRUE)

# latent-variable correlation matrix, rounded to three decimals
cor.fit %>%
  lavInspect("cor.lv") %>%
  round(3) %>%
  as_tibble(rownames = "var") %>%
  formatAsTable

var	aff	cog	lik	intfac	disfac
aff	1.000	-0.025	-0.028	0.004	-0.023
cog	-0.025	1.000	0.023	-0.021	-0.043
lik	-0.028	0.023	1.000	-0.062	-0.020
intfac	0.004	-0.021	-0.062	1.000	0.036
disfac	-0.023	-0.043	-0.020	0.036	1.000

Output document:

# knit prework.Rmd and place the output in ../github/thesis
rmarkdown::render(
  input = "prework.Rmd",
  output_dir = file.path("..", "github", "thesis")
)