Exploratory Analyses

I collected data on a number of additional variables, most of them categorical, that are not directly connected to my hypotheses. Using those variables, there are a few types of exploratory analyses I’d like to undertake here.

First, I want to collect some basic descriptive statistics on the additional variables to cite in the Participants section of the thesis.

Second, I want to know if any of the additional variables correlate with my three measured variables.

Third, I want to know if any of them should be used as controls in my hypothesis tests.

The extra variables I have are:

gender: male or female
ethnic: standard racial categories used in the United States
age (self-explanatory)
employ: standard economic measure of employment status
educat: the highest degree earned
party: political stance on left-right spectrum
adhd: who the participant knows with ADHD
work: work history with white-, blue-, and pink-collar occupations

For the categorical variables above, participants could click ‘Other’ and write in whatever they chose. A small but non-negligible number of participants chose to do so.

As usual, I will load my packages and import the data.

library(corrr)
library(tidyverse)

library(flextable)

# Load the survey data from the sibling data directory.
adhd.data <- file.path("..", "data", "hq-data.rds") %>%
  readRDS()

# formatAsTable is restored from a serialized object; it is used below as the
# final step of each table-producing pipeline.
formatAsTable <- "format.rds" %>%
  readRDS()

Descriptive Statistics

# Names of the additional (non-hypothesis) columns described above.
extra <- c("gender", "ethnic", "age", "employ",
           "educat", "party", "adhd", "work")

# Preview the first rows of the additional columns.
# all_of() marks `extra` as an external character vector of column names, so
# the selection is unambiguous (it can never be mistaken for a column called
# "extra") and fails loudly if any listed column is missing.
adhd.data %>%
  select(all_of(extra)) %>%
  head() %>%
  formatAsTable()

gender	ethnic	age	employ	educat	party	adhd	work
Male	White or Caucasian	27	Full-time	Bachelor's degree	Moderate	Family member,Acquaintance	service
Male	White or Caucasian	30	Full-time	Bachelor's degree	Conservative	I do not know anyone with ADHD	professional
Male	White or Caucasian	33	Full-time	Bachelor's degree	Conservative	Prefer not to answer	professional
Female	White or Caucasian	54	Full-time	High school diploma	Liberal	Friend	service
Male	Hispanic or Latino	40	Full-time	Other	Liberal	Myself,Friend	Prefer not to answer
Male	Other	34	Full-time	Bachelor's degree	Moderate	Family member,Friend,Acquaintance	service

Gender

# Number of participants in each gender category.
formatAsTable(count(adhd.data, gender))

gender	n
Female	205
Male	195
Other	1
Prefer not to answer	2

Race

# Column totals for every ethnic_* column except the free-text one,
# listed from largest to smallest.
adhd.data %>%
  select(contains("ethnic_"), -ethnic_5_text) %>%
  map_int(~ sum(.x)) %>%
  enframe(name = "var", value = "count") %>%
  arrange(desc(count)) %>%
  formatAsTable()

var	count
ethnic_white	312
ethnic_asian	48
ethnic_black	31
ethnic_hispanic	22
ethnic_other	4
ethnic_prefer	2

Age

# Shared ggplot2 theme: slate-coloured plot and panel backgrounds with
# wheat-coloured text, axis labels and grid lines.
# Returns a theme object to be added to a plot with `+`.
plotTheme <- function() {
  slate.fill <- element_rect(fill = "#3b434f")
  wheat.text <- element_text(color = "wheat")

  theme(
    plot.background = slate.fill,
    panel.background = slate.fill,
    text = wheat.text,
    panel.grid = element_line(color = "wheat"),
    axis.text = wheat.text
  )
}

# Histogram of participant age, drawn with the shared plot theme.
ggplot(adhd.data, aes(x = age)) +
  geom_histogram(fill = "antiquewhite4") +
  plotTheme()

Employment Status

# Participants per employment status, most common first.
adhd.data %>%
  count(employ, sort = TRUE) %>%
  formatAsTable()

employ	n
Full-time	263
Part-time	56
Not currently working or looking for work	24
Unemployed	23
Other	15
Student	11
Prefer not to answer	10
Temporary	1

Political Ideology

# Participants per political ideology, most common first.
adhd.data %>%
  count(party, sort = TRUE) %>%
  formatAsTable()

party	n
Liberal	190
Conservative	103
Moderate	100
Other	6
Prefer not to answer	4

ADHD Relationships

# Column totals for every adhd_* column, skipping any column whose name
# contains "text", listed from largest to smallest.
adhd.data %>%
  select(contains("adhd_"), -contains("text")) %>%
  map_int(~ sum(.x)) %>%
  enframe(name = "var", value = "count") %>%
  arrange(desc(count)) %>%
  formatAsTable()

var	count
adhd_nobody	134
adhd_friend	134
adhd_family	124
adhd_acquaintance	85
adhd_myself	42
adhd_coworker	42
adhd_classmate	17
adhd_prefer	8

# Names of the adhd_* indicator columns that refer to another person
# (i.e. everything except "myself", "nobody" and "prefer").
somebody.vars <- paste0(
  "adhd_",
  c("friend", "family", "acquaintance", "coworker", "classmate")
)

# Collapse the ADHD-relationship indicators into one three-level variable:
#   adhd_somebody - TRUE when any of the `somebody.vars` columns is set
#   adhd_simple   - "myself" if adhd_myself is set, otherwise "somebody" if
#                   adhd_somebody is set, otherwise "nobody"
#
# c_across(all_of(...)) is the rowwise way to gather the named columns of the
# current row into one vector; all_of() marks `somebody.vars` as an external
# character vector of column names rather than a column of the data.
#
# NOTE(review): rows with none of the indicators set (e.g. only "prefer not
# to answer") fall through to "nobody" - confirm this is intended.
# NOTE(review): factor() runs once per row here, so the level order appears
# to follow first appearance in the data rather than being fixed - confirm
# before relying on a reference level.
adhd.data <- adhd.data %>%
  rowwise() %>%
  mutate(adhd_somebody = any(c_across(all_of(somebody.vars))),
         adhd_simple = factor(ifelse(adhd_myself, "myself",
                                     ifelse(adhd_somebody, "somebody",
                                            "nobody")))) %>%
  # Drop the rowwise grouping so later verbs operate on whole columns again.
  ungroup()

# Participants per simplified ADHD-relationship category, most common first.
formatAsTable(
  arrange(
    count(adhd.data, adhd_simple),
    desc(n)
  )
)

adhd_simple	n
somebody	221
nobody	140
myself	42

Work History

# Column totals for every work_* column, skipping any column whose name
# contains "text", listed from largest to smallest.
adhd.data %>%
  select(contains("work_"), -contains("text")) %>%
  map_int(~ sum(.x)) %>%
  enframe(name = "var", value = "count") %>%
  arrange(desc(count)) %>%
  formatAsTable()

var	count
work_professional	248
work_service	179
work_manual	75
work_other	22
work_prefer	16

Education

# Participants per highest degree earned, most common first.
adhd.data %>%
  count(educat, sort = TRUE) %>%
  formatAsTable()

educat	n
Bachelor's degree	190
High school diploma	92
Master's degree	54
Associate's degree	52
Doctoral degree	6
Other	5
Prefer not to answer	4

Intercorrelations

Controls

Output document:

# Allow duplicate chunk labels while knitting, then render this notebook
# into the thesis output directory.
options(knitr.duplicate.label = "allow")
rmarkdown::render(
  input = "exploratory.Rmd",
  output_dir = file.path("..", "github", "thesis")
)