# Bar chart of the number of rows (participants) per country; the country
# factor is ordered by how often each country occurs in the data.
marp_proc %>%
  mutate(country = fct_rev(fct_infreq(country))) %>%
  ggplot(aes(y = country)) +
  geom_bar() +
  scale_x_continuous(breaks = seq(from = 0, to = 1500, by = 250)) +
  labs(
    y = NULL,
    title = "Number of participants by country"
  )

Age has a few obvious outliers (e.g. an age of 0 or 1) that we could remove. However, given the sample size, these outliers are unlikely to have a large influence on the model; therefore, we won’t remove them.
# Age distribution per country: a narrow boxplot overlaid with a ridgeline
# density. Rows with missing age are dropped and countries are ordered by
# their median age (the default summary used by fct_reorder()).
marp_proc %>%
  drop_na(age) %>%
  mutate(country = fct_reorder(country, age, .fun = median)) %>%
  ggplot(aes(x = age, y = country, fill = country)) +
  geom_boxplot(
    width = .2,
    alpha = .5,
    outlier.alpha = .2,
    show.legend = FALSE
  ) +
  geom_density_ridges(alpha = .5, show.legend = FALSE) +
  labs(
    y = NULL,
    title = "Age distribution by country"
  )
## Picking joint bandwidth of 2.45

# Share of participants per country whose gender is "Female" (missing gender
# values are ignored in the mean), drawn as a dot plot on a fixed 0-1 axis
# labelled in percent.
marp_proc %>%
  group_by(country) %>%
  summarise(pct_female = mean(gender == "Female", na.rm = TRUE)) %>%
  mutate(country = fct_reorder(country, pct_female)) %>%
  ggplot(aes(x = pct_female, y = country)) +
  geom_point() +
  scale_x_continuous(
    labels = scales::percent_format(),
    limits = c(0, 1)
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Proportion of females by country"
  )
## `summarise()` ungrouping output (override with `.groups` argument)

Education is a common confounder when the relationship between religiosity and well-being is considered. There is a considerable spread of education within and between countries.
# Ridgeline plot of education per country, with countries ordered by their
# median education.
# Rows with missing education are dropped first (as is done for age and SES),
# so that a single NA cannot turn a country's median into NA and distort the
# ordering of the y axis.
marp_proc %>%
  drop_na(education) %>%
  mutate(country = fct_reorder(country, education, .fun = median)) %>%
  ggplot() +
  aes(y = country, x = education, fill = country) +
  geom_density_ridges(show.legend = FALSE) +
  labs(
    title = "Education by country",
    y = NULL,
    x = NULL
  )
## Picking joint bandwidth of 0.288

Socio-economic status is a common confounder when the relationship between religiosity and well-being is considered. There is a considerable spread of SES within and between countries.
# Ridgeline plot of socio-economic status per country. Rows with missing SES
# are dropped and countries are ordered by their median SES (the default
# summary used by fct_reorder()).
marp_proc %>%
  drop_na(ses) %>%
  mutate(country = fct_reorder(country, ses, .fun = median)) %>%
  ggplot(aes(x = ses, y = country, fill = country)) +
  geom_density_ridges(show.legend = FALSE) +
  labs(
    x = NULL,
    y = NULL,
    title = "Socio-economic status by country"
  )
## Picking joint bandwidth of 0.403

Religious denomination is often used when the connection between religiosity and well-being is considered.
# Stacked bars of the raw denomination counts within each country; the
# country factor is ordered by how often each country occurs in the data.
marp_proc %>%
  mutate(country = fct_rev(fct_infreq(country))) %>%
  count(country, denomination) %>%
  ggplot(aes(x = n, y = country, fill = denomination)) +
  geom_col(position = "stack") +
  labs(
    y = NULL,
    fill = "Denomination",
    title = "Number/Proportion of denominations by country"
  )

However, in its current form, the variable contains categories that are rather sparse, and this may interfere with the statistical model that we want to use. Therefore, we choose to lump together levels that account for less than 1% of the observations into “Other”.
# Per-subject lookup table with a simplified denomination (`denom_lump`).
# Step 1 collapses text variants by pattern: anything mentioning "Muslim",
# "Christian"/"Evangelical" or "Other" gets that single label, and a missing
# denomination becomes "No denomination"; all other values are kept as-is.
# Step 2 lumps every remaining level that makes up less than 1% of the rows
# into "Other".
lumped_denom <-
  marp_proc %>%
  mutate(
    denom_lump = case_when(
      str_detect(denomination, "Muslim") ~ "Muslim",
      str_detect(denomination, "Christian|Evangelical") ~ "Christian",
      str_detect(denomination, "Other") ~ "Other",
      is.na(denomination) ~ "No denomination",
      TRUE ~ denomination
    ),
    denom_lump = fct_lump_prop(denom_lump, prop = .01, other_level = "Other")
  ) %>%
  select(subject, denom_lump)
# Stacked bars of the lumped denomination counts within each country.
# `lumped_denom` is joined back onto the data by `subject` to obtain
# `denom_lump`. The previous `label` aesthetic was dropped because no layer
# in this plot draws text, so the mapping had no effect.
marp_proc %>%
  left_join(lumped_denom, by = "subject") %>%
  mutate(country = fct_infreq(country) %>% fct_rev()) %>%
  count(country, denom_lump) %>%
  ggplot() +
  aes(y = country, x = n, fill = denom_lump) +
  geom_col(position = "stack") +
  labs(
    title = "Number/Proportion of denominations by country",
    subtitle = "Denominations that were infrequent (<1%) were lumped together",
    y = NULL,
    fill = "Denomination"
  )

Well-being might be influenced by ethnicity, but not directly. For example, minority status might be associated with well-being, but there were no questions about this. Therefore, in my opinion, raw ethnicity should not be added to the model.
# Bar chart of the number of rows per ethnicity; the ethnicity factor is
# ordered by frequency and the (redundant) fill legend is hidden.
marp_proc %>%
  mutate(ethnicity = fct_rev(fct_infreq(ethnicity))) %>%
  ggplot(aes(y = ethnicity, fill = ethnicity)) +
  geom_bar(show.legend = FALSE) +
  labs(y = NULL)

# Lookup table mapping the cultural-norm item names to their question text.
cnorm_questions <-
  tribble(
    ~name,     ~question,
    "cnorm_1", "Importance of religious lifestyle for average person in country",
    "cnorm_2", "Importance of belief in God/Gods for average person in country"
  )
# Country-level mean of the two cultural-norm items, one facet per question.
# The items are reshaped to long format, averaged per country and item, and
# labelled with their question text. reorder_within()/scale_y_reordered()
# let each facet order the countries by its own average.
marp_proc %>%
  select(subject, country, cnorm_1, cnorm_2) %>%
  pivot_longer(
    cols = c("cnorm_1", "cnorm_2"),
    names_to = "name",
    values_to = "value"
  ) %>%
  group_by(country, name) %>%
  summarise(avg = mean(value)) %>%
  ungroup() %>%
  left_join(cnorm_questions, by = "name") %>%
  mutate(country = reorder_within(country, avg, question)) %>%
  ggplot(aes(x = avg, y = country)) +
  geom_point() +
  scale_y_reordered(NULL) +
  facet_wrap(~question, scales = "free_y", ncol = 2) +
  labs(title = "Normativity of religion by country in two questions")
## `summarise()` regrouping output by 'country' (override with `.groups` argument)

# Dot plot of the mean `gdp` value per country, with countries ordered by
# that value and the x axis formatted as dollars.
marp_proc %>%
  group_by(country) %>%
  summarise(gdp = mean(gdp)) %>%
  ungroup() %>%
  mutate(country = fct_reorder(country, gdp)) %>%
  ggplot(aes(x = gdp, y = country)) +
  geom_point() +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(title = "GDP per capita by country")
## `summarise()` ungrouping output (override with `.groups` argument)

# Composition of sample types within each country.
# The title promises proportions, so the bars are normalised to the same
# length with position = "fill" (instead of stacking raw counts) and the
# x axis is labelled in percent. The country factor is still ordered by how
# often each country occurs in the data.
marp_proc %>%
  mutate(country = fct_infreq(country) %>% fct_rev()) %>%
  count(country, sample_type) %>%
  ggplot() +
  aes(y = country, x = n, fill = sample_type) +
  geom_col(position = "fill") +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Proportion of sample type by country",
    x = NULL,
    y = NULL,
    fill = "Sample type"
  )

Compensation is very closely associated with the sample type; therefore, using it in the model would be redundant.
# Composition of compensation types within each country.
# The title promises proportions, so the bars are normalised to the same
# length with position = "fill" (instead of stacking raw counts) and the
# x axis is labelled in percent. The country factor is still ordered by how
# often each country occurs in the data.
marp_proc %>%
  mutate(country = fct_infreq(country) %>% fct_rev()) %>%
  count(country, compensation) %>%
  ggplot() +
  aes(y = country, x = n, fill = compensation) +
  geom_col(position = "fill") +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Proportion of compensation by country",
    x = NULL,
    y = NULL,
    fill = "Compensation"
  )
