#' Bar chart of the relative frequency of each value of one variable.
#'
#' @param data A data frame.
#' @param x Unquoted column (or expression on columns) mapped to the x axis.
#' @return A ggplot object; bar heights are each value's share of all counted
#'   rows and the y axis is formatted as a percentage.
#'
#' Bars are filled with `col_base`, which is not defined here and must exist
#' in the calling environment.
my_hist <- function(data, x) {
  data %>%
    ggplot(aes(x = {{ x }})) +
    # after_stat() replaces stat(), which ggplot2 has deprecated.
    geom_bar(
      aes(y = after_stat(count / sum(count))),
      width = 0.9, fill = col_base
    ) +
    scale_y_continuous("", labels = scales::percent)
}
# Split `name` at its last underscore into `name` (the stem) and `number`
# (the trailing piece), then coerce `number` to numeric.
sep_mult <- function(data) {
  split_data <- separate(
    data, name,
    into = c("name", "number"),
    sep = "_(?=[^_]+$)"
  )
  mutate(split_data, number = as.numeric(number))
}
# For every (name, number) cell: sum of `value` (NAs ignored) divided by the
# number of rows in the cell (NAs included in the denominator).
group_share <- function(data) {
  by_option <- group_by(data, name, number)
  summarise(by_option, share = sum(value, na.rm = TRUE) / n())
}
# ggplot component that blanks the x, y and fill titles.
no_labs <- function() {
  empty <- ""
  labs(x = empty, y = empty, fill = empty)
}
# Continuous y scale with an empty title and percentage tick labels.
y_percent <- function() {
  scale_y_continuous(name = "", labels = scales::percent)
}
#' Look up the variable label(s) for a variable name.
#'
#' @param var Variable name to look up (compared against `data$name`).
#' @param data Label table with `name` and `var_label` columns; defaults to
#'   the global `labels`.
#' @return The unique `var_label` values of the matching rows.
give_var_label <- function(var, data = labels) {
  data %>%
    # .env$ forces the function argument; a column called `var` in `data`
    # would otherwise take precedence inside filter().
    filter(name == .env$var) %>%
    pull(var_label) %>%
    unique()
}
# Count each `value` within each `name` and add `share`, the count divided by
# the total count for that `name`. The result stays grouped by `name`.
sh_by_value <- function(data) {
  counts <- count(group_by(data, name), value)
  mutate(counts, share = n / sum(n))
}
#' Look up the value labels of a variable, up to a maximum answer code.
#'
#' @param var Variable name to look up (compared against `data$name`).
#' @param val Largest answer code to keep (inclusive); default 999.
#' @param data Label table with `name`, `value` and `label` columns; defaults
#'   to the global `labels`.
#' @return The `label` column of the matching rows, in table order.
give_val_label <- function(var, val = 999, data = labels) {
  data %>%
    # .env$ forces the function arguments; columns called `var` or `val` in
    # `data` would otherwise take precedence inside filter().
    filter(name == .env$var & value <= .env$val) %>%
    pull(label)
}
#' Labelled relative-frequency bar chart for one parent-survey variable.
#'
#' @param data A data frame containing the column named by `x`.
#' @param x Column name as a string.
#' @return A ggplot object: one bar per answer label (ordered by answer code),
#'   heights as shares of all rows, titled with the variable's label.
#'
#' Relies on the globals `labels_p` (label table) and `col_base` (bar fill).
bar_one_var <- function(data, x) {
  data %>%
    pivot_longer(all_of(x)) %>%
    mutate(name = x) %>%
    # No `by`: joins on whatever columns the long data and labels_p share.
    left_join(labels_p) %>%
    ggplot(aes(x = fct_reorder(label %>% str_wrap(17), value))) +
    # after_stat() replaces stat(), which ggplot2 has deprecated.
    geom_bar(
      aes(y = after_stat(count / sum(count))),
      width = 0.9, fill = col_base
    ) +
    no_labs() + y_percent() +
    ggtitle(give_var_label(x, labels_p))
}
# Standardise a numeric vector: subtract its mean and divide by its standard
# deviation, both computed with NAs removed. NAs in `x` stay NA in the result.
standard <- function(x) {
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  (x - centre) / spread
}
# Raw Stata exports: ds_raw from 2020Estudiantes.dta, dp_raw from 2020Padres.dta.
ds_raw <- haven::read_dta(here("data", "2020Estudiantes.dta"))
dp_raw <- haven::read_dta(here("data", "2020Padres.dta"))

# Label workbook: sheets 1 and 2 feed var_labels / val_labels, sheets 3 and 4
# feed the *_p (parent) versions.
labels_xlsx <- here("data", "var_val_labels.xlsx")
var_labels <- read_xlsx(labels_xlsx, sheet = 1)
val_labels <- read_xlsx(labels_xlsx, sheet = 2)
var_labels_p <- read_xlsx(labels_xlsx, sheet = 3)
val_labels_p <- read_xlsx(labels_xlsx, sheet = 4)
# Variable labels: keep rows whose type mentions "select" or "integer", split
# the type into its kind and the name of its value-label list, and use the
# label_ita column as the variable label.
var_labels <- var_labels %>%
  filter(str_detect(type, "select") | str_detect(type, "integer")) %>%
  separate(type, into = c("type", "val_label_name"), sep = " ") %>%
  select(-c(type, label)) %>%
  rename(var_label = label_ita) %>%
  # NOTE(review): removes every "_1" in the name, not only a trailing one.
  mutate(name = str_replace_all(name, "_1", ""))

# Value labels: use label_ita as the label, numeric answer codes.
val_labels <- val_labels %>%
  select(-label) %>%
  rename(
    val_label_name = list_name,
    label = label_ita
  ) %>%
  mutate(value = as.numeric(value))

# One row per variable x answer code.
labels <- left_join(var_labels, val_labels)

# var_nr numbers the distinct variable labels in order of first appearance;
# it is used further down to order variables on plot axes.
labels <- labels %>%
  distinct(var_label) %>%
  mutate(var_nr = row_number()) %>%
  left_join(labels)
# parents
# Same preparation as for the student labels, applied to the parent sheets.
var_labels_p <- var_labels_p %>%
  filter(str_detect(type, "select") | str_detect(type, "integer")) %>%
  separate(type, into = c("type", "val_label_name"), sep = " ") %>%
  select(-c(type, label)) %>%
  rename(var_label = label_ita) %>%
  # NOTE(review): removes every "_1" in the name, not only a trailing one.
  mutate(name = str_replace_all(name, "_1", "")) %>%
  # This one variable gets a value-label list named after itself.
  mutate(
    val_label_name = ifelse(
      name == "support_help_child_whom_shouldve",
      "support_help_child_whom_shouldve",
      val_label_name
    )
  )

# Value labels: use label_ita as the label, numeric answer codes.
val_labels_p <- val_labels_p %>%
  select(-label) %>%
  rename(
    val_label_name = list_name,
    label = label_ita
  ) %>%
  mutate(value = as.numeric(value))

# One row per variable x answer code.
labels_p <- left_join(var_labels_p, val_labels_p)

# var_nr numbers the distinct variable labels in order of first appearance.
labels_p <- labels_p %>%
  distinct(var_label) %>%
  mutate(var_nr = row_number()) %>%
  left_join(labels_p)
#---- Children
# Drop the practice observations (the first 13 rows), strip the Stata value
# labels, remember each remaining row's position in `row`, and discard rows
# whose id_stud is 0.
ds <- zap_labels(ds_raw[-seq_len(13), ])
ds <- mutate(ds, row = row_number())
ds <- filter(ds, id_stud != 0)
# Helper used many times below: cut the part after the last underscore off
# `name`, discard it, and spread the long data back to one column per
# remaining name.
sep_pivot_wide <- function(data) {
  data %>%
    separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
    select(-number) %>%
    pivot_wider()
}
# Combine the different treatments, which SurveyCTO stored as separate
# variables, into single variables.
# Every stage follows the same pattern: pivot the treatment-specific columns
# to long format (dropping NAs), rename them to a common name, and pivot back
# to wide. Each stage works on the output of the previous one.
ds <- ds %>%
# scenarios closed questions
relocate(starts_with("s") & ends_with("_closed"), .before = "start_b") %>%
pivot_longer(c(sb1_closed:sa3_closed), values_drop_na = T) %>%
# "sb" and "sa" both become "s", so the two sets share column names
mutate(name = str_replace_all(name, c("sb" = "s", "sa" = "s"))) %>%
pivot_wider() %>%
# scenarios open questions
pivot_longer(c(start_b:end_a), values_drop_na = T) %>%
# empty strings are treated like missing values
filter(value != "") %>%
mutate(name = str_replace_all(
name, c("sb" = "s", "sa" = "s", "_b" = "_s", "_a" = "_s"))
) %>%
pivot_wider() %>%
# manipulation checks
pivot_longer(c(man_lively_1:man_mad_5), values_drop_na = T) %>%
sep_pivot_wide() %>%
# raven and stroop scores
pivot_longer(starts_with("raven") | matches("^stroop[0-9]"), values_drop_na = T) %>%
sep_pivot_wide() %>%
# time raven and stroop
pivot_longer(starts_with(c("time_raven", "start_raven", "end_raven",
"start_stroop", "stroop_time","end_stroop")),
values_drop_na = T) %>%
filter(value != "") %>%
sep_pivot_wide() %>%
# emotions (character variables)
pivot_longer(
starts_with(c("start_depr", "end_depr", "start_emotions", "end_emotions")),
values_drop_na = T
) %>%
filter(value != "") %>%
sep_pivot_wide() %>%
# emotions (numeric variables)
pivot_longer(c(depr_nervous_1:reciproc_negative_1,
depr_nervous_2:reciproc_negative_2),
values_drop_na = T) %>%
sep_pivot_wide()
# Dropping and re-arranging variables
# The relocate() calls put the columns created by the pivots above next to
# the survey section they belong to; they do not change any values.
ds %<>%
select(-c(stroop_instr1_1:stroop_test_start_2)) %>%
relocate(c(formdef_version:endtime), .before = "id_stud") %>%
# presumably device/submission metadata that the analysis does not need
select(-c(deviceid, subscriberid, simid, devicephonenum, formdef_version, key)) %>%
relocate(c(s1_closed:end_s), .after = "survey_version_check") %>%
relocate(c(man_happy:man_tired), .after = "end_man") %>%
relocate(c(raven1:raven10), .after = "tests_order") %>%
relocate(c(time_raven_1:end_raven), .after = "raven10") %>%
relocate(c(stroop1:stroop20), .after = "time_raven_11") %>%
relocate(c(start_stroop:end_stroop), .after = "stroop20") %>%
relocate(c(start_depr:reciproc_negative), .after = "emotions_first") %>%
# this variable had the wrong question
select(-emot_angry)
#---- Parents (same steps)
# Drop the practice observations (the first 11 rows), strip the Stata value
# labels, remember each remaining row's position in `row`, and discard rows
# whose id_stud is 0.
dp <- zap_labels(dp_raw[-seq_len(11), ])
dp <- mutate(dp, row = row_number())
dp <- filter(dp, id_stud != 0)
# Combine the different treatments, which SurveyCTO stored as separate
# variables, into single variables.
# Same stage-by-stage pattern as for the students: pivot to long format
# (dropping NAs), rename to a common name, pivot back to wide.
dp <- dp %>%
# scenarios closed questions
relocate(starts_with("s") & ends_with("_closed"), .before = "start_b") %>%
pivot_longer(c(sb1_closed:sa3_closed), values_drop_na = T) %>%
# "sb" and "sa" both become "s", so the two sets share column names
mutate(name = str_replace_all(name, c("sb" = "s", "sa" = "s"))) %>%
pivot_wider() %>%
# scenarios open questions
pivot_longer(c(start_b:end_a), values_drop_na = T) %>%
# empty strings are treated like missing values
filter(value != "") %>%
mutate(name = str_replace_all(
name, c("sb" = "s", "sa" = "s", "_b" = "_s", "_a" = "_s"))
) %>%
pivot_wider() %>%
# manipulation checks
pivot_longer(c(man_lively_1:man_mad_5), values_drop_na = T) %>%
sep_pivot_wide() %>%
# raven and stroop scores
pivot_longer(starts_with("raven") | matches("^stroop[0-9]"), values_drop_na = T) %>%
sep_pivot_wide() %>%
# time raven and stroop
pivot_longer(starts_with(c("time_raven", "start_raven", "end_raven",
"start_stroop", "stroop_time","end_stroop")),
values_drop_na = T) %>%
filter(value != "") %>%
sep_pivot_wide() %>%
# emotions (character variables); unlike the student pipeline, no
# start_depr / end_depr columns are selected here
pivot_longer(
starts_with(c("start_emotions", "end_emotions")),
values_drop_na = T
) %>%
filter(value != "") %>%
sep_pivot_wide() %>%
# emotions (numeric variables)
pivot_longer(c(depr_nervous_1:self_improvelife_1,
depr_nervous_2:self_improvelife_2),
values_drop_na = T) %>%
sep_pivot_wide()
# Dropping and re-arranging variables (parents).
# The relocate() calls put the columns created by the pivots above next to
# the survey section they belong to; they do not change any values.
dp %<>%
select(-c(stroop_instr1_1:stroop_test_start_2)) %>%
relocate(c(formdef_version:endtime), .before = "id_stud") %>%
# presumably device/submission metadata that the analysis does not need
select(-c(deviceid, subscriberid, simid, devicephonenum, formdef_version, key)) %>%
relocate(c(s1_closed:end_s), .after = "end_demographics") %>%
relocate(c(man_happy:man_tired), .after = "end_man") %>%
relocate(c(raven1:raven10), .after = "tests_order") %>%
relocate(c(time_raven_1:end_raven), .after = "raven10") %>%
relocate(c(stroop1:stroop20), .after = "time_raven_11") %>%
relocate(c(start_stroop:end_stroop), .after = "stroop20") %>%
relocate(c(start_emotions:self_improvelife), .after = "emotions_first")
# Duplicated ids.
# NOTE(review): dup_id_s is a vector of the duplicated student ids (one entry
# per duplicated row), whereas dup_id_p is the NUMBER of parent rows with a
# duplicated id. The two objects are not comparable despite the parallel names.
dup_id_s <- ds %>% get_dupes("id_stud") %>% pull(id_stud)
dup_id_p <- dp %>% group_by(id_stud) %>% filter(n() > 1) %>% nrow()
# Ids present in each survey, and how the two sets overlap
id_s <- ds %>% pull(id_stud) %>% sort()
id_p <- dp %>% pull(id_stud) %>% sort()
s_not_p <- setdiff(id_s, id_p)
p_not_s <- setdiff(id_p, id_s)
s_and_p <- intersect(id_s, id_p)
# without a match
# The timing/enumerator columns are moved to the front for manual inspection.
s_n <- ds %>% filter(id_stud %in% s_not_p) %>%
select(id_stud, start_s, starttime, endtime, username, other_present:other_difficulties, everything())
p_n <- dp %>% filter(id_stud %in% p_not_s) %>%
select(id_stud, start_s, starttime, endtime, username, other_present:other_difficulties, everything())
# The "_" column records the source table: "1" = students, "2" = parents
not <- bind_rows(s_n, p_n, .id = "_")
# not %>% arrange( start_s) %>% View()
# all together
s_all <- ds %>%
select(id_stud, start_s, starttime, endtime, username, other_present:other_difficulties, everything())
p_all <- dp %>%
select(id_stud, start_s, starttime, endtime, username, other_present:other_difficulties, everything())
# NOTE(review): this object shares its name with base::all()
all <- bind_rows(s_all, p_all, .id = "_")
# all %>% mutate(not = ifelse(id_stud %in% c(s_not_p, p_not_s), 1, 0 )) %>%
# relocate(not) %>% arrange(start_s) %>% View()
# For now, mark duplicated student ids with an asterisk: the first row of each
# id keeps it unchanged, every later row gets "<id>*". id_stud is character
# afterwards, and the helper column id_stud_temp stays in ds.
# Both branches are character on purpose: with a numeric id_stud, ifelse()
# would return double for single-row groups and character for duplicated
# groups, and the grouped mutate() could not combine them.
# Only the students are treated here; parent ids are left as they are.
ds <- ds %>%
  group_by(id_stud) %>%
  mutate(
    id_stud_temp = ifelse(
      row_number() > 1,
      paste0(id_stud, "*"),
      as.character(id_stud)
    )
  ) %>%
  ungroup() %>%
  mutate(id_stud = id_stud_temp)
There are 25 students whose parent was not interviewed and 31 parents whose child was not interviewed. This limits the correlation analysis to 71 student-parent pairs.
There are also some duplicates (2 for students and 1 for parents). Just looking at this data, for one duplicate we can probably identify which record is the correct one, because the parent was interviewed just afterwards. For the other ones there is no way to tell: the parent with that id was not interviewed, and no unmatched parent was interviewed within a nearby time window. I will look at the test scores to see if we can at least tell which is the most plausible correct recording of the id.
#---- Children
# Raven: answer key for items raven1..raven10, in item order.
raven_correct <- c(2, 2, 4, 1, 5, 5, 3, 2, 3, 3)
n_obs <- nrow(ds)
# One long row per student x item; the key is repeated once per student so it
# lines up with the items, then correct answers are summed per id_stud and the
# score is joined back onto ds.
ds <- ds %>%
  pivot_longer(c(raven1:raven10)) %>%
  arrange(id_stud) %>%
  mutate(
    raven_correct = rep(raven_correct, times = n_obs),
    score = ifelse(value == raven_correct, 1, 0)
  ) %>%
  group_by(id_stud) %>%
  summarise(raven = sum(score, na.rm = TRUE)) %>%
  right_join(ds)
p1 <- my_hist(ds, raven) + xlab("Raven")

# Stroop: answer key for items stroop1..stroop20, scored the same way.
stroop_correct <- c(2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1)
ds <- ds %>%
  pivot_longer(c(stroop1:stroop20)) %>%
  arrange(id_stud) %>%
  mutate(
    stroop_correct = rep(stroop_correct, times = n_obs),
    score = ifelse(value == stroop_correct, 1, 0)
  ) %>%
  group_by(id_stud) %>%
  summarise(stroop = sum(score, na.rm = TRUE)) %>%
  right_join(ds)
p2 <- my_hist(ds, stroop) + xlab("Stroop")
#----- Parents
# Parent ids are not disambiguated (only the student ids get an asterisk), so
# a duplicated id_stud covers more than one interview. Scores are therefore
# computed per interview, using the unique `row` column created when dp was
# built; grouping by id_stud would add the items of both interviews together.

# Raven: answer key for items raven1..raven10, in item order.
raven_correct <- c(5, 3, 6, 6, 7, 7, 1, 8, 2, 5)
n_obs <- nrow(dp)
dp <- dp %>%
  left_join(
    dp %>%
      # pivot_longer() emits each interview's items consecutively and in
      # column order, so repeating the key n_obs times lines it up.
      pivot_longer(c(raven1:raven10)) %>%
      mutate(
        raven_correct = rep(raven_correct, times = n_obs),
        score = ifelse(value == raven_correct, 1, 0)
      ) %>%
      group_by(row) %>%
      summarise(
        raven = sum(score, na.rm = TRUE),
        # Counts of answers coded 99 and 88 (plotted below as "NA" and "DK").
        # NOTE(review): no na.rm here, so one missing item makes the count NA.
        raven99 = sum(value == 99),
        raven88 = sum(value == 88)
      ),
    by = "row"
  ) %>%
  # Same column and row order as before: id and scores first, sorted by id.
  relocate(id_stud, raven, raven99, raven88) %>%
  arrange(id_stud)
p3 <- my_hist(dp, raven) + xlab("Raven Parents")
p4 <- my_hist(dp, raven88) + xlab("Raven DK Parents")
p5 <- my_hist(dp, raven99) + xlab("Raven NA Parents")
p6 <- dp %>%
  mutate(raven8899 = raven88 + raven99) %>%
  my_hist(raven8899) + xlab("Raven DK + NA Parents")

# Stroop: answer key for items stroop1..stroop20, scored the same way.
stroop_correct <- c(2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1)
dp <- dp %>%
  left_join(
    dp %>%
      pivot_longer(c(stroop1:stroop20)) %>%
      mutate(
        stroop_correct = rep(stroop_correct, times = n_obs),
        score = ifelse(value == stroop_correct, 1, 0)
      ) %>%
      group_by(row) %>%
      summarise(stroop = sum(score, na.rm = TRUE)),
    by = "row"
  ) %>%
  relocate(id_stud, stroop) %>%
  arrange(id_stud)
p7 <- my_hist(dp, stroop) + xlab("Stroop Parents")

# p1 and p2 are the student histograms built earlier in the script.
grid.arrange(p1, p2, p3, p7, p4, p5, p6, ncol = 2)
I will add the labels a bit later and arrange things more nicely. For now, note that the "other" category (66) has a lot of responses. We need to code them to see whether they fit existing answer categories or are truly "other".
# main activities
# For each activity option, the share of rows in which it was selected,
# with one facet per variable label.
p1 <- ds %>%
pivot_longer(c(activ_morning_1:activ_morning_99,
activ_afternoon_1:activ_afternoon_99,
activ_evening_1:activ_evening_99)) %>%
sep_mult() %>%
group_share() %>%
# labels stores the answer code in `value`; rename it to match `number`
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
geom_col(fill = col_base) +
scale_y_continuous("", labels = scales::percent) +
facet_wrap(~var_label) +
coord_flip() +
labs(x = "", y = "") +
ggtitle("Attività giornaliere durante la quarantena")
# studying time
# Recode every study* variable to 1 (any positive value) or 0.
temp <- ds %>%
pivot_longer(starts_with("study")) %>%
mutate(value = as.numeric(value),
value = ifelse(value > 0, 1, 0))
# Share of students with a positive value in at least one study* variable.
# sum(value) has no na.rm, so a student with any missing study* value gets NA.
any_studying <- temp %>% group_by(id_stud) %>%
summarise(study = sum(value)) %>%
summarise(share = sum(study > 0)/n())
# Share of students who studied, per study* variable.
# NOTE(review): the rows are sorted by descending share and the labels
# 'Mattina', 'Pomeriggio', 'Sera' are then attached by position, so the bars
# are only labelled correctly if the shares happen to decrease in
# morning/afternoon/evening order. Confirm, or map labels from `name`.
p2 <- temp %>%
group_by(name) %>%
count(value) %>%
mutate(share = n/sum(n)) %>%
filter(value == 1) %>%
arrange(desc(share)) %>%
ggplot(aes(
x = factor(name, levels = name, labels = c('Mattina', 'Pomeriggio', 'Sera')),
y = share)) +
geom_col(fill = col_base, width = 0.6) +
ggtitle("Studiato o fatto i compiti") +
no_labs() + y_percent()
# p1 fills the left column (two rows); p2 sits bottom-right
grid.arrange(p1, p2, nrow = 3, layout_matrix = cbind(c(1,1,NA), c(1,1, 2)))
# Access to books and guides
# Share of rows in which each access_books option was selected.
p_access <- ds %>%
pivot_longer(c(access_books_0:access_books_2)) %>%
sep_mult %>%
group_share() %>%
# labels stores the answer code in `value`; rename it to match `number`
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(str_wrap(label, 30)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(give_var_label("access_books")) +
coord_flip() +
no_labs() + y_percent()
# How many printed and solved
# Distribution of answers for the two variables, side by side; answer codes
# of 88 and above are dropped after the shares have been computed.
p_solved <- ds %>%
pivot_longer(c(print_books_many, solved_boooks_many)) %>%
sh_by_value() %>%
filter(value < 88) %>%
left_join(labels) %>%
ggplot(aes(x = fct_reorder(label %>% str_wrap(30), value, .desc = T), y = share, fill = factor(var_label %>% str_wrap(30)))) +
geom_col(position = position_dodge2(reverse = T)) +
scale_fill_ghibli_d(pal, direction = -1) +
no_labs() + labs(fill = "") + y_percent() + ggtitle("Materiale didattico") +
coord_flip() +
theme(legend.position = c(0.7, 0.8),
legend.direction = "vertical",
legend.background = element_rect(colour = "transparent", fill = "white"))
#theme(axis.text.x = element_text(angle = 90))
# How printed
# Share of rows in which each print_books_how option was selected; options
# numbered 88 and above are left out of the plot.
p_print_how <- ds %>%
pivot_longer(c(print_books_how_1:print_books_how_99)) %>%
sep_mult %>%
group_share() %>%
filter(number < 88) %>%
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(give_var_label("print_books_how")) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# How the books were worked on
# paper vs electronic
p_work_how <- ds %>%
pivot_longer(c(how_work_books_1:how_work_books_99)) %>%
sep_mult %>%
group_share() %>%
filter(number < 88) %>%
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(give_var_label("how_work_books")) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# on her own or with help
# Distribution of work_books_alone among students with a non-missing answer.
p_alone<- ds %>%
pivot_longer(work_books_alone) %>%
count(value) %>%
na.omit() %>%
mutate(share = n/sum(n)) %>%
# count() dropped `name`; restore it so the join with labels can match on it
mutate(name = 'work_books_alone') %>%
left_join(labels) %>%
ggplot(aes(x = fct_reorder(factor(str_wrap(label, 15)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(give_var_label("work_books_alone")) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# books difficult / useful
# Shares are computed over the non-missing answers, including codes of 88 and
# above, which are removed only afterwards.
p_difficult <- ds %>%
pivot_longer(c(books_difficult, books_useful)) %>%
group_by(name) %>%
count(value) %>%
na.omit() %>%
mutate(share = n/sum(n)) %>%
filter(value < 88) %>%
left_join(labels) %>%
# answer codes 0/1/2 get a legend text that covers both questions
ggplot(aes(x = factor(var_label), y = share,
fill = factor(value, levels = c(0, 1, 2),
labels = c("Non difficile/interessante",
"Abbastanza difficile/interessante",
"Molto difficile/interessante"))
)) +
geom_col(position = position_dodge2()) +
scale_fill_ghibli_d(pal, direction = -1) +
ggtitle("Difficoltà/Utilità materiale didattico") +
no_labs() + labs(fill = "") + y_percent()
# Watched tv or listened to Radio?
# The factor labels come from the label table; this assumes that
# give_val_label() returns exactly one label per listed level, in level order.
p_tv <- ds %>% my_hist(factor(tv_educ, levels = c(0, 1, 2, 3),
labels = give_val_label("tv_educ", 3))) +
no_labs() + ggtitle(str_wrap(give_var_label("tv_educ"), 20))
# was it useful for learning
p_tv_useful <- ds %>% filter(!is.na(tv_educ_help)) %>%
my_hist(factor(tv_educ_help, levels = c(0, 1, 2),
labels = give_val_label("tv_educ_help", 2))) +
no_labs() + ggtitle(give_var_label("tv_educ_help"))
# Did other education activities
p_other_educ <- ds %>% filter(!is.na(other_educ)) %>%
my_hist(factor(other_educ, levels = c(0, 1, 2, 3),
labels = give_val_label("other_educ", 3))) +
no_labs() + ggtitle(str_wrap(give_var_label("other_educ"), 30))
# what other educ activ
# Share of rows in which each option was selected; options numbered above 66
# are removed before the shares are computed.
p_what_other_educ <- ds %>%
pivot_longer(c(other_educ_activ_1:other_educ_activ_99)) %>%
sep_mult() %>%
filter(number <= 66) %>%
group_share() %>%
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label %>% str_wrap(20)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(str_wrap(give_var_label("other_educ_activ"), 20)) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# Lay out / print the plots built above
grid.arrange(p_access, p_solved, nrow = 3, layout_matrix = cbind(c(1,2,2), c(NA,2, 2)))
grid.arrange(p_print_how, p_work_how, nrow = 2)
p_alone
p_difficult
grid.arrange(p_tv, p_tv_useful, p_other_educ, p_what_other_educ, nrow = 2)
Most students had access to at least some of the materials. Among those who responded that they did not have access, only one student said that he/she did not know that these handbooks were being handed out and could not print them. The others either did not give a reason or gave reasons that are hard to classify or understand. Five students reported not having worked on the exercise book: one because there was no one to help, one blamed her/his own laziness, and the others either gave no answer or gave one that was not listed.
# contacted (by) teachers, classmates
# Share of rows answering 1; missing answers stay in the denominator.
ds %>%
pivot_longer(c(contact_prof_yn, contact_by_prof_yn, contact_classmates_yn)) %>%
group_by(name) %>%
summarise(share = sum(value == 1, na.rm = T)/n()) %>%
# only the label rows for answer code 1 are joined (matched on `name`)
left_join(labels %>% filter(value == 1)) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = T), y = share)) +
geom_col(fill = col_base) +
ggtitle("Contatto con insegnanti e compagni") +
coord_flip() + no_labs() + y_percent()
# how often
# Answer distribution per variable; missing answers and codes of 66 and above
# are removed before the shares are computed.
ds %>%
pivot_longer(c(contact_prof_freq, contact_by_prof_freq, contact_classmates_freq)) %>%
group_by(name) %>%
count(value) %>%
na.omit() %>%
filter(value < 66) %>%
mutate(share = n/sum(n)) %>%
left_join(labels) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = T), y = share,
fill = factor(str_wrap(label, 20)))) +
geom_col(position = position_fill(reverse = TRUE)) +
scale_fill_ghibli_d(pal, direction = -1) +
ggtitle("Frequenza contatto") +
coord_flip() +
labs(x = "", y = "", fill = "") + y_percent()
# how contacted
# Share of non-missing rows in which each contact channel was selected;
# options numbered 66 and above are left out of the plot.
ds %>%
pivot_longer(starts_with(
c("contact_prof_how_", "contact_by_prof_how_", "contact_classmates_how_")),
values_drop_na = T) %>%
sep_mult() %>%
group_share() %>%
filter(number < 66) %>%
left_join(labels %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = T), y = share, fill = factor(label))) +
scale_fill_ghibli_d(pal, direction = -1) +
geom_col(position = position_dodge2(reverse = T)) +
coord_flip() +
ggtitle("Modalità contatto") +
labs(x = "", y = "", fill = "") + y_percent()
# why contacted?
# Share of non-missing rows in which each reason was selected, per contact
# variable. The free-text "..._other" columns are excluded, and options
# numbered above 66 are left out of the plot. Instead of a legend, each bar
# carries its reason as a text label placed at y = 0.5.
ds %>%
  pivot_longer(
    starts_with(
      c("contact_prof_why_", "contact_by_prof_why_", "contact_classmates_why_")
    ) & !ends_with("other"),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number <= 66) %>%
  # labels stores the answer code in `value`; rename it to match `number`
  left_join(labels %>% rename(number = value)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share, fill = factor(number)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_dodge2(reverse = FALSE)) +
  geom_text(
    aes(label = as.character(label), y = 0.5),
    position = position_dodge(.9), hjust = 0
  ) +
  ggtitle("Ragione contatto") +
  coord_flip() +
  labs(x = "", y = "") + y_percent() +
  # "none" replaces FALSE, which ggplot2 has deprecated for hiding a legend.
  guides(fill = "none")
# Stacked 100% bar chart of the answer distribution of a set of student items.
# All but one of the plots in this section are the same pipeline with
# different columns, cut-off, title and wrap widths, so it lives in one place.
#
# data         wide data frame holding the item columns (here always `ds`)
# cols         tidyselect expression choosing the item columns
# max_value    answer codes >= max_value are dropped before computing shares
# title        plot title
# var_width    wrap width for the item labels on the axis
# label_width  wrap width for the answer labels in the legend; NULL leaves
#              the labels unwrapped
#
# Uses the globals `labels` (label table) and `pal` (palette name).
# Returns a ggplot object.
plot_answer_shares <- function(data, cols, max_value, title,
                               var_width = 30, label_width = 17) {
  shares <- data %>%
    pivot_longer({{ cols }}) %>%
    group_by(name) %>%
    count(value) %>%
    filter(value < .env$max_value) %>%
    mutate(share = n / sum(n)) %>%
    left_join(labels)
  if (!is.null(label_width)) {
    shares <- mutate(shares, label = str_wrap(label, .env$label_width))
  }
  ggplot(
    shares,
    aes(
      x = fct_reorder(str_wrap(var_label, var_width), var_nr, .desc = TRUE),
      y = share,
      fill = fct_reorder(label, value)
    )
  ) +
    scale_fill_ghibli_d(pal, direction = -1) +
    geom_col(position = position_fill(reverse = TRUE)) +
    ggtitle(title) +
    coord_flip() +
    labs(x = "", y = "", fill = "") + y_percent()
}

# Depression
plot_answer_shares(ds, c(depr_nervous:depr_afraid), 66,
                   "Benessere emozionale", label_width = NULL)
# Self-efficacy
plot_answer_shares(ds, starts_with("self"), 66, "Autoefficacia")
# other skills
plot_answer_shares(ds, shy_authority:reciproc_negative, 66,
                   "Altre capacità socio-emotive")
plot_answer_shares(ds, c(rel_kids:rel_adults), 77,
                   "Relazioni familiari durante pandemia")
plot_answer_shares(ds, c(it_cellcall:it_pcarchive), 77,
                   "Capacità IT", var_width = 20)
plot_answer_shares(ds, c(covid_hands:covid_touch), 77,
                   "Comportamenti dalla pandemia", var_width = 20)
plot_answer_shares(ds, starts_with("covid_emot"), 77,
                   "Emozioni riguardo alla pandemia")

# Multiple-choice soleterre question: share of non-missing rows in which each
# option was selected. Free-text "..._other" columns are excluded and options
# numbered 66 and above are left out of the plot.
ds %>%
  pivot_longer(
    starts_with("soleterre_") & !ends_with("other"),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  # labels stores the answer code in `value`; rename it to match `number`
  left_join(labels %>% rename(number = value)) %>%
  filter(number < 66) %>%
  ggplot(aes(x = fct_reorder(label %>% str_wrap(17), number), y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("soleterre")) +
  # coord_flip() +
  labs(x = "", y = "") + y_percent()

plot_answer_shares(ds, starts_with("neigh"), 77, "Ambiente del quartiere")
plot_answer_shares(ds, starts_with("viol_"), 77,
                   "Esposizione alla violenza", var_width = 20)
# Variables covered in this part (copied from a column listing):
# "child_labor_yn"
# [327] "child_labor_activ" "child_labor_freq"
# [329] "school_useful" "aspire_school"
# [331] "expect_school" "expect_school_whynot"
# Each line takes row 2, column 3 of the frequency table.
# NOTE(review): positional indexing; which answer and which statistic this is
# depends on the table layout. Confirm it is the intended cell.
child_labor_sh <- ds$child_labor_yn %>% tabyl() %>% .[2, 3]
school_useful_sh <- ds$school_useful %>% tabyl() %>% .[2, 3]
expect_sh <- ds$expect_school %>% tabyl()
# Distribution of the aspired education level among non-missing answers.
# NOTE(review): the five labels are attached by position to the observed
# values in sorted order; this breaks if a level is absent or a sixth code
# appears.
ds %>%
pivot_longer(c(aspire_school),
values_drop_na = T) %>%
count(value) %>%
mutate(share = n/sum(n, na.rm = T)) %>%
ggplot(aes(x = factor(
value, levels = value,
labels =c("III ciclo", "Diploma generale",
"Diploma tecnico-vocazionale", "Educazione superiore", "Altro") %>%
str_wrap(20)), y = share)
) +
geom_col(fill = col_base) +
# coord_flip() +
labs(x = "", y = "", fill = "") + y_percent() +
ggtitle(give_var_label("aspire_school"))
24% of children reported having contributed economically to the family in the past year; more than half of them did so at least once a week, and a quarter every day. The most common activity is sewing hammocks or other forms of crafting.
90% think school is useful and essential for their future; the rest think of it as useful but not essential. 89% believe it is likely or very likely that they will achieve the educational level they aspire to. Two students think it is unlikely or very unlikely (one did not know why, the other because teachers do not come to the school), while the rest did not know what to respond.
# Parent demographics: one relative-frequency chart per variable.
dp %>% bar_one_var("female")
dp %>% bar_one_var("to_child")
# answer codes of 66 and above are excluded for to_head
dp %>% filter(to_head < 66) %>% bar_one_var("to_head")
# Age distribution; the dashed vertical line marks the mean
dp %>% my_hist(age) + no_labs() +
ggtitle("Età") +
geom_vline(xintercept = mean(dp$age, na.rm = T), linetype = 2, colour="black")
dp %>% bar_one_var("educ")
dp %>% bar_one_var("educ_high_hh")
# Household size; the dashed vertical line marks the mean
dp %>% my_hist(hhsize) + no_labs() +
ggtitle(give_var_label("hhsize", labels_p)) +
scale_x_continuous(breaks = 0:9) +
geom_vline(xintercept = mean(dp$hhsize, na.rm = T), linetype = 2, colour="black")
# Assets (all asset* columns except those ending in "nrooms") plus internet:
# share of rows with value 1, computed over all rows of each variable.
dp %>%
pivot_longer(c(starts_with("asset") & !ends_with("nrooms"), internet)) %>%
# separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
group_by(name) %>%
count(value) %>%
#filter(value < 77) %>%
mutate(share = n/sum(n)) %>%
filter(value == 1) %>%
left_join(labels_p %>% filter(value == 1)) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = T), y = share)) +
# NOTE(review): no fill aesthetic is mapped, so this fill scale looks unused
scale_fill_ghibli_d(pal, direction = -1) +
geom_col(fill = col_base) +
coord_flip() +
ggtitle("Beni di proprietà") +
no_labs() + y_percent()
# Distribution of bef_nactiv and after_nactiv, side by side; codes of 77 and
# above are removed before the shares are computed.
dp %>%
pivot_longer(c(bef_nactiv, after_nactiv)) %>%
group_by(name) %>%
count(value) %>%
filter(value < 77) %>%
mutate(share = n/sum(n)) %>%
# `value` is dropped from the label table so the join matches on `name`
# NOTE(review): if labels_p holds several rows per variable this join
# repeats each bar's row; confirm there is one row per variable here
left_join(labels_p %>% select(-value)) %>%
ggplot(aes(x = value, y = share,
fill = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = F))) +
scale_fill_ghibli_d(pal, direction = -1) +
geom_col(position = position_dodge2()) +
ggtitle("Numero di persone con attività generatrici di reddito") +
no_labs() + y_percent() +
theme(legend.position = c(0.8, 0.7))
# received remit or help from gov
# Share of rows with value 1, computed over all rows of each variable.
dp %>%
pivot_longer(c(remit_yn, govhelp_yn)) %>%
group_by(name) %>%
count(value) %>%
# filter(value < 77) %>%
mutate(share = n/sum(n)) %>%
filter(value == 1) %>%
left_join(labels_p %>% filter(value == 1)) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = F), y = share)) +
# NOTE(review): no fill aesthetic is mapped, so this fill scale looks unused
scale_fill_ghibli_d(pal, direction = -1) +
geom_col(fill = col_base, width = 0.6) +
ggtitle("Sostegno economico da parte del governo o rimesse") +
no_labs() + y_percent()
# what happened to main income, total income, remit etc
# Answer distribution for every column ending in "_change"; rows with missing
# values and codes of 77 and above are removed before the shares are computed.
dp %>%
pivot_longer(ends_with("_change")) %>%
# separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
group_by(name) %>%
count(value) %>%
drop_na() %>%
filter(value < 77) %>%
mutate(share = n/sum(n)) %>%
# Earlier, unlabelled version of the plot (kept for reference):
# ggplot(aes(x = name, y = share, fill = factor(value))) +
# scale_fill_ghibli_d(pal, direction = -1) +
# geom_col(position = position_fill(reverse = TRUE)) +
# coord_flip() +
# labs(x = "", y = "")
left_join(labels_p) %>%
ggplot(aes(x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = T),
y = share, fill = fct_reorder(label %>% str_wrap(17), value))) +
scale_fill_ghibli_d(pal, direction = -1) +
geom_col(position = position_fill(reverse = TRUE)) +
ggtitle("Cambiamento della situazione economica post pandemia") +
coord_flip() +
labs(x = "", y = "", fill = "") + y_percent()
# How they dealt with loss in income
# Share of non-missing rows in which each option was selected.
dp %>%
pivot_longer(c(deal_with_loss_1:deal_with_loss_88),
values_drop_na = T) %>%
separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
mutate(number = as.numeric(number)) %>%
group_by(name, number) %>%
summarise(share = sum(value)/n()) %>%
# labels_p stores the answer code in `value`; rename it to match `number`
left_join(labels_p %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
geom_col(fill = col_base) +
coord_flip() +
labs(x = "", y = "", fill = "") + y_percent() +
ggtitle(give_var_label("deal_with_loss", labels_p))
# types of support from gov or ngo
# NOTE(review): help_gov and help_ngo are both in the data, but the plot has
# neither a facet nor a fill for `name`, and the title is the help_gov label
# only. Options whose label text matches land on the same bar and are stacked.
# Confirm whether the two should be shown separately.
dp %>%
pivot_longer(c(help_gov_1:help_gov_99, help_ngo_1:help_ngo_99),
values_drop_na = T) %>%
separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
mutate(number = as.numeric(number)) %>%
group_by(name, number) %>%
summarise(share = sum(value)/n()) %>%
filter(number <= 66) %>%
left_join(labels_p %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label %>% str_wrap(20)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(give_var_label("help_gov", labels_p)) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# not enough money for meds, and worries about enough food
# enoug_medicine is inverted first (0 becomes 1, anything else 0) so that a
# 1 means "not enough" for the share below.
# sum(value) has no na.rm, so a missing answer makes that variable's share NA.
dp %>% mutate(enoug_medicine = ifelse(enoug_medicine == 0, 1, 0)) %>%
pivot_longer(c(enoug_medicine, worry_food)) %>%
group_by(name) %>%
summarise(share = sum(value)/n()) %>%
left_join(labels_p %>% select(-value)) %>%
# the join can return several label rows per variable; keep the first
distinct(name, .keep_all = T) %>%
ggplot(aes(x = var_label, y = share)) +
geom_col(fill = col_base) +
ggtitle("Preoccupazioni") +
no_labs() + labs(fill = "") + y_percent()
# Answer distribution for every worries* variable; answer codes of 4 and
# above are removed before the shares are computed.
dp %>%
  pivot_longer(starts_with("worries")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 4) %>%
  # Share of each answer among the kept responses. The previous n / n()
  # divided the count by the number of distinct answers, which is not a share.
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share, fill = fct_reorder(label %>% str_wrap(17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Preoccupazioni") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()
# Educational activities (activ_educ_*): share of non-missing rows in which
# each option was selected; options numbered above 66 are left out of the plot.
dp %>%
pivot_longer(c(activ_educ_0:activ_educ_99),
values_drop_na = T) %>%
separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
mutate(number = as.numeric(number)) %>%
group_by(name, number) %>%
summarise(share = sum(value)/n()) %>%
filter(number <= 66) %>%
# labels_p stores the answer code in `value`; rename it to match `number`
left_join(labels_p %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label %>% str_wrap(20)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(paste(give_var_label("activ_educ", labels_p), "educative")) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# Leisure activities (activ_leisure_*): same computation as above.
dp %>%
pivot_longer(c(activ_leisure_0:activ_leisure_99),
values_drop_na = T) %>%
separate(name, into = c("name", "number"), sep="_(?=[^_]+$)") %>%
mutate(number = as.numeric(number)) %>%
group_by(name, number) %>%
summarise(share = sum(value)/n()) %>%
filter(number <= 66) %>%
left_join(labels_p %>% rename(number = value)) %>%
ggplot(aes(x = fct_reorder(factor(label %>% str_wrap(20)), share), y = share)) +
geom_col(fill = col_base) +
ggtitle(paste(give_var_label("activ_leisure", labels_p), "ricreative")) +
no_labs() + labs(fill = "") + y_percent() +
coord_flip()
# Mean of time_hw over rows with a code below 77; used as the dashed
# reference line in the chart below.
mean_time_hw <- dp %>%
  filter(time_hw < 77) %>%
  pull(time_hw) %>%
  as.numeric() %>%
  mean(na.rm = TRUE)

# Distribution of time_hw (shares of respondents) with the mean marked.
dp %>%
  filter(time_hw < 77) %>%
  mutate(time_hw = as.numeric(time_hw)) %>%
  count(time_hw) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(x = time_hw, y = share)) +
  geom_col(fill = col_base) +
  geom_vline(xintercept = mean_time_hw, linetype = 2, colour = "black") +
  scale_x_continuous(
    breaks = seq(0, 5.5, 0.5),
    labels = paste(seq(0, 5.5, 0.5), "h")
  ) +
  ggtitle(give_var_label("time_hw", labels_p)) +
  no_labs() +
  y_percent()
# Mean of time_help over rows with a code below 77; used as the dashed
# reference line in the chart below.
mean_time_help <- dp %>%
  filter(time_help < 77) %>%
  pull(time_help) %>%
  as.numeric() %>%
  mean(na.rm = TRUE)

# Distribution of time_help (shares of respondents) with the mean marked.
dp %>%
  filter(time_help < 77) %>%
  mutate(time_help = as.numeric(time_help)) %>%
  count(time_help) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(x = time_help, y = share)) +
  geom_col(fill = col_base) +
  geom_vline(xintercept = mean_time_help, linetype = 2, colour = "black") +
  scale_x_continuous(
    breaks = seq(0, 5.5, 0.5),
    labels = paste(seq(0, 5.5, 0.5), "h")
  ) +
  ggtitle(give_var_label("time_help", labels_p)) +
  no_labs() +
  y_percent()
# Frequency table of support_help_child.
support_help_child <- tabyl(dp$support_help_child)

# In contact with teachers (rows with contact_teachers_yn below 88 only).
bar_one_var(
  filter(dp, contact_teachers_yn < 88),
  "contact_teachers_yn"
)

# Teachers revised homework.
# NOTE(review): the row filter is on contact_teachers_yn, not on
# verify_hw_much - confirm this is intended and not a copy-paste leftover.
bar_one_var(
  filter(dp, contact_teachers_yn < 88),
  "verify_hw_much"
)
75% of parents reported that they did not receive any support on how to help the pupil work through the exercise books. They believe that the school (primarily) and public institutions should have provided support. Among those who did receive support, the teacher was the main source for most of them.
# Depression items (depr_nervous ... depr_afraid): answer distribution per
# item as 100 % stacked horizontal bars; codes of 66 and above are excluded.
# NOTE(review): this chart joins `labels`, whereas the other `dp` charts in
# this section join `labels_p` - confirm which label table is intended.
dp %>%
  pivot_longer(depr_nervous:depr_afraid) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 66) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(label, value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Benessere emozionale") +
  no_labs() +
  y_percent()
# Self-efficacy items (columns starting with "self"): answer distribution per
# item as 100 % stacked horizontal bars; codes of 66 and above are excluded.
dp %>%
  pivot_longer(starts_with("self")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 66) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Autoefficacia") +
  no_labs() +
  y_percent()
# Family relations items (rel_children ... rel_community): answer
# distribution per item as 100 % stacked horizontal bars; codes of 77 and
# above are excluded.
dp %>%
  pivot_longer(rel_children:rel_community) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Relazioni familiari") +
  no_labs() +
  y_percent()
# Frequency tables of test_covid_yn and test_covid_result.
test_covid <- tabyl(dp, test_covid_yn)
test_result <- tabyl(dp, test_covid_result)
In 0% of households, at least one person took the virus test. In 2 households, the person(s) tested was/were found positive.
# Behaviour items (covid_hands ... covid_market): answer distribution per
# item as 100 % stacked horizontal bars; codes of 77 and above are excluded.
dp %>%
  pivot_longer(covid_hands:covid_market) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Comportamenti dalla pandemia") +
  no_labs() +
  y_percent()
# Emotion items (columns starting with "covid_emot"): answer distribution per
# item as 100 % stacked horizontal bars; codes of 77 and above are excluded,
# and rows with any missing field after the label join are dropped.
dp %>%
  pivot_longer(starts_with("covid_emot")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  na.omit() %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Emozioni riguardo alla pandemia") +
  no_labs() +
  y_percent()
# Neighbourhood items (columns starting with "neigh"): answer distribution
# per item as 100 % stacked horizontal bars; codes of 77 and above are
# excluded.
dp %>%
  pivot_longer(starts_with("neigh")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 40), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Ambiente del quartiere") +
  no_labs() +
  y_percent()
# Violence items (columns starting with "viol_"): answer distribution per
# item as 100 % stacked horizontal bars; codes of 77 and above are excluded.
dp %>%
  pivot_longer(starts_with("viol_")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  ggtitle("Esposizione alla violenza") +
  no_labs() +
  y_percent()
# dp %>% tabyl(ladder_now)
# Ladder items (now / before / in one year): share of respondents on each
# rung, one dodged bar per item; codes of 77 and above are excluded.
dp %>%
  pivot_longer(c(ladder_now, ladder_before, ladder_in1yr)) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  # `value` is removed from the label table before the join, so it is not
  # used as a join key.
  left_join(select(labels_p, -value)) %>%
  ggplot(aes(
    x = value,
    y = share,
    fill = fct_relevel(str_wrap(var_label, 20), "Livello pre-covid")
  )) +
  geom_col(position = position_dodge2()) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_x_continuous(breaks = seq(0, 10, 2)) +
  ggtitle("Stato socioeconomico soggettivo") +
  labs(
    x = "Scala socio-economica (1 condizione peggiore, 10 condizione migliore)",
    y = "",
    fill = ""
  ) +
  y_percent()
# Load data/yr1.RData and rename columns of `df` to the names listed on the
# left-hand side of the map below, then add survey = "s0" and apply
# zap_labels().
# NOTE(review): `df` is presumably one of the objects stored in yr1.RData -
# confirm.
load(here("data", "yr1.RData"))
df %<>%
rename(
parent_hw = parenting_1,
parent_org_sch = parenting_2,
parent_talk_parents = parenting_6,
parent_grades = parenting_7,
parent_work = parenting_8,
child_labor_yn = worked_last_yr,
school_useful = sch_useful,
aspire_school = aspire_sch,
# NOTE(review): `aspire_sch` is also the source of the previous entry, so one
# column is mapped to two new names. A different source column was probably
# meant for expect_school - confirm against the yr1 codebook.
expect_school = aspire_sch,
self_diffsit = self_efficacy_3,
self_howto = self_efficacy_1,
self_improvelife = self_efficacy_2,
shy_authority = shyness_2,
emot_sad = emot_prob_1,
emot_fears = emot_prob_2,
emot_angry = conduct_prob_1,
emot_controlothers = conduct_prob_3,
emot_fight = conduct_prob_2,
# The source column name below is spelled "positve".
reciproc_positive = reciproc_positve,
# The next two entries map a column to its own name (no change).
reciproc_negative = reciproc_negative,
neigh_trust = neigh_trust,
neigh_safeviolence = neigh_safe,
neigh_treatworse = neigh_treat_bad,
viol_hurt = violence_1,
viol_shots = violence_2,
viol_robbed = violence_3,
viol_familyhurt = violence_5
) %>% mutate(survey = "s0") %>%
zap_labels()
# Full join of `ds` and `df` on id_stud. Every other column gets the suffix
# _s2 (from ds) or _s1 (from df); the constant columns s2 / s1 mark which
# table a row was found in.
dt <- full_join(
  ds %>%
    rename_with(~ paste0(.x, "_s2"), -id_stud) %>%
    mutate(s2 = 1),
  df %>%
    # id_stud is converted to character before the join.
    mutate(id_stud = as.character(id_stud)) %>%
    rename_with(~ paste0(.x, "_s1"), -id_stud) %>%
    mutate(s1 = 1),
  by = "id_stud"
)
# Add `dp` to dt with a full join on id_stud. Every other dp column gets the
# suffix _p2, and the constant column p2 marks rows found in dp.
dt <- full_join(
  dt,
  dp %>%
    rename_with(~ paste0(.x, "_p2"), -id_stud) %>%
    mutate(p2 = 1),
  by = "id_stud"
)
# re-arrange variables
# dt %<>%
# relocate(c(username_s1:id_stud_check_s1,
# school_s0:encues_id_s0,
# username_p1:endtime_p1,
# female_s0:years_at_school_s0,
# hh_size_s0:bathroom_s0,
# address_p1:internet_p1,
# bef_nactiv_p1:help_ngo_other_p1,
# ses_now_s0:ses_4yrs_s0,
# ), .after = "id_stud") %>%
# relo
# relocate(c(s1_closed:end_s), .after = "end_demographics") %>%
# relocate(c(man_happy:man_tired), .after = "end_man") %>%
# relocate(c(raven1:raven10), .after = "tests_order") %>%
# relocate(c(time_raven_1:end_raven), .after = "raven10") %>%
# relocate(c(stroop1:stroop20), .after = "time_raven_11") %>%
# relocate(c(start_stroop:end_stroop), .after = "stroop20") %>%
# relocate(c(start_emotions:self_improvelife), .after = "emotions_first")
# Matched subsets of dt based on the indicator columns s1, s2 and p2.
# A sum that involves a missing indicator is NA, and filter() drops such rows.
s12 <- filter(dt, s1 + s2 == 2)
sp <- filter(dt, p2 + s2 == 2)
s12p <- filter(dt, s1 + p2 + s2 == 3)
We are left with 68 matched student yr1 - student yr2 - parent yr2 observations. All of the following analyses therefore use that many observations.
# Study time: per student, the sum of all columns whose name starts with
# "study", stored as study_time_s2 and joined back onto s12p.
s12p <- s12p %>%
  pivot_longer(starts_with("study")) %>%
  group_by(id_stud) %>%
  summarise(study_time_s2 = sum(as.numeric(value))) %>%
  right_join(s12p)
# Books solved: the code 88 is replaced by the mean of the answers below 88.
mean_v <- s12p %>%
  filter(solved_boooks_many_s2 < 88) %>%
  pull(solved_boooks_many_s2) %>%
  mean(na.rm = TRUE)

s12p <- s12p %>%
  mutate(
    solved_boooks_many_s2 = ifelse(
      solved_boooks_many_s2 == 88,
      mean_v,
      solved_boooks_many_s2
    )
  )
# Number of assets: per student, the sum over the asset* columns (except the
# one ending in "nrooms_p2") plus internet_p2, stored as assets_p2.
s12p <- s12p %>%
  pivot_longer(
    c(starts_with("asset") & !ends_with("nrooms_p2"), internet_p2)
  ) %>%
  group_by(name) %>%
  # Within each item, the code 88 is replaced by the rounded mean of the
  # remaining answers.
  mutate(
    value = ifelse(
      value == 88,
      round(mean(replace(value, value == 88, NA), na.rm = TRUE)),
      value
    )
  ) %>%
  group_by(id_stud) %>%
  summarise(assets_p2 = sum(as.numeric(value))) %>%
  right_join(s12p)
# Derived variables:
#   age_s2       - age_s1 plus one
#   loss_inc_p2  - income loss during the pandemic: 1 if total_inc_change_p2 > 2
#   educ_basica  - maximum level of education recoded: 1 if educ_high_hh_p2 <= 3
s12p <- s12p %>%
  mutate(
    age_s2 = age_s1 + 1,
    loss_inc_p2 = ifelse(total_inc_change_p2 > 2, 1, 0),
    educ_basica = ifelse(educ_high_hh_p2 <= 3, 1, 0)
  )
# Self-efficacy at baseline: per-student mean of the self*_s1 items, rescaled
# to run from 0 to 10 and joined back onto s12p as `self`.
# The mean is divided by 3, the top of the s1 item scale: the wave comparison
# further down adds 1 to the s1 answers to align them with the 1-4 answers of
# s2, i.e. s1 items run 0-3. Dividing by 4 capped the score at 7.5.
# NOTE(review): confirm the 0-3 coding against the yr1 codebook.
s12p %<>%
  pivot_longer(starts_with("self") & ends_with("_s1")) %>%
  group_by(id_stud) %>%
  summarise(self = 10 * mean(value) / 3) %>%
  right_join(s12p)
# Emotional well-being of parent and student: one score per respondent type,
# stored as depr_<suffix> columns and joined back onto s12p.
s12p <- s12p %>%
  select(id_stud, starts_with("depr")) %>%
  pivot_longer(starts_with("depr")) %>%
  # Split at the last underscore into item name and respondent suffix.
  separate(name, into = c("name", "resp"), sep = "_(?=[^_]+$)") %>%
  # Codes above 4 become missing; the remaining answers are reversed
  # as |value - 4|.
  mutate(value = abs(ifelse(value > 4, NA, value) - 4)) %>%
  # Missing answers are filled with the item mean within respondent type.
  group_by(name, resp) %>%
  mutate(value = ifelse(is.na(value), mean(value, na.rm = TRUE), value)) %>%
  # Per-person mean, multiplied by 10 / 3.
  group_by(id_stud, resp) %>%
  summarise(depr = 10 * mean(value) / 3) %>%
  pivot_wider(
    names_from = resp,
    values_from = depr,
    names_prefix = "depr_"
  ) %>%
  ungroup() %>%
  right_join(s12p)
I recoded score variables such as self-efficacy and emotional well-being to vary from 0 to 10, 0 being the lowest possible score on the scale and 10 the highest. For now, in the models I used the Raven (fluid intelligence test) at baseline as the control for cognitive ability. With the Stroop there was nothing. The asset variable is the sum of the 11 asset dummies. I recoded the maximum level of education into a dummy (1 for basic education - grades 1 to 9 in El Salvador - and 0 for high school and university). I also recoded the loss of income into a dummy (1 if income was lost or completely lost). In this model the dependent variable is continuous, indicating the number of daily hours of study.
# Study time (study_time_s2) against student / family characteristics:
# scatter plots with a linear fit for continuous variables, bars of group
# means for the two dummies, arranged in a two-column grid.
p_age <- ggplot(s12p, aes(x = age_s2, y = study_time_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Età", y = "")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(study_time_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  labs(x = "Massimo livello di studio in famiglia", y = "")

p_asset <- ggplot(s12p, aes(x = assets_p2, y = study_time_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Numero di beni di proprietà (0-11)", y = "")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(study_time_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  labs(x = "Riduzione del reddito durante la pandemia", y = "")

p_raven <- ggplot(s12p, aes(x = raven_s1, y = study_time_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Capacità cognitiva", y = "")

p_self <- ggplot(s12p, aes(x = self, y = study_time_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Autoefficacia", y = "")

grid.arrange(
  p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Numero di ore di studio giornaliere durante \n la pandemia e caratteristiche dello studente/della famiglia")
)
# Regressors shared by the models in this section, and their display labels
# (same order as `vars`).
vars <- c(
  "female_s1", "age_s2", "raven_s1", "self", "educ_basica", "hhsize_p2",
  "assets_p2", "loss_inc_p2"
)
labels_vars <- str_wrap(
  c(
    "Femmina",
    "Età",
    "Capacità cognitiva 2019",
    "Autoefficacia 2019",
    "Massimo livello di studio in famiglia - istruzione di base (1-9)",
    "Dimensione famiglia",
    "Numero di beni di proprietà (0-11)",
    "Riduzione del reddito durante la pandemia"
  ),
  30
)

# Linear model of study_time_s2; coefficients are plotted with
# +/- 1.96 standard-error bars (intercept omitted).
s12p %>%
  lm(
    study_time_s2 ~ female_s1 + age_s2 + raven_s1 + self + educ_basica +
      hhsize_p2 + assets_p2 + loss_inc_p2,
    data = .
  ) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(term, levels = vars, labels = labels_vars)
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  geom_hline(yintercept = 0, linetype = 2) +
  coord_flip() +
  labs(x = "", y = "Coefficiente")
Even though it is not strictly correct, I kept the variable as continuous to have a bit more variability.
# Material solved (solved_boooks_many_s2) against student / family
# characteristics: scatter plots with a linear fit for continuous variables,
# bars of group means for the two dummies, arranged in a two-column grid.
p_age <- ggplot(s12p, aes(x = age_s2, y = solved_boooks_many_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Età", y = "")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(solved_boooks_many_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  labs(x = "Massimo livello di studio in famiglia", y = "")

p_asset <- ggplot(s12p, aes(x = assets_p2, y = solved_boooks_many_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Numero di beni di proprietà (0-11)", y = "")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(solved_boooks_many_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  labs(x = "Riduzione del reddito durante la pandemia", y = "")

p_raven <- ggplot(s12p, aes(x = raven_s1, y = solved_boooks_many_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Capacità cognitiva", y = "")

p_self <- ggplot(s12p, aes(x = self, y = solved_boooks_many_s2)) +
  geom_point(colour = col_base) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(x = "Autoefficacia", y = "")

grid.arrange(
  p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Quantità materiale didattico risolto (1 - nessuno, 5 - tutti) durante \n la pandemia e caratteristiche dello studente/della famiglia")
)
# Linear model of solved_boooks_many_s2 on the regressors in `vars`;
# coefficients are plotted with +/- 1.96 standard-error bars (intercept
# omitted).
s12p %>%
  lm(
    solved_boooks_many_s2 ~ female_s1 + age_s2 + raven_s1 + self +
      educ_basica + hhsize_p2 + assets_p2 + loss_inc_p2,
    data = .
  ) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(term, levels = vars, labels = labels_vars)
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  geom_hline(yintercept = 0, linetype = 2) +
  coord_flip() +
  labs(x = "", y = "Coefficiente")
Higher values indicate higher well-being (I reversed the scale so as not to call it depression).
# Student score depr_s2 against the parent's score and student / family
# characteristics: scatter plots with a linear fit for continuous variables,
# bars of group means for the two dummies, arranged in a two-column grid.
p_depr <- s12p %>%
  ggplot(aes(x = depr_p2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Benessere emotivo del genitore") + ylab("")

p_age <- s12p %>%
  ggplot(aes(x = age_s2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Età") + ylab("")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(depr_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Massimo livello di studio in famiglia") + ylab("")

p_asset <- s12p %>%
  ggplot(aes(x = assets_p2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Numero di beni di proprietà (0-11)") + ylab("")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(depr_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Riduzione del reddito durante la pandemia") + ylab("")

p_raven <- s12p %>%
  ggplot(aes(x = raven_s1, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Capacità cognitiva") + ylab("")

p_self <- s12p %>%
  ggplot(aes(x = self, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Autoefficacia") + ylab("")

# Empty placeholder for the cell next to p_depr. rectGrob() only builds the
# grob; grid.rect() would also draw it on the current device straight away,
# adding a stray rectangle before the grid is rendered.
blank <- rectGrob(gp = gpar(col = "white"))

grid.arrange(
  p_depr, blank, p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Benessere emotivo dello studente \n e caratteristiche dello studente/della famiglia")
)
# Linear model of depr_s2 on depr_p2 plus the regressors in `vars`;
# coefficients are plotted with +/- 1.96 standard-error bars (intercept
# omitted).
s12p %>%
  lm(
    depr_s2 ~ depr_p2 + female_s1 + age_s2 + raven_s1 + self + educ_basica +
      hhsize_p2 + assets_p2 + loss_inc_p2,
    data = .
  ) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(
      term,
      levels = c("depr_p2", vars),
      labels = c("Benessere emotivo del genitore", labels_vars)
    )
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  geom_hline(yintercept = 0, linetype = 2) +
  coord_flip() +
  labs(x = "", y = "Coefficiente")
Here I also recoded the Stroop to range from 0 to 10, like the Raven. The vertical lines are the means for each year.
# Mean of raven_s1 and raven_s2, drawn as vertical lines in the chart below.
means <- s12p %>%
  pivot_longer(c(raven_s1, raven_s2)) %>%
  group_by(name) %>%
  summarise(mean = mean(value, na.rm = TRUE))

# Distribution of the two Raven scores, dodged bars side by side.
s12p %>%
  pivot_longer(c(raven_s1, raven_s2)) %>%
  group_by(name) %>%
  count(value) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(
    x = value,
    y = share,
    fill = factor(name, labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  geom_vline(
    data = means,
    aes(xintercept = mean, color = name),
    show.legend = FALSE
  ) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_color_ghibli_d(pal, direction = -1) +
  scale_x_continuous(breaks = 0:10) +
  no_labs() +
  labs(fill = "Anno") +
  y_percent() +
  ggtitle("Capacità cognitive (intelligenza fluida) - variazioni nel tempo")
# Mean of stroop_s1 and stroop_s2, halved to match the x axis of the chart
# below (which plots value / 2).
means <- s12p %>%
  pivot_longer(c(stroop_s1, stroop_s2)) %>%
  group_by(name) %>%
  summarise(mean = mean(value, na.rm = TRUE) / 2)

# Distribution of the two Stroop scores (halved), dodged bars side by side.
s12p %>%
  pivot_longer(c(stroop_s1, stroop_s2)) %>%
  group_by(name) %>%
  count(value) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(
    x = value / 2,
    y = share,
    fill = factor(name, labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  geom_vline(
    data = means,
    aes(xintercept = mean, color = name),
    show.legend = FALSE
  ) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_color_ghibli_d(pal, direction = -1) +
  scale_x_continuous(breaks = 0:10) +
  no_labs() +
  labs(fill = "Anno") +
  y_percent() +
  ggtitle("Capacità cognitive (controllo degli impulsi) - variazioni nel tempo")
I focus only on items that were repeated in the 2nd year. All were measured on a 4-point Likert scale. I computed an average treating the variables as continuous (1 tot. disagree, 4 tot. agree). In my opinion the figure is not hard to read. Overall, there is very little change across the waves.
# Socio-emotional items present in both waves: mean answer per item and wave,
# mapped to 0-1, as dodged horizontal bars.
s12p %>%
  pivot_longer(
    starts_with(c(
      "self_diffsit_s", "self_howto_s", "self_improvelife_s",
      "emot_sad", "emot_fear", "emot_controlothers", "emot_fight",
      "shy_authority", "reciproc_"
    ))
  ) %>%
  # Split at the last underscore into item name and wave suffix.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # s1 answers are shifted up by one; anything above 4 becomes missing.
    value = ifelse(number == "s1", value + 1, value),
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  # One label row per item (the row for value 1) supplies var_label / var_nr.
  left_join(filter(labels, value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_y_continuous(
    breaks = c(0, 0.33, 0.66, 1),
    limits = c(0, 1),
    labels = c("Tot. in disaccordo", "In disaccordo", "D'accordo", "Tot. d'accordo")
  ) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  ggtitle("Capacità socio-emotive - variazioni nel tempo")
I did the same for the neighborhood environment. Violence in the previous year was measured on a dichotomous scale (yes, no), so I dichotomized this year's answers as well. Except for one variable, there was a reduction in violence.
# Neighbourhood items present in both waves: mean answer per item and wave,
# mapped to 0-1, as dodged horizontal bars.
s12p %>%
  pivot_longer(
    starts_with(c("neigh_trust_s", "neigh_safeviolence_s", "neigh_treatworse_s"))
  ) %>%
  # Split at the last underscore into item name and wave suffix.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # s1 answers are shifted up by one; anything above 4 becomes missing.
    value = ifelse(number == "s1", value + 1, value),
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  # One label row per item (the row for value 1) supplies var_label / var_nr.
  left_join(filter(labels, value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_y_continuous(
    breaks = c(0, 0.33, 0.66, 1),
    limits = c(0, 1),
    labels = c("Tot. in disaccordo", "In disaccordo", "D'accordo", "Tot. d'accordo")
  ) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  ggtitle("Ambiente del quartiere - variazioni nel tempo")
# Violence items present in both waves: mean per item and wave, as dodged
# horizontal bars on a percent axis.
s12p %>%
  pivot_longer(
    starts_with(c("viol_hurt_s", "viol_shots_s", "viol_robbed_s", "viol_familyhurt_s"))
  ) %>%
  # Split at the last underscore into item name and wave suffix.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Codes above 4 become missing; s2 answers of 1 or more are collapsed to 1.
    value = ifelse(value > 4, NA, value),
    value = ifelse(number == "s2" & value >= 1, 1, value)
  ) %>%
  group_by(name, number) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  # One label row per item (the row for value 1) supplies var_label / var_nr.
  left_join(filter(labels, value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  y_percent() +
  ggtitle("Esposizione alla violenza - variazioni nel tempo")
# parent_hw = parenting_1,
# parent_org_sch = parenting_2,
# parent_talk_parents = parenting_6,
# parent_grades = parenting_7,
# parent_work = parenting_8,
# child_labor_yn = worked_last_yr,
# school_useful = sch_useful,
# aspire_school = aspire_sch,
# expect_school = aspire_sch,
# self_diffsit = self_efficacy_3,
# self_howto = self_efficacy_1,
# self_improvelife = self_efficacy_2,
# shy_authority = shyness_2,
# emot_sad = emot_prob_1,
# emot_fears = emot_prob_2,
# emot_angry = conduct_prob_1,
# emot_controlothers = conduct_prob_3,
# emot_fight = conduct_prob_2,
# reciproc_positive = reciproc_positve,
# reciproc_negative = reciproc_negative,
# neigh_trust = neigh_trust,
# neigh_safeviolence = neigh_safe,
# neigh_treatworse = neigh_treat_bad,
# viol_hurt = violence_1,
# viol_shots = violence_2,
# viol_robbed = violence_3,
# viol_familyhurt = violence_5
# Parental support items present in both waves: mean answer per item and
# wave, mapped to 0-1, as dodged horizontal bars.
s12p %>%
  pivot_longer(
    starts_with(c("parent_org_sch_s", "parent_talk_parents_s", "parent_grades_s", "parent_work_s"))
  ) %>%
  # Split at the last underscore into item name and wave suffix.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # s1 answers are shifted up by one; anything above 4 becomes missing.
    value = ifelse(number == "s1", value + 1, value),
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  # One label row per item (the row for value 1) supplies var_label / var_nr.
  left_join(filter(labels, value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_y_continuous(
    breaks = c(0, 0.33, 0.66, 1),
    limits = c(0, 1),
    labels = c("Mai o quasi mai", "A volte", "Spesso", "Sempre")
  ) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  ggtitle("Supporto dai genitori - variazioni nel tempo")
# Mean of the aspire_uni indicator in each wave, as two bars on a 0-100 %
# axis.
s12p %>%
  mutate(
    # s2 indicator: 1 when aspire_school_s2 is 6, missing when it is NA or
    # above 6, 0 otherwise.
    aspire_uni_s2 = case_when(
      aspire_school_s2 == 6 ~ 1,
      is.na(aspire_school_s2) ~ NA_real_,
      aspire_school_s2 > 6 ~ NA_real_,
      TRUE ~ 0
    ),
    # s1 indicator: the code 99 becomes missing.
    aspire_uni_s1 = ifelse(aspire_uni_s1 == 99, NA, aspire_uni_s1)
  ) %>%
  select(aspire_uni_s2, aspire_uni_s1) %>%
  pivot_longer(aspire_uni_s2:aspire_uni_s1) %>%
  # Split at the last underscore into variable name and wave suffix.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  group_by(name, number) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020")),
    y = mean
  )) +
  geom_col(fill = col_base, width = 0.6) +
  labs(x = "", y = "") +
  # Single y scale: the earlier y_percent() call was immediately replaced by
  # this one, which only triggered ggplot2's scale-replacement message.
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  ggtitle("Desidera andare all'università - variazioni nel tempo")