#' Bar chart of the relative frequency of each value of one variable.
#'
#' @param data A data frame.
#' @param x Unquoted column, or expression on columns, mapped to the x axis.
#' @return A ggplot object whose bar heights are each value's count divided by
#'   the total count, on a percent-formatted y axis without a title.
#' @details Bars are filled with `col_base`, which is not defined in this
#'   function and must exist in the calling environment.
my_hist <- function(data, x) {
  data %>%
    ggplot(aes(x = {{ x }})) +
    # after_stat() replaces the deprecated stat(): the share is computed from
    # the counts produced by the bar stat.
    geom_bar(
      aes(y = after_stat(count / sum(count))),
      width = 0.9,
      fill = col_base
    ) +
    scale_y_continuous("", labels = scales::percent)
}

# Split `name` at its last underscore into `name` (the prefix) and `number`
# (the suffix, converted to numeric).
sep_mult <- function(data) {
  split_data <- separate(
    data,
    name,
    into = c("name", "number"),
    sep = "_(?=[^_]+$)"
  )
  mutate(split_data, number = as.numeric(number))
}
 # For every `name`/`number` combination: the sum of `value` (missing values
 # ignored) divided by the number of rows in that combination.
 # The result stays grouped by `name`; `.groups = "drop_last"` states the
 # grouping summarise() would otherwise apply with an informational message.
 group_share <- function(data) {
   data %>%
     group_by(name, number) %>%
     summarise(
       share = sum(value, na.rm = TRUE) / n(),
       .groups = "drop_last"
     )
 }
  
# ggplot component that blanks the x, y and fill titles.
no_labs <- function() {
  blank <- ""
  labs(x = blank, y = blank, fill = blank)
}

# Continuous y scale with an empty title and percent-formatted labels.
y_percent <- function() {
  scale_y_continuous(name = "", labels = scales::percent)
}

# Distinct `var_label` values of the rows of `data` whose `name` equals `var`.
# `data` defaults to the file-level `labels` table.
give_var_label <- function(var, data = labels) {
  matching <- filter(data, name == var)
  unique(pull(matching, var_label))
}

# Count each `value` within each `name` and add `share`, the count divided by
# the total count for that `name`. The result stays grouped by `name`.
sh_by_value <- function(data) {
  counts <- count(group_by(data, name), value)
  mutate(counts, share = n / sum(n))
}

# `label` column of the rows of `data` whose `name` equals `var` and whose
# `value` is at most `val`. `data` defaults to the file-level `labels` table.
give_val_label <- function(var, val = 999, data = labels) {
  kept <- filter(data, name == var, value <= val)
  pull(kept, label)
}
#' Relative-frequency bar chart of one labelled variable.
#'
#' @param data A data frame containing the column named by `x`.
#' @param x Column name as a string.
#' @return A ggplot object: one bar per value label (wrapped at 17 characters,
#'   ordered by `value`), titled with the variable label from `labels_p`.
#' @details Uses the file-level `labels_p` table for both the value labels and
#'   the title, and `col_base` for the bar fill. The join to `labels_p` uses
#'   whatever columns the two tables share.
bar_one_var <- function(data, x) {
  data %>%
    pivot_longer(all_of(x)) %>%
    mutate(name = x) %>%
    left_join(labels_p) %>%
    ggplot(aes(x = fct_reorder(str_wrap(label, 17), value))) +
    # after_stat() replaces the deprecated stat()
    geom_bar(
      aes(y = after_stat(count / sum(count))),
      width = 0.9,
      fill = col_base
    ) +
    no_labs() +
    y_percent() +
    ggtitle(give_var_label(x, labels_p))
}

#' Standardise a numeric vector.
#'
#' @param x A numeric vector.
#' @return `x` minus its mean, divided by its standard deviation. Missing
#'   values are ignored when computing both and stay `NA` in the result.
standard <- function(x) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}
# Raw Stata exports: students (`ds_raw`) and parents (`dp_raw`).
ds_raw <- here("data", "2020Estudiantes.dta") %>% haven::read_dta()
dp_raw <- here("data", "2020Padres.dta") %>% haven::read_dta()

# Label tables, one sheet each from the same workbook:
# sheets 1-2 feed the student tables, sheets 3-4 the `_p` (parent) tables.
var_labels <- here("data", "var_val_labels.xlsx") %>% read_xlsx(sheet = 1)
val_labels <- here("data", "var_val_labels.xlsx") %>% read_xlsx(sheet = 2)
var_labels_p <- here("data", "var_val_labels.xlsx") %>% read_xlsx(sheet = 3)
val_labels_p <- here("data", "var_val_labels.xlsx") %>% read_xlsx(sheet = 4)
# Variable labels: keep rows whose `type` mentions "select" or "integer",
# split `type` at the space into the type and the name of its value-label
# list, and keep `label_ita` as `var_label`.
# NOTE(review): the "_1" pattern is not anchored, so it is removed wherever it
# occurs in `name` (e.g. "x_10" becomes "x0") - confirm this is intended.
var_labels <- var_labels %>%
  filter(str_detect(type, "select|integer")) %>%
  separate(type, into = c("type", "val_label_name"), sep = " ") %>%
  select(-c(type, label)) %>%
  rename(var_label = label_ita) %>%
  mutate(name = str_replace_all(name, "_1", ""))

# Value labels: keep `label_ita` as `label`, make `value` numeric.
val_labels <- val_labels %>%
  select(-label) %>%
  rename(val_label_name = list_name, label = label_ita) %>%
  mutate(value = as.numeric(value))

# One row per variable/value pair (joined on the columns the tables share).
labels <- left_join(var_labels, val_labels)

# Add `var_nr`: the position at which each `var_label` first appears.
labels <- labels %>%
  distinct(var_label) %>%
  mutate(var_nr = row_number()) %>%
  left_join(labels)

# parents
# Same preparation as for the student labels, plus an override that sets the
# value-label list of "support_help_child_whom_shouldve" to its own name.
var_labels_p <- var_labels_p %>%
  filter(str_detect(type, "select|integer")) %>%
  separate(type, into = c("type", "val_label_name"), sep = " ") %>%
  select(-c(type, label)) %>%
  rename(var_label = label_ita) %>%
  mutate(
    name = str_replace_all(name, "_1", ""),
    val_label_name = ifelse(
      name == "support_help_child_whom_shouldve",
      "support_help_child_whom_shouldve",
      val_label_name
    )
  )

val_labels_p <- val_labels_p %>%
  select(-label) %>%
  rename(val_label_name = list_name, label = label_ita) %>%
  mutate(value = as.numeric(value))

labels_p <- left_join(var_labels_p, val_labels_p)

# Add `var_nr`: the position at which each `var_label` first appears.
labels_p <- labels_p %>%
  distinct(var_label) %>%
  mutate(var_nr = row_number()) %>%
  left_join(labels_p)

Data cleaning

Combining data from various treatments (randomization of sections etc.)

#----Children
# Drop the practice observations (the first 13 rows), strip the Stata value
# labels, number the remaining rows and discard rows with id_stud equal to 0.
ds <- ds_raw[-seq_len(13), ] %>%
  zap_labels() %>%
  mutate(row = row_number()) %>%
  filter(id_stud != 0)

# Helper used repeatedly below: strip the part of `name` after its last
# underscore, then spread the `name`/`value` pairs back into columns.
sep_pivot_wide <- function(data) {
  data %>%
    separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
    select(-number) %>%
    pivot_wider()
}


# Combine the treatment-specific variables, which the survey software stored
# in separate columns, into one column per question. Every step has the same
# shape: stack a set of columns (dropping missing values), rename them to a
# common name, and spread them back out. The steps are order-dependent because
# later column ranges rely on the layout left by earlier ones.
ds <- ds %>% 
  # scenarios closed questions
  relocate(starts_with("s") & ends_with("_closed"), .before = "start_b") %>% 
  pivot_longer(c(sb1_closed:sa3_closed), values_drop_na = TRUE) %>%
  mutate(name = str_replace_all(name, c("sb" = "s", "sa" = "s"))) %>% 
  pivot_wider() %>% 
  # scenarios open questions
  pivot_longer(c(start_b:end_a), values_drop_na = TRUE) %>%
  filter(value != "") %>% 
  mutate(name = str_replace_all(
    name, c("sb" = "s", "sa" = "s", "_b" = "_s", "_a" = "_s"))
    ) %>%
  pivot_wider() %>%
  # manipulation checks
  pivot_longer(c(man_lively_1:man_mad_5), values_drop_na = TRUE) %>% 
  sep_pivot_wide() %>% 
  # raven and stroop scores
  pivot_longer(starts_with("raven") | matches("^stroop[0-9]"), values_drop_na = TRUE) %>% 
  sep_pivot_wide() %>% 
  # time raven and stroop
  pivot_longer(starts_with(c("time_raven", "start_raven", "end_raven",
                           "start_stroop", "stroop_time","end_stroop")), 
               values_drop_na = TRUE) %>%
  filter(value != "") %>% 
  sep_pivot_wide() %>% 
  # emotions (character variables)
  pivot_longer(
    starts_with(c("start_depr", "end_depr", "start_emotions", "end_emotions")), 
    values_drop_na = TRUE
  ) %>% 
  filter(value != "") %>% 
  sep_pivot_wide() %>%
  # emotions (numeric variables)
  pivot_longer(c(depr_nervous_1:reciproc_negative_1, 
                 depr_nervous_2:reciproc_negative_2),
               values_drop_na = TRUE) %>% 
  sep_pivot_wide()

# Drop columns that are not analysed and move the recombined columns next to
# the sections they belong to.
ds <- ds %>%
  select(-(stroop_instr1_1:stroop_test_start_2)) %>%
  relocate(formdef_version:endtime, .before = "id_stud") %>%
  select(
    -deviceid, -subscriberid, -simid,
    -devicephonenum, -formdef_version, -key
  ) %>%
  relocate(s1_closed:end_s, .after = "survey_version_check") %>%
  relocate(man_happy:man_tired, .after = "end_man") %>%
  relocate(raven1:raven10, .after = "tests_order") %>%
  relocate(time_raven_1:end_raven, .after = "raven10") %>%
  relocate(stroop1:stroop20, .after = "time_raven_11") %>%
  relocate(start_stroop:end_stroop, .after = "stroop20") %>%
  relocate(start_depr:reciproc_negative, .after = "emotions_first") %>%
  # this variable had the wrong question
  select(-emot_angry)

#---- Parents (same steps)
# Drop the practice observations (the first 11 rows), strip the Stata value
# labels, number the remaining rows and discard rows with id_stud equal to 0.
dp <- dp_raw[-seq_len(11), ] %>%
  zap_labels() %>%
  mutate(row = row_number()) %>%
  filter(id_stud != 0)

# Combine the treatment-specific variables, which the survey software stored
# in separate columns, into one column per question (same stack / rename /
# spread steps as for the students; the emotion column sets differ).
dp <- dp %>% 
  # scenarios closed questions
  relocate(starts_with("s") & ends_with("_closed"), .before = "start_b") %>% 
  pivot_longer(c(sb1_closed:sa3_closed), values_drop_na = TRUE) %>%
  mutate(name = str_replace_all(name, c("sb" = "s", "sa" = "s"))) %>% 
  pivot_wider() %>% 
  # scenarios open questions
  pivot_longer(c(start_b:end_a), values_drop_na = TRUE) %>%
  filter(value != "") %>% 
  mutate(name = str_replace_all(
    name, c("sb" = "s", "sa" = "s", "_b" = "_s", "_a" = "_s"))
    ) %>%
  pivot_wider() %>%
  # manipulation checks
  pivot_longer(c(man_lively_1:man_mad_5), values_drop_na = TRUE) %>% 
  sep_pivot_wide() %>% 
  # raven and stroop scores
  pivot_longer(starts_with("raven") | matches("^stroop[0-9]"), values_drop_na = TRUE) %>% 
  sep_pivot_wide() %>% 
  # time raven and stroop
  pivot_longer(starts_with(c("time_raven", "start_raven", "end_raven",
                           "start_stroop", "stroop_time","end_stroop")), 
               values_drop_na = TRUE) %>%
  filter(value != "") %>% 
  sep_pivot_wide() %>% 
  # emotions (character variables)
  pivot_longer(
    starts_with(c("start_emotions", "end_emotions")), 
    values_drop_na = TRUE
  ) %>% 
  filter(value != "") %>% 
  sep_pivot_wide() %>%
  # emotions (numeric variables)
  pivot_longer(c(depr_nervous_1:self_improvelife_1, 
                 depr_nervous_2:self_improvelife_2),
               values_drop_na = TRUE) %>% 
  sep_pivot_wide()



# Drop columns that are not analysed and move the recombined columns next to
# the sections they belong to.
dp <- dp %>%
  select(-(stroop_instr1_1:stroop_test_start_2)) %>%
  relocate(formdef_version:endtime, .before = "id_stud") %>%
  select(
    -deviceid, -subscriberid, -simid,
    -devicephonenum, -formdef_version, -key
  ) %>%
  relocate(s1_closed:end_s, .after = "end_demographics") %>%
  relocate(man_happy:man_tired, .after = "end_man") %>%
  relocate(raven1:raven10, .after = "tests_order") %>%
  relocate(time_raven_1:end_raven, .after = "raven10") %>%
  relocate(stroop1:stroop20, .after = "time_raven_11") %>%
  relocate(start_stroop:end_stroop, .after = "stroop20") %>%
  relocate(start_emotions:self_improvelife, .after = "emotions_first")

Duplicates and nr of parents/students pairs

# Duplicated ids: for students the ids themselves, for parents the number of
# rows that share an id with another row.
dup_id_s <- pull(get_dupes(ds, "id_stud"), id_stud)
dup_id_p <- nrow(filter(group_by(dp, id_stud), n() > 1))

# Sorted ids in each data set
id_s <- sort(pull(ds, id_stud))
id_p <- sort(pull(dp, id_stud))

# Ids found only among students, only among parents, and in both
s_not_p <- setdiff(id_s, id_p)
p_not_s <- setdiff(id_p, id_s)
s_and_p <- intersect(id_s, id_p)

# without a match
# Rows whose id appears in only one data set, with the identifying and timing
# columns moved to the front.
s_n <- select(
  filter(ds, id_stud %in% s_not_p),
  id_stud, start_s, starttime, endtime, username,
  other_present:other_difficulties, everything()
)

p_n <- select(
  filter(dp, id_stud %in% p_not_s),
  id_stud, start_s, starttime, endtime, username,
  other_present:other_difficulties, everything()
)

# Stacked; the "_" column records which table each row came from.
not <- bind_rows(s_n, p_n, .id = "_")

# not %>% arrange( start_s) %>% View()

# all together
s_all <- select(
  ds,
  id_stud, start_s, starttime, endtime, username,
  other_present:other_difficulties, everything()
)

p_all <- select(
  dp,
  id_stud, start_s, starttime, endtime, username,
  other_present:other_difficulties, everything()
)

all <- bind_rows(s_all, p_all, .id = "_")

# all %>% mutate(not = ifelse(id_stud %in% c(s_not_p, p_not_s), 1, 0 )) %>% 
  # relocate(not) %>% arrange(start_s) %>% View()

# For now we add an asterisk to the duplicates: the first row of each id keeps
# it unchanged, every later row with the same id gets "*" appended.

ds %<>% group_by(id_stud) %>% 
  mutate(
    # Both branches are character, so every group yields the same type
    # whether or not it contains a duplicate.
    id_stud_temp = ifelse(
      row_number() > 1,
      paste0(id_stud, "*"),
      as.character(id_stud)
    )
  ) %>% 
  ungroup() %>% mutate(id_stud = id_stud_temp)

There are 25 students and 31 parents whose parent/child was not interviewed. This limits the correlation analysis to 71 student-parent pairs.

There are also some duplicates (2 for students and 1 for parents). Just looking at this data, for one duplicate we can probably identify which is the correct one because the parent was interviewed just after. For the other ones there is no way to tell: the parent with that id was not interviewed, and there is no unmatched parent interviewed in a nearby time window. I will look at test scores to see if we can at least tell which is the most plausible correct recording of the id.

Computing test scores

#---- Children
# Answer key for raven1..raven10, matched to the stacked items by position.
raven_correct <- c(2, 2, 4, 1, 5, 5, 3, 2, 3, 3)
n_obs <- nrow(ds)

# Stack the ten items, repeat the key once per respondent, and count the
# correct answers per id. The key lines up only because each respondent's ten
# rows stay together and in item order after the sort.
ds %<>% 
  pivot_longer(c(raven1:raven10)) %>% 
  arrange(id_stud) %>%
  mutate(
    # `times` spelled out; `time` only worked through partial matching
    raven_correct = rep(raven_correct, times = n_obs),
    score = ifelse(value == raven_correct, 1, 0)
  ) %>% group_by(id_stud) %>%
  summarise(raven = sum(score, na.rm = TRUE)) %>% 
  right_join(ds)


p1 <- my_hist(ds, raven) + xlab("Raven")


# Answer key for stroop1..stroop20, matched to the stacked items by position.
stroop_correct <- c(2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1)

# Same scoring as for Raven; `n_obs` is the student row count set above.
ds %<>% 
  pivot_longer(c(stroop1:stroop20)) %>% 
  arrange(id_stud) %>%
  mutate(
    # `times` spelled out; `time` only worked through partial matching
    stroop_correct = rep(stroop_correct, times = n_obs),
    score = ifelse(value == stroop_correct, 1, 0)
  ) %>% group_by(id_stud) %>%
  summarise(stroop = sum(score, na.rm = TRUE)) %>%
  right_join(ds)

p2 <- my_hist(ds, stroop) + xlab("Stroop")


#----- Parents
# Answer key for the parents' raven1..raven10, matched by position.
raven_correct <- c(5, 3, 6, 6, 7, 7, 1, 8, 2, 5)
n_obs <- nrow(dp)
dp %<>%
  pivot_longer(c(raven1:raven10)) %>% 
  arrange(id_stud) %>%
  mutate(
    # `times` spelled out; `time` only worked through partial matching
    raven_correct = rep(raven_correct, times = n_obs),
    score = ifelse(value == raven_correct, 1, 0)
  ) %>%
  # Parent ids are not made unique, so grouping by id alone would add up the
  # answers of interviews that share an id. `row` identifies one interview.
  group_by(id_stud, row) %>%
  summarise(
    raven = sum(score, na.rm = TRUE),
    # counts of answers coded 99 and 88 (NA if any answer is missing)
    raven99 = sum(value == 99),
    raven88 = sum(value == 88),
    .groups = "drop"
  ) %>% 
  right_join(dp, by = c("id_stud", "row")) %>%
  # same column order as a join on id alone: id, scores, then the rest of dp
  select(id_stud, raven, raven99, raven88, all_of(names(dp)))
p3 <- my_hist(dp, raven) + xlab("Raven Parents")
p4 <- my_hist(dp, raven88) + xlab("Raven DK Parents")
p5 <- my_hist(dp, raven99) + xlab("Raven NA Parents")
p6 <- dp %>% mutate(raven8899 = raven88 + raven99) %>% 
  my_hist(raven8899) + xlab("Raven DK + NA Parents")


# Answer key for stroop1..stroop20, matched to the stacked items by position.
stroop_correct <- c(2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1)
dp %<>% 
  pivot_longer(c(stroop1:stroop20)) %>% 
  arrange(id_stud) %>%
  mutate(
    # `times` spelled out; `time` only worked through partial matching
    stroop_correct = rep(stroop_correct, times = n_obs),
    score = ifelse(value == stroop_correct, 1, 0)
  ) %>%
  # Parent ids are not made unique, so grouping by id alone would add up the
  # answers of interviews that share an id. `row` identifies one interview.
  group_by(id_stud, row) %>%
  summarise(stroop = sum(score, na.rm = TRUE), .groups = "drop") %>%
  right_join(dp, by = c("id_stud", "row")) %>%
  # same column order as a join on id alone: id, score, then the rest of dp
  select(id_stud, stroop, all_of(names(dp)))
p7 <- my_hist(dp, stroop) + xlab("Stroop Parents")


# Student plots first, then the parent plots, two per row
grid.arrange(p1, p2, p3, p7, p4, p5, p6, ncol = 2)

Students

A day in the life of a student

I will add the labels a bit later and arrange things more nicely. For now, note that the "other" category (66) has a lot of responses. We need to code them to see whether they fit existing answers or are truly "other".

# main activities
# Share per activity option, one panel per question, labelled by joining the
# option number to the `value` column of `labels`.
p1 <- ds %>%
  pivot_longer(c(
    activ_morning_1:activ_morning_99,
    activ_afternoon_1:activ_afternoon_99,
    activ_evening_1:activ_evening_99
  )) %>%
  sep_mult() %>%
  group_share() %>%
  left_join(rename(labels, number = value)) %>%
  ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
  geom_col(fill = col_base) +
  scale_y_continuous(name = "", labels = scales::percent) +
  facet_wrap(~var_label) +
  coord_flip() +
  labs(x = "", y = "") +
  ggtitle("Attività giornaliere durante la quarantena")
  
# studying time
# One row per student and study* column; `value` becomes 1 when the numeric
# answer is above zero and 0 otherwise.
temp <- ds %>%
  pivot_longer(starts_with("study")) %>%
  mutate(value = ifelse(as.numeric(value) > 0, 1, 0))

# Share of students with at least one study* column above zero
# (a missing answer makes that student's sum, and hence the share, NA).
any_studying <- temp %>%
  group_by(id_stud) %>%
  summarise(study = sum(value)) %>%
  summarise(share = sum(study > 0) / n())

# NOTE(review): the three labels are attached in order of descending share,
# not by column name, so they are only right if morning > afternoon > evening
# - confirm against the data.
p2 <- temp %>%
  sh_by_value() %>%
  filter(value == 1) %>%
  arrange(desc(share)) %>%
  ggplot(aes(
    x = factor(
      name,
      levels = name,
      labels = c("Mattina", "Pomeriggio", "Sera")
    ),
    y = share
  )) +
  geom_col(fill = col_base, width = 0.6) +
  ggtitle("Studiato o fatto i compiti") +
  no_labs() +
  y_percent()

# p1 fills the left column and the top two rows; p2 sits bottom right.
grid.arrange(
  p1, p2,
  nrow = 3,
  layout_matrix = cbind(c(1, 1, NA), c(1, 1, 2))
)

Access to materials and learning

# Access to books and guides
# Share per access_books option, ordered by share.
p_access <- ds %>%
  pivot_longer(c(access_books_0:access_books_2)) %>%
  sep_mult() %>%
  group_share() %>%
  left_join(rename(labels, number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 30)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("access_books")) +
  coord_flip() +
  no_labs() +
  y_percent()

# How many printed and solved
# Share of each answer below code 88 for the two variables, side by side.
# NOTE(review): `solved_boooks_many` has three o's - presumably the column is
# spelled that way in the data; confirm.
p_solved <- ds %>%
  pivot_longer(c(print_books_many, solved_boooks_many)) %>% 
  sh_by_value() %>% 
  filter(value < 88) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(label, 30), value, .desc = TRUE),
    y = share,
    fill = factor(str_wrap(var_label, 30))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  no_labs() + labs(fill = "") + y_percent() + ggtitle("Materiale didattico") +
  coord_flip() +
  theme(legend.position = c(0.7, 0.8),
        legend.direction = "vertical",
        legend.background = element_rect(colour = "transparent", fill = "white"))
  #theme(axis.text.x = element_text(angle = 90))


# How printed
# Share per print_books_how option below code 88, ordered by share.
p_print_how <- ds %>%
  pivot_longer(c(print_books_how_1:print_books_how_99)) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number < 88) %>%
  left_join(rename(labels, number = value)) %>%
  ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("print_books_how")) +
  no_labs() +
  labs(fill = "") +
  y_percent() +
  coord_flip()

# How worked books

# paper vs electronic
# Share per how_work_books option below code 88, ordered by share.
p_work_how <- ds %>%
  pivot_longer(c(how_work_books_1:how_work_books_99)) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number < 88) %>%
  left_join(rename(labels, number = value)) %>%
  ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("how_work_books")) +
  no_labs() +
  labs(fill = "") +
  y_percent() +
  coord_flip()

# on her own or with help

# Share of each non-missing work_books_alone answer, ordered by share.
p_alone <- ds %>%
  pivot_longer(work_books_alone) %>%
  count(value) %>%
  na.omit() %>%
  mutate(
    share = n / sum(n),
    name = "work_books_alone"
  ) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 15)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("work_books_alone")) +
  no_labs() +
  labs(fill = "") +
  y_percent() +
  coord_flip()

# books useful vs boring
# Share of each answer (codes 0-2) to the two questions, bars side by side;
# shares are computed before codes of 88 and above are dropped.
p_difficult <- ds %>%
  pivot_longer(c(books_difficult, books_useful)) %>%
  group_by(name) %>%
  count(value) %>%
  na.omit() %>%
  mutate(share = n / sum(n)) %>%
  filter(value < 88) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = factor(var_label),
    y = share,
    fill = factor(
      value,
      levels = 0:2,
      labels = c(
        "Non difficile/interessante",
        "Abbastanza difficile/interessante",
        "Molto difficile/interessante"
      )
    )
  )) +
  geom_col(position = position_dodge2()) +
  scale_fill_ghibli_d(pal, direction = -1) +
  ggtitle("Difficoltà/Utilità materiale didattico") +
  no_labs() +
  labs(fill = "") +
  y_percent()
  

# Watched tv or listened to Radio?
# Value labels come from `labels` (values up to the last listed level).
p_tv <- my_hist(
  ds,
  factor(tv_educ, levels = 0:3, labels = give_val_label("tv_educ", 3))
) +
  no_labs() +
  ggtitle(str_wrap(give_var_label("tv_educ"), 20))


# was it useful for learning
p_tv_useful <- my_hist(
  filter(ds, !is.na(tv_educ_help)),
  factor(
    tv_educ_help,
    levels = 0:2,
    labels = give_val_label("tv_educ_help", 2)
  )
) +
  no_labs() +
  ggtitle(give_var_label("tv_educ_help"))

# Did other education activities
p_other_educ <- my_hist(
  filter(ds, !is.na(other_educ)),
  factor(other_educ, levels = 0:3, labels = give_val_label("other_educ", 3))
) +
  no_labs() +
  ggtitle(str_wrap(give_var_label("other_educ"), 30))


# what other educ activ
# Share per other_educ_activ option up to code 66, ordered by share.
p_what_other_educ <- ds %>%
  pivot_longer(c(other_educ_activ_1:other_educ_activ_99)) %>%
  sep_mult() %>%
  filter(number <= 66) %>%
  group_share() %>%
  left_join(rename(labels, number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 20)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(str_wrap(give_var_label("other_educ_activ"), 20)) +
  no_labs() +
  labs(fill = "") +
  y_percent() +
  coord_flip()


# Display the plots built above
grid.arrange(
  p_access, p_solved,
  nrow = 3,
  layout_matrix = cbind(c(1, 2, 2), c(NA, 2, 2))
)

grid.arrange(p_print_how, p_work_how, nrow = 2)

p_alone

p_difficult

grid.arrange(p_tv, p_tv_useful, p_other_educ, p_what_other_educ, nrow = 2)

Most students had access to at least some of the materials. Among the respondents who did not have access, only one student said that he/she did not know these handbooks were being given out and could not print them. The others either did not give a reason or gave reasons that are hard to classify or understand. 5 students reported not having worked on the exercise book: one because there was no one to help, one blamed her/his own laziness, and the others either did not answer or gave an answer that was not listed.

Support and social contact

Parental, community and NGO support

# Stacked answer shares (codes below 88) for parent_hw through parent_day.
ds %>%
  pivot_longer(c(parent_hw:parent_day)) %>%
  group_by(name) %>%
  filter(value < 88) %>%
  count(value) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = str_wrap(var_label, 30),
    y = share,
    fill = factor(str_wrap(label, 20))
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  ggtitle("Supporto dai genitori") +
  coord_flip() +
  labs(x = "", y = "", fill = "") +
  y_percent() +
  # legend.direction = "vertical" is left switched off
  theme(legend.position = "right")

# Stacked answer shares (codes below 88) for the community and NGO support
# questions.
ds %>%
  pivot_longer(c(educ_support_com_yn, educ_support_ngo_yn)) %>%
  group_by(name) %>%
  filter(value < 88) %>%
  count(value) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = str_wrap(var_label, 20),
    y = share,
    fill = factor(str_wrap(label, 20))
  )) +
  geom_col(position = position_fill(reverse = TRUE), width = 0.5) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "") +
  y_percent() +
  ggtitle("Supporto scolastico dalla comunità e da organizzazioni") +
  # legend.direction = "vertical" is left switched off
  theme(legend.position = "right")

Contact with professors and classmates

# contacted (by) teachers, classmates
# Share of all rows answering 1 to each question (missing answers count in
# the denominator), in questionnaire order.
ds %>% 
  pivot_longer(c(contact_prof_yn, contact_by_prof_yn, contact_classmates_yn)) %>% 
  group_by(name) %>% 
  summarise(share = sum(value == 1, na.rm = TRUE) / n()) %>% 
  left_join(labels %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle("Contatto con insegnanti e compagni") +
  coord_flip() + no_labs() + y_percent()

# how often
# Stacked shares of each non-missing frequency answer below code 66.
ds %>% 
  pivot_longer(c(contact_prof_freq, contact_by_prof_freq, contact_classmates_freq)) %>% 
  group_by(name) %>% 
  count(value) %>% 
  na.omit() %>% 
  filter(value < 66) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = factor(str_wrap(label, 20))
  )) +
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  ggtitle("Frequenza contatto") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

# how contacted
# Share per contact-mode option below code 66, one bar group per question.
ds %>% 
  pivot_longer(starts_with(
    c("contact_prof_how_", "contact_by_prof_how_", "contact_classmates_how_")),
    values_drop_na = TRUE) %>% 
  sep_mult() %>% 
  group_share() %>% 
  filter(number < 66) %>% 
  left_join(labels %>% rename(number = value)) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = factor(label)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  coord_flip() +
  ggtitle("Modalità contatto") +
  labs(x = "", y = "", fill = "") + y_percent()

# why contacted?
# Share per reason option up to code 66 (columns ending in "other" are left
# out); the reason text is printed on the bars instead of in a legend.
ds %>% 
  pivot_longer(starts_with(
    c("contact_prof_why_", "contact_by_prof_why_", "contact_classmates_why_")) &
      !ends_with("other"),
    values_drop_na = TRUE) %>% 
  sep_mult() %>% 
  group_share() %>% 
  filter(number <= 66) %>% 
  left_join(labels %>% rename(number = value)) %>%  
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = factor(number)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_dodge2(reverse = FALSE)) +
  geom_text(aes(label = as.character(label), y = 0.5),
            position = position_dodge(.9), hjust = 0) +
  ggtitle("Ragione contatto") +
  coord_flip() +
  labs(x = "", y = "") + y_percent() +
  # "none" replaces the deprecated guides(fill = FALSE)
  guides(fill = "none")

Socio-emotional skills

# Depression
# Stacked shares of each answer below code 66 for depr_nervous through
# depr_afraid, items in questionnaire order.
ds %>%
  pivot_longer(c(depr_nervous:depr_afraid)) %>% 
  group_by(name) %>%
  count(value) %>% 
  filter(value < 66) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(label, value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Benessere emozionale") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

#  Self-efficacy
# Stacked shares of each answer below code 66 for the columns starting with
# "self", items in questionnaire order.
ds %>%
  pivot_longer(starts_with("self")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 66) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Autoefficacia") +
  coord_flip() +
  labs(x = "", y = "", fill = "") +
  y_percent()

# other skills
# Stacked shares of each answer below code 66 for shy_authority through
# reciproc_negative, items in questionnaire order.
ds %>%
  pivot_longer(shy_authority:reciproc_negative) %>% 
  group_by(name) %>%
  count(value) %>% 
  filter(value < 66) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Altre capacità socio-emotive") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Family relations

# Stacked shares of each answer below code 77 for rel_kids through rel_adults,
# items in questionnaire order.
ds %>%
  pivot_longer(c(rel_kids:rel_adults)) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Relazioni familiari durante pandemia") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

IT abilities

# Stacked shares of each answer below code 77 for it_cellcall through
# it_pcarchive, items in questionnaire order.
ds %>%
  pivot_longer(c(it_cellcall:it_pcarchive)) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Capacità IT") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Covid practices

# Stacked shares of each answer below code 77 for covid_hands through
# covid_touch, items in questionnaire order.
ds %>%
  pivot_longer(c(covid_hands:covid_touch)) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Comportamenti dalla pandemia") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Covid emotions

# Stacked shares of each answer below code 77 for the columns starting with
# "covid_emot", items in questionnaire order.
ds %>%
  pivot_longer(starts_with("covid_emot")) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Emozioni riguardo alla pandemia") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Soleterre

# Share per soleterre_* option below code 66 (columns ending in "other" are
# left out), bars in option order.
ds %>% 
  pivot_longer(starts_with("soleterre_") & !ends_with("other"),
  values_drop_na = TRUE) %>% 
  # the file's helpers replace an inline copy of the same split-and-share steps
  sep_mult() %>% 
  group_share() %>% 
  left_join(labels %>% rename(number = value)) %>% 
  filter(number < 66) %>% 
  ggplot(aes(x = fct_reorder(str_wrap(label, 17), number), y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("soleterre")) +
  # coord_flip() +
  labs(x = "", y = "") + y_percent()

Neighborhood

# Stacked shares of each answer below code 77 for the columns starting with
# "neigh", items in questionnaire order.
ds %>%
  pivot_longer(starts_with("neigh")) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Ambiente del quartiere") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Violence

# Stacked shares of each answer below code 77 for the columns starting with
# "viol_", items in questionnaire order.
ds %>%
  pivot_longer(starts_with("viol_")) %>% 
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Esposizione alla violenza") +
  coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent()

Child labor, educational aspirations/expectations

# Columns in this section (pasted from names(ds)):
# "child_labor_yn"              
# [327] "child_labor_activ"            "child_labor_freq"            
# [329] "school_useful"                "aspire_school"               
# [331] "expect_school"                "expect_school_whynot"

# Row 2, column 3 of each frequency table: the share of all rows taken by the
# second tabulated value.
child_labor_sh <- tabyl(ds$child_labor_yn)[2, 3]
school_useful_sh <- tabyl(ds$school_useful)[2, 3]
# Full frequency table for expect_school
expect_sh <- ds$expect_school %>% tabyl()

# Share of each non-missing aspire_school answer.
# NOTE(review): the five labels are attached to the observed values by
# position, so they are only right if exactly these five values occur, in
# this order - confirm against the data.
ds %>% 
  pivot_longer(c(aspire_school),
  values_drop_na = TRUE) %>% 
  count(value) %>% 
  mutate(share = n / sum(n, na.rm = TRUE)) %>% 
  ggplot(aes(x = factor(
    value, levels = value, 
    labels = c("III ciclo", "Diploma generale",
               "Diploma tecnico-vocazionale", "Educazione superiore", "Altro") %>% 
      str_wrap(20)), y = share)
    ) +
  geom_col(fill = col_base) +
  # coord_flip() +
  labs(x = "", y = "", fill = "") + y_percent() +
  ggtitle(give_var_label("aspire_school"))

24% of children reported having contributed economically to the family in the past year; more than half of them did so at least once a week, and a quarter every day. The most common activity is sewing hammocks or other forms of crafting.

90% think school is useful and essential for their future; the rest think of it as useful but not essential. 89% believe it is likely or very likely that they will achieve the educational level they aspire to. Two students think it is unlikely or very unlikely (one did not know why, the other because teachers do not come to the school), while the rest did not know what to respond.

Parents

Demographic characteristics

# One relative-frequency chart per demographic variable; the two numeric
# variables (age, household size) also get a dashed line at their mean.
dp %>% bar_one_var("female") 

dp %>% bar_one_var("to_child") 

# codes of 66 and above are left out
dp %>% filter(to_head < 66) %>% bar_one_var("to_head") 

dp %>% my_hist(age) + no_labs() + 
    ggtitle("Età") +
  geom_vline(xintercept = mean(dp$age, na.rm = TRUE), linetype = 2, colour = "black")

dp %>% bar_one_var("educ") 

dp %>% bar_one_var("educ_high_hh") 

dp %>% my_hist(hhsize) + no_labs() + 
    ggtitle(give_var_label("hhsize", labels_p)) +
  scale_x_continuous(breaks = 0:9) +
  geom_vline(xintercept = mean(dp$hhsize, na.rm = TRUE), linetype = 2, colour = "black")

Economic situation

# Share answering 1 for every asset* column (except the one ending in
# "nrooms") and for `internet`, in questionnaire order.
dp %>%
  pivot_longer(c(starts_with("asset") & !ends_with("nrooms"), internet)) %>% 
  group_by(name) %>%
  count(value) %>%
  #filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>%
  filter(value == 1) %>% 
  left_join(labels_p %>% filter(value == 1)) %>% 
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(fill = col_base) +
  coord_flip() +
  ggtitle("Beni di proprietà") +
  no_labs() + y_percent()

# Distribution of bef_nactiv and after_nactiv (values below 77), bars side by
# side. `value` is dropped from the label table so the join adds only the
# variable labels.
dp %>% 
  pivot_longer(c(bef_nactiv, after_nactiv)) %>% 
  group_by(name) %>% 
  count(value) %>%
  filter(value < 77) %>% 
  mutate(share = n / sum(n)) %>% 
  left_join(labels_p %>% select(-value)) %>% 
  ggplot(aes(
    x = value,
    y = share,
    fill = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = FALSE)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_dodge2()) +
  ggtitle("Numero di persone con attività generatrici di reddito") +
  no_labs() + y_percent() +
  theme(legend.position = c(0.8, 0.7))

# Received remittances or help from the government: share of respondents
# answering 1 on remit_yn and govhelp_yn.
# sh_by_value() does the per-item count and share. The bars use the constant
# col_base fill, so no fill scale is needed.
dp %>%
  pivot_longer(c(remit_yn, govhelp_yn)) %>%
  sh_by_value() %>%
  filter(value == 1) %>%
  left_join(labels_p %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = FALSE),
    y = share
  )) +
  geom_col(fill = col_base, width = 0.6) +
  ggtitle("Sostegno economico da parte del governo o rimesse") +
  no_labs() +
  y_percent()

# What happened to main income, total income, remittances etc.: stacked
# 100% bars, one per *_change variable, split by answer category.
# Missing values and codes of 77 and above are removed before the shares
# are computed.
dp %>%
  pivot_longer(ends_with("_change")) %>%
  group_by(name) %>%
  count(value) %>%
  drop_na() %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Cambiamento della situazione economica post pandemia") +
  coord_flip() +
  no_labs() +
  y_percent()

# How they dealt with loss in income: share selecting each option of the
# multi-select item deal_with_loss_*.
# sep_mult() splits "deal_with_loss_<k>" into name and numeric option
# number; group_share() computes sum(value) / n() per option. Missing
# answers are dropped in pivot_longer, so they do not enter the denominator.
dp %>%
  pivot_longer(
    c(deal_with_loss_1:deal_with_loss_88),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  # The option number plays the role of `value` in the label table.
  left_join(labels_p %>% rename(number = value)) %>%
  ggplot(aes(x = fct_reorder(factor(label), share), y = share)) +
  geom_col(fill = col_base) +
  coord_flip() +
  no_labs() +
  y_percent() +
  ggtitle(give_var_label("deal_with_loss", labels_p))

# Types of support from government or NGO: share selecting each option of
# the multi-select items help_gov_* and help_ngo_*.
# sep_mult() and group_share() do the name/number split and the per-option
# share. Options numbered above 66 are left out of the chart.
dp %>%
  pivot_longer(
    c(help_gov_1:help_gov_99, help_ngo_1:help_ngo_99),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number <= 66) %>%
  left_join(labels_p %>% rename(number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 20)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("help_gov", labels_p)) +
  no_labs() +
  y_percent() +
  coord_flip()

# Not enough money for medicines, and worries about having enough food.
# enoug_medicine is flipped first (0 becomes 1, anything else 0) so that
# both bars point in the same direction.
# NOTE(review): sum(value) has no na.rm, so one missing answer makes the
# whole share NA for that variable — confirm this is intended.
dp %>%
  mutate(enoug_medicine = ifelse(enoug_medicine == 0, 1, 0)) %>%
  pivot_longer(c(enoug_medicine, worry_food)) %>%
  group_by(name) %>%
  summarise(share = sum(value) / n()) %>%
  left_join(labels_p %>% select(-value)) %>%
  # The join can return several label rows per variable; keep one each.
  distinct(name, .keep_all = TRUE) %>%
  ggplot(aes(x = var_label, y = share)) +
  geom_col(fill = col_base) +
  ggtitle("Preoccupazioni") +
  no_labs() +
  y_percent()

Worries

# Stacked 100% bars for every worries* item, split by answer category
# (values below 4 only).
dp %>%
  pivot_longer(starts_with("worries")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 4) %>%
  # Share of each answer within its item. The denominator is the total
  # count for the item (sum(n)), not the number of answer rows (n()).
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Preoccupazioni") +
  coord_flip() +
  no_labs() +
  y_percent()

Educational activities of pupils

# Share selecting each option of the multi-select item activ_educ_*.
# sep_mult() and group_share() do the name/number split and the per-option
# share. Options numbered above 66 are left out of the chart.
dp %>%
  pivot_longer(
    c(activ_educ_0:activ_educ_99),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number <= 66) %>%
  left_join(labels_p %>% rename(number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 20)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(paste(give_var_label("activ_educ", labels_p), "educative")) +
  no_labs() +
  y_percent() +
  coord_flip()

# Share selecting each option of the multi-select item activ_leisure_*.
# sep_mult() and group_share() do the name/number split and the per-option
# share. Options numbered above 66 are left out of the chart.
dp %>%
  pivot_longer(
    c(activ_leisure_0:activ_leisure_99),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number <= 66) %>%
  left_join(labels_p %>% rename(number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 20)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(paste(give_var_label("activ_leisure", labels_p), "ricreative")) +
  no_labs() +
  y_percent() +
  coord_flip()

# Mean of time_hw over answers coded below 77 (missing values ignored);
# drawn as the dashed reference line below.
mean_time_hw <- dp %>%
  filter(time_hw < 77) %>%
  summarise(mean = mean(as.numeric(time_hw), na.rm = TRUE)) %>%
  pull(mean)

# Share of respondents per time_hw value, with the x axis labelled in
# half-hour steps from 0 to 5.5 h.
dp %>%
  filter(time_hw < 77) %>%
  mutate(time_hw = as.numeric(time_hw)) %>%
  count(time_hw) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(x = time_hw, y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("time_hw", labels_p)) +
  scale_x_continuous(
    breaks = seq(0, 5.5, 0.5),
    labels = paste(seq(0, 5.5, 0.5), "h")
  ) +
  no_labs() +
  y_percent() +
  geom_vline(xintercept = mean_time_hw, linetype = 2, colour = "black")

# Mean of time_help over answers coded below 77 (missing values ignored);
# drawn as the dashed reference line below.
mean_time_help <- dp %>%
  filter(time_help < 77) %>%
  summarise(mean = mean(as.numeric(time_help), na.rm = TRUE)) %>%
  pull(mean)

# Share of respondents per time_help value, with the x axis labelled in
# half-hour steps from 0 to 5.5 h.
dp %>%
  filter(time_help < 77) %>%
  mutate(time_help = as.numeric(time_help)) %>%
  count(time_help) %>%
  mutate(share = n / sum(n)) %>%
  ggplot(aes(x = time_help, y = share)) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("time_help", labels_p)) +
  scale_x_continuous(
    breaks = seq(0, 5.5, 0.5),
    labels = paste(seq(0, 5.5, 0.5), "h")
  ) +
  no_labs() +
  y_percent() +
  geom_vline(xintercept = mean_time_help, linetype = 2, colour = "black")

# Frequency table of support_help_child, kept in a variable.
support_help_child <- tabyl(dp$support_help_child)

# In contact with teachers (codes below 88 only).
dp %>%
  filter(contact_teachers_yn < 88) %>%
  bar_one_var("contact_teachers_yn")

# Teachers revised homework.
# NOTE(review): the filter is on contact_teachers_yn, not on verify_hw_much —
# confirm this is deliberate and not a copy-paste leftover.
dp %>%
  filter(contact_teachers_yn < 88) %>%
  bar_one_var("verify_hw_much")

75% of parents reported that they did not receive any support on how to help the pupil solve the exercise books. They believe the school (primarily) and public institutions should have provided support. Among those who received support, the teacher was the main source for most of them.

Socio-emotional skills

# Depression items (depr_nervous through depr_afraid): stacked 100% bars,
# one per item, split by answer category (codes below 66 only).
# NOTE(review): this block joins to `labels`, while the other parent plots
# join to `labels_p` — confirm that `labels` is the intended table here.
dp %>%
  pivot_longer(c(depr_nervous:depr_afraid)) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 66) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(label, value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Benessere emozionale") +
  coord_flip() +
  no_labs() +
  y_percent()

# Self-efficacy items (columns starting with "self"): stacked 100% bars,
# one per item, split by answer category (codes below 66 only).
dp %>%
  pivot_longer(starts_with("self")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 66) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Autoefficacia") +
  coord_flip() +
  no_labs() +
  y_percent()

Family relations

# Relationship items (rel_children through rel_community): stacked 100%
# bars, one per item, split by answer category (codes below 77 only).
dp %>%
  pivot_longer(c(rel_children:rel_community)) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Relazioni familiari") +
  coord_flip() +
  no_labs() +
  y_percent()

Covid practices

# Frequency tables of the covid-test items, kept in variables.
test_covid <- tabyl(dp, test_covid_yn)
test_result <- tabyl(dp, test_covid_result)

In 0% of households, at least one person took the virus test. In 2 households the person(s) tested was/were found positive.

# Covid behaviour items (covid_hands through covid_market): stacked 100%
# bars, one per item, split by answer category (codes below 77 only).
dp %>%
  pivot_longer(c(covid_hands:covid_market)) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Comportamenti dalla pandemia") +
  coord_flip() +
  no_labs() +
  y_percent()

Satisfaction with local and national authorities

# Satisfaction items (columns ending in "_satisfy"): stacked 100% bars,
# one per item, split by answer category. Missing values and codes of 77
# and above are removed before the shares are computed.
dp %>%
  pivot_longer(ends_with("_satisfy")) %>%
  group_by(name) %>%
  count(value) %>%
  drop_na() %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE), width = 0.5) +
  ggtitle("Soddisfazione misure anti covid") +
  coord_flip() +
  no_labs() +
  y_percent()

# Why not satisfied with local authorities: share selecting each option of
# the multi-select item covid_loc_auth_notsatisfy_*.
# sep_mult() and group_share() do the name/number split and the per-option
# share. Options numbered above 66 are left out of the chart.
dp %>%
  pivot_longer(
    c(covid_loc_auth_notsatisfy_1:covid_loc_auth_notsatisfy_99),
    values_drop_na = TRUE
  ) %>%
  sep_mult() %>%
  group_share() %>%
  filter(number <= 66) %>%
  left_join(labels_p %>% rename(number = value)) %>%
  ggplot(aes(
    x = fct_reorder(factor(str_wrap(label, 20)), share),
    y = share
  )) +
  geom_col(fill = col_base) +
  ggtitle(give_var_label("covid_loc_auth_notsatisfy", labels_p)) +
  no_labs() +
  y_percent() +
  coord_flip()

Covid emotions

# Covid emotion items (columns starting with "covid_emot"): stacked 100%
# bars, one per item, split by answer category (codes below 77 only).
# Rows with any missing field after the label join are dropped.
dp %>%
  pivot_longer(starts_with("covid_emot")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  na.omit() %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Emozioni riguardo alla pandemia") +
  coord_flip() +
  no_labs() +
  y_percent()

Neighborhood

# Neighbourhood items (columns starting with "neigh"): stacked 100% bars,
# one per item, split by answer category (codes below 77 only).
dp %>%
  pivot_longer(starts_with("neigh")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 40), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Ambiente del quartiere") +
  coord_flip() +
  no_labs() +
  y_percent()

Violence

# Violence items (columns starting with "viol_"): stacked 100% bars, one
# per item, split by answer category (codes below 77 only).
dp %>%
  pivot_longer(starts_with("viol_")) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 20), var_nr, .desc = TRUE),
    y = share,
    fill = fct_reorder(str_wrap(label, 17), value)
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_fill(reverse = TRUE)) +
  ggtitle("Esposizione alla violenza") +
  coord_flip() +
  no_labs() +
  y_percent()

Subjective SES

# Distribution of the three ladder items (now, before, in one year) as
# dodged bars per ladder step; codes of 77 and above are excluded.
# The fill legend is ordered so that "Livello pre-covid" comes first.
dp %>%
  pivot_longer(c(ladder_now, ladder_before, ladder_in1yr)) %>%
  group_by(name) %>%
  count(value) %>%
  filter(value < 77) %>%
  mutate(share = n / sum(n)) %>%
  left_join(labels_p %>% select(-value)) %>%
  ggplot(aes(
    x = value,
    y = share,
    fill = fct_relevel(str_wrap(var_label, 20), "Livello pre-covid")
  )) +
  scale_fill_ghibli_d(pal, direction = -1) +
  geom_col(position = position_dodge2()) +
  scale_x_continuous(breaks = seq(0, 10, 2)) +
  ggtitle("Stato socioeconomico soggettivo") +
  labs(
    x = "Scala socio-economica (1 condizione peggiore, 10 condizione migliore)",
    y = "",
    fill = ""
  ) +
  y_percent()

Combining all 3 datasets

# Load the year-1 objects, then harmonise the column names of df with the
# year-2 naming, tag the rows with survey = "s0" and strip value labels.
load(here("data", "yr1.RData"))

# NOTE(review): aspire_sch is the source of two renames (aspire_school and
# expect_school); the second probably should come from a different year-1
# column — confirm against the year-1 codebook.
# NOTE(review): reciproc_negative and neigh_trust are renamed to themselves.
df <- df %>%
  rename(
    parent_hw = parenting_1,
    parent_org_sch = parenting_2,
    parent_talk_parents = parenting_6,
    parent_grades = parenting_7,
    parent_work = parenting_8,
    child_labor_yn = worked_last_yr,
    school_useful = sch_useful,
    aspire_school = aspire_sch,
    expect_school = aspire_sch,
    self_diffsit = self_efficacy_3,
    self_howto = self_efficacy_1,
    self_improvelife = self_efficacy_2,
    shy_authority = shyness_2,
    emot_sad = emot_prob_1,
    emot_fears = emot_prob_2,
    emot_angry = conduct_prob_1,
    emot_controlothers = conduct_prob_3,
    emot_fight = conduct_prob_2,
    reciproc_positive = reciproc_positve,
    reciproc_negative = reciproc_negative,
    neigh_trust = neigh_trust,
    neigh_safeviolence = neigh_safe,
    neigh_treatworse = neigh_treat_bad,
    viol_hurt = violence_1,
    viol_shots = violence_2,
    viol_robbed = violence_3,
    viol_familyhurt = violence_5
  ) %>%
  mutate(survey = "s0") %>%
  zap_labels()


# Build the combined table dt, one row per id_stud.
# Every column of each source gets a suffix (_s2 for ds, _s1 for df, _p2
# for dp); the id column is renamed back to id_stud to serve as join key.
# The indicator columns s2 / s1 / p2 are 1 where the row is present in that
# source and NA otherwise after the full joins.
dt <- ds %>%
  set_colnames(paste0(colnames(ds), "_s2")) %>%
  rename(id_stud = id_stud_s2) %>%
  mutate(s2 = 1) %>%
  full_join(
    df %>%
      # The key is converted to character in df before joining.
      mutate(id_stud = as.character(id_stud)) %>%
      set_colnames(paste0(colnames(df), "_s1")) %>%
      rename(id_stud = id_stud_s1) %>%
      mutate(s1 = 1),
    by = "id_stud"
  )

dt <- dt %>%
  full_join(
    dp %>%
      set_colnames(paste0(colnames(dp), "_p2")) %>%
      rename(id_stud = id_stud_p2) %>%
      mutate(p2 = 1),
    by = "id_stud"
  )


# re-arrange variables 
# dt %<>% 
#   relocate(c(username_s1:id_stud_check_s1,
#              school_s0:encues_id_s0,
#              username_p1:endtime_p1,
#              female_s0:years_at_school_s0,
#              hh_size_s0:bathroom_s0,
#              address_p1:internet_p1,
#              bef_nactiv_p1:help_ngo_other_p1,
#              ses_now_s0:ses_4yrs_s0,
#              ), .after = "id_stud") %>%
#   relo
#   relocate(c(s1_closed:end_s), .after = "end_demographics") %>% 
#   relocate(c(man_happy:man_tired), .after = "end_man") %>%
#   relocate(c(raven1:raven10), .after = "tests_order") %>%
#   relocate(c(time_raven_1:end_raven), .after = "raven10") %>%
#   relocate(c(stroop1:stroop20), .after = "time_raven_11") %>%
#   relocate(c(start_stroop:end_stroop), .after = "stroop20") %>%
#   relocate(c(start_emotions:self_improvelife), .after = "emotions_first")

# Matched subsets of dt, built from the presence indicators (1 or NA).
# A row is kept only when every listed indicator is 1; an NA indicator
# makes the sum NA, and filter() drops it.

# Present in both student waves.
s12 <- dt %>%
  filter(s1 + s2 == 2)

# Present in the parent survey and the second student wave.
sp <- dt %>%
  filter(p2 + s2 == 2)

# Present in all three sources.
s12p <- dt %>%
  filter(s1 + p2 + s2 == 3)

Correlational analysis

We are left with 68 student yr 1 - student yr 2 - parent yr 2 matched observations, so all the following analyses are based on that many observations.

# Study time: sum of all columns starting with "study" per student, joined
# back onto s12p as study_time_s2.
# sum() is called without na.rm, so a student with any missing component
# gets NA.
s12p <- s12p %>%
  pivot_longer(starts_with("study")) %>%
  group_by(id_stud) %>%
  summarise(study_time_s2 = sum(as.numeric(value))) %>%
  right_join(s12p)

# Books solved: replace code 88 with the mean of the answers coded below 88.
mean_v <- s12p %>%
  filter(solved_boooks_many_s2 < 88) %>%
  pull(solved_boooks_many_s2) %>%
  mean(na.rm = TRUE)

# Only the exact code 88 is replaced; any other value is left as is.
s12p <- s12p %>%
  mutate(
    solved_boooks_many_s2 = ifelse(
      solved_boooks_many_s2 == 88,
      mean_v,
      solved_boooks_many_s2
    )
  )

# Number of assets: per-student sum over the asset items (all asset*
# columns except the one ending in "nrooms_p2") plus internet_p2.
# Within each item, code 88 is first replaced by the rounded mean of that
# item computed with the 88s set aside.
s12p <- s12p %>%
  pivot_longer(
    c(starts_with("asset") & !ends_with("nrooms_p2"), internet_p2)
  ) %>%
  group_by(name) %>%
  mutate(
    value = ifelse(
      value == 88,
      round(mean(replace(value, value == 88, NA), na.rm = TRUE)),
      value
    )
  ) %>%
  group_by(id_stud) %>%
  summarise(assets_p2 = sum(as.numeric(value))) %>%
  right_join(s12p)

# Derived regressors:
# - age_s2: baseline age plus one year.
# - loss_inc_p2: 1 when total_inc_change_p2 is above 2, otherwise 0.
#   NOTE(review): any non-substantive code (e.g. 77/88) is also above 2 and
#   would be counted as a loss — confirm none are present in this column.
# - educ_basica: 1 when educ_high_hh_p2 is 3 or lower, otherwise 0.
s12p <- s12p %>%
  mutate(
    age_s2 = age_s1 + 1,
    loss_inc_p2 = ifelse(total_inc_change_p2 > 2, 1, 0),
    educ_basica = ifelse(educ_high_hh_p2 <= 3, 1, 0)
  )

# Self-efficacy at baseline: per-student mean of the self*_s1 items,
# rescaled as 10 * mean / 4 and joined back onto s12p as `self`.
# mean() is called without na.rm, so any missing item gives NA.
# NOTE(review): the 0-10 range described in the text holds only if these
# items run from 0 to 4 — confirm the item scale.
s12p <- s12p %>%
  pivot_longer(starts_with("self") & ends_with("_s1")) %>%
  group_by(id_stud) %>%
  summarise(self = 10 * mean(value) / 4) %>%
  right_join(s12p)

# Emotional well-being of parent and student: one score per respondent type,
# added to s12p as depr_<suffix> columns.
s12p <- s12p %>%
  select(id_stud, starts_with("depr")) %>%
  pivot_longer(starts_with("depr")) %>%
  # Split "<item>_<suffix>" at the last underscore; the suffix identifies
  # the respondent.
  separate(name, into = c("name", "resp"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Values above 4 are treated as missing, then the scale is reversed.
    value = ifelse(value > 4, NA, value),
    value = abs(value - 4)
  ) %>%
  # Missing answers are replaced by the item mean within respondent type.
  group_by(name, resp) %>%
  mutate(value = ifelse(is.na(value), mean(value, na.rm = TRUE), value)) %>%
  group_by(id_stud, resp) %>%
  summarise(depr = 10 * mean(value) / 3) %>%
  pivot_wider(
    names_from = resp,
    values_from = depr,
    names_prefix = "depr_"
  ) %>%
  ungroup() %>%
  right_join(s12p)

Correlates of amount of study time during the pandemic

I recoded variables such as self-efficacy and emotional well-being (scores) to vary from 0 to 10, 0 being the lowest possible score on the scale and 10 the highest. For now, in the models I used the Raven (fluid intelligence test) at baseline as the control for cognitive ability (capacità cognitiva). With the Stroop, no relationship emerged. The asset variable is the sum of the 11 asset dummies. I recoded the maximum level of education into a dummy (1 for basic education, i.e. grades 1 to 9 in El Salvador, and 0 for high school and university). I also recoded the loss of income into a dummy (1 if income was reduced or completely lost). In this model the dependent variable is continuous, indicating the number of daily hours of study.

# Six panels relating study_time_s2 to student/family characteristics,
# arranged in a two-column grid. Continuous regressors get a scatter plot
# with a straight lm fit (no confidence band); the two dummies get a bar of
# the group mean.
p_age <- ggplot(s12p, aes(x = age_s2, y = study_time_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Età") +
  ylab("")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(study_time_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Massimo livello di studio in famiglia") +
  ylab("")

p_asset <- ggplot(s12p, aes(x = assets_p2, y = study_time_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Numero di beni di proprietà (0-11)") +
  ylab("")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(study_time_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Riduzione del reddito durante la pandemia") +
  ylab("")

p_raven <- ggplot(s12p, aes(x = raven_s1, y = study_time_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Capacità cognitiva") +
  ylab("")

p_self <- ggplot(s12p, aes(x = self, y = study_time_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Autoefficacia") +
  ylab("")

grid.arrange(
  p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Numero di ore di studio giornaliere durante \n la pandemia e caratteristiche dello studente/della famiglia")
)

# Regressor names, in display order, and their wrapped display labels.
# Both vectors are reused by the later coefficient plots.
vars <- c(
  "female_s1", "age_s2", "raven_s1", "self", "educ_basica", "hhsize_p2",
  "assets_p2", "loss_inc_p2"
)
labels_vars <- str_wrap(
  c(
    "Femmina",
    "Età",
    "Capacità cognitiva 2019",
    "Autoefficacia 2019",
    "Massimo livello di studio in famiglia - istruzione di base (1-9)",
    "Dimensione famiglia",
    "Numero di beni di proprietà (0-11)",
    "Riduzione del reddito durante la pandemia"
  ),
  30
)

# OLS of study_time_s2 on the regressors above; coefficient plot with
# intervals of +/- 1.96 standard errors and a dashed line at zero.
lm(
  study_time_s2 ~ female_s1 + age_s2 + raven_s1 + self + educ_basica +
    hhsize_p2 + assets_p2 + loss_inc_p2,
  data = s12p
) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(term, levels = vars, labels = labels_vars)
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = 2) +
  xlab("") +
  ylab("Coefficiente")

Correlates of amount of exercise books solved during the pandemic

Even though not really correct, I kept the variables as continuous to have a bit more variability.

# Six panels relating solved_boooks_many_s2 to student/family
# characteristics, arranged in a two-column grid. Continuous regressors get
# a scatter plot with a straight lm fit (no confidence band); the two
# dummies get a bar of the group mean.
p_age <- ggplot(s12p, aes(x = age_s2, y = solved_boooks_many_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Età") +
  ylab("")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(solved_boooks_many_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Massimo livello di studio in famiglia") +
  ylab("")

p_asset <- ggplot(s12p, aes(x = assets_p2, y = solved_boooks_many_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Numero di beni di proprietà (0-11)") +
  ylab("")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(solved_boooks_many_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Riduzione del reddito durante la pandemia") +
  ylab("")

p_raven <- ggplot(s12p, aes(x = raven_s1, y = solved_boooks_many_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Capacità cognitiva") +
  ylab("")

p_self <- ggplot(s12p, aes(x = self, y = solved_boooks_many_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Autoefficacia") +
  ylab("")

grid.arrange(
  p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Quantità materiale didattico risolto (1 - nessuno, 5 - tutti) durante \n la pandemia e caratteristiche dello studente/della famiglia")
)

# OLS of solved_boooks_many_s2 on the regressors listed in `vars`;
# coefficient plot with intervals of +/- 1.96 standard errors and a dashed
# line at zero. Terms are shown in the order of `vars`, labelled with
# `labels_vars`.
lm(
  solved_boooks_many_s2 ~ female_s1 + age_s2 + raven_s1 + self +
    educ_basica + hhsize_p2 + assets_p2 + loss_inc_p2,
  data = s12p
) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(term, levels = vars, labels = labels_vars)
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = 2) +
  xlab("") +
  ylab("Coefficiente")

Correlates of emotional well-being

Higher values indicate higher well-being (I reversed the scale, so as not to call it depression).

# Seven panels relating depr_s2 to the parent's score and to student/family
# characteristics, arranged in a two-column grid. Continuous regressors get
# a scatter plot with a straight lm fit (no confidence band); the two
# dummies get a bar of the group mean.
p_depr <- ggplot(s12p, aes(x = depr_p2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Benessere emotivo del genitore") +
  ylab("")

p_age <- ggplot(s12p, aes(x = age_s2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Età") +
  ylab("")

p_educ <- s12p %>%
  group_by(educ_basica) %>%
  summarise(mean = mean(depr_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(
      educ_basica,
      c(1, 0),
      labels = c("Istruzione di \n base (1-9)", "Istruzione media \n e superiore")
    ),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Massimo livello di studio in famiglia") +
  ylab("")

p_asset <- ggplot(s12p, aes(x = assets_p2, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Numero di beni di proprietà (0-11)") +
  ylab("")

p_loss <- s12p %>%
  group_by(loss_inc_p2) %>%
  summarise(mean = mean(depr_s2, na.rm = TRUE)) %>%
  ggplot(aes(
    x = factor(loss_inc_p2, c(0, 1), c("No", "Sì")),
    y = mean
  )) +
  geom_col(fill = col_base) +
  xlab("Riduzione del reddito durante la pandemia") +
  ylab("")

p_raven <- ggplot(s12p, aes(x = raven_s1, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Capacità cognitiva") +
  ylab("")

p_self <- ggplot(s12p, aes(x = self, y = depr_s2)) +
  geom_point(col = col_base) +
  geom_smooth(method = "lm", se = FALSE, col = "black") +
  xlab("Autoefficacia") +
  ylab("")

# Empty white-bordered cell used to leave the slot next to p_depr blank.
# rectGrob() only builds the grob; grid.rect() would also draw it on the
# current device straight away.
blank <- rectGrob(gp = gpar(col = "white"))

grid.arrange(
  p_depr, blank, p_age, p_asset, p_educ, p_loss, p_raven, p_self,
  ncol = 2,
  top = textGrob("Benessere emotivo dello studente \n e caratteristiche dello studente/della famiglia")
)

# OLS of depr_s2 on depr_p2 plus the regressors listed in `vars`;
# coefficient plot with intervals of +/- 1.96 standard errors and a dashed
# line at zero. depr_p2 is shown first, followed by the terms in `vars`.
lm(
  depr_s2 ~ depr_p2 + female_s1 + age_s2 + raven_s1 + self + educ_basica +
    hhsize_p2 + assets_p2 + loss_inc_p2,
  data = s12p
) %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    ci = 1.96 * std.error,
    term = factor(
      term,
      levels = c("depr_p2", vars),
      labels = c("Benessere emotivo del genitore", labels_vars)
    )
  ) %>%
  ggplot(aes(x = fct_rev(term), y = estimate)) +
  geom_point(color = col_base, size = 2) +
  geom_errorbar(
    aes(ymin = estimate - ci, ymax = estimate + ci),
    size = .8,
    width = .005,
    color = col_base
  ) +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = 2) +
  xlab("") +
  ylab("Coefficiente")

Changes across time

Cognitive tests

Here I also recoded the Stroop to go from 0 to 10, like the Raven. The vertical lines are the means for each year.

# Mean Raven score per wave (missing values ignored); drawn as vertical
# lines on the plot below.
means <- s12p %>%
  pivot_longer(c(raven_s1, raven_s2)) %>%
  group_by(name) %>%
  summarise(mean = mean(value, na.rm = TRUE))

# Distribution of Raven scores in the two waves as dodged bars.
# sh_by_value() computes the share of each score within a wave.
s12p %>%
  pivot_longer(c(raven_s1, raven_s2)) %>%
  sh_by_value() %>%
  ggplot(aes(
    x = value,
    y = share,
    fill = factor(name, labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_color_ghibli_d(pal, direction = -1) +
  no_labs() +
  labs(fill = "Anno") +
  y_percent() +
  scale_x_continuous(breaks = 0:10) +
  geom_vline(
    data = means,
    aes(xintercept = mean, color = name),
    show.legend = FALSE
  ) +
  ggtitle("Capacità cognitive (intelligenza fluida) - variazioni nel tempo")

# Mean Stroop score per wave, halved to match the x axis of the plot below
# (missing values ignored).
means <- s12p %>%
  pivot_longer(c(stroop_s1, stroop_s2)) %>%
  group_by(name) %>%
  summarise(mean = mean(value, na.rm = TRUE) / 2)

# Distribution of halved Stroop scores in the two waves as dodged bars.
# sh_by_value() computes the share of each score within a wave.
s12p %>%
  pivot_longer(c(stroop_s1, stroop_s2)) %>%
  sh_by_value() %>%
  ggplot(aes(
    x = value / 2,
    y = share,
    fill = factor(name, labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  scale_color_ghibli_d(pal, direction = -1) +
  no_labs() +
  labs(fill = "Anno") +
  y_percent() +
  scale_x_continuous(breaks = 0:10) +
  geom_vline(
    data = means,
    aes(xintercept = mean, color = name),
    show.legend = FALSE
  ) +
  ggtitle("Capacità cognitive (controllo degli impulsi) - variazioni nel tempo")

Socio-emotional

I focus only on items which were repeated in the 2nd year. All were measured on a 4-point Likert scale. I computed an average treating the variables as continuous (1 tot. disagree, 4 tot. agree). In my opinion the figure is not hard to read. Overall, there is very little change across the waves.

# Mean answer per socio-emotional item and wave, rescaled to 0-1 and shown
# as dodged bars.
s12p %>%
  pivot_longer(
    starts_with(c(
      "self_diffsit_s", "self_howto_s", "self_improvelife_s",
      "emot_sad", "emot_fear", "emot_controlothers", "emot_fight",
      "shy_authority", "reciproc_"
    ))
  ) %>%
  # Split "<item>_<wave>" at the last underscore.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Wave s1 answers are shifted up by one before comparison with s2;
    # values above 4 are then treated as missing.
    value = ifelse(number == "s1", value + 1, value),
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  # Map the 1-4 mean onto 0-1.
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  left_join(labels %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  scale_y_continuous(
    breaks = c(0, 0.33, 0.66, 1),
    limits = c(0, 1),
    labels = c("Tot. in disaccordo", "In disaccordo", "D'accordo", "Tot. d'accordo")
  ) +
  ggtitle("Capacità socio-emotive - variazioni nel tempo")

Neighborhood and violence

I did the same for neighborhood environment. Violence in the previous year was measured on a dichotomous scale (yes, no), so I recoded this year's answers in the same way. Except for one variable, there was a reduction in violence.

# Mean answer per neighbourhood item and wave, rescaled to 0-1 and shown as
# dodged bars.
s12p %>%
  pivot_longer(
    starts_with(c("neigh_trust_s", "neigh_safeviolence_s", "neigh_treatworse_s"))
  ) %>%
  # Split "<item>_<wave>" at the last underscore.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Wave s1 answers are shifted up by one before comparison with s2;
    # values above 4 are then treated as missing.
    value = ifelse(number == "s1", value + 1, value),
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  # Map the 1-4 mean onto 0-1.
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  left_join(labels %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  scale_y_continuous(
    breaks = c(0, 0.33, 0.66, 1),
    limits = c(0, 1),
    labels = c("Tot. in disaccordo", "In disaccordo", "D'accordo", "Tot. d'accordo")
  ) +
  ggtitle("Ambiente del quartiere - variazioni nel tempo")

# Share reporting each kind of violence, per wave, shown as dodged bars.
s12p %>%
  pivot_longer(
    starts_with(c("viol_hurt_s", "viol_shots_s", "viol_robbed_s", "viol_familyhurt_s"))
  ) %>%
  # Split "<item>_<wave>" at the last underscore.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Values above 4 are treated as missing.
    value = ifelse(value > 4, NA, value),
    # Wave s2 answers of 1 or more are collapsed to 1, so the mean is a
    # share comparable with the s1 answers.
    value = ifelse(number == "s2" & value >= 1, 1, value)
  ) %>%
  group_by(name, number) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  left_join(labels %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  y_percent() +
  ggtitle("Esposizione alla violenza - variazioni nel tempo")

# parent_hw = parenting_1,
    # parent_org_sch = parenting_2,
    # parent_talk_parents = parenting_6,
    # parent_grades = parenting_7,
    # parent_work = parenting_8,
    # child_labor_yn =  worked_last_yr,
    # school_useful = sch_useful,
    # aspire_school = aspire_sch,
    # expect_school = aspire_sch,
    # self_diffsit  = self_efficacy_3,
    # self_howto    = self_efficacy_1,
    # self_improvelife  = self_efficacy_2,
    # shy_authority = shyness_2,
    # emot_sad  = emot_prob_1,
    # emot_fears    = emot_prob_2,
    # emot_angry    = conduct_prob_1,
    # emot_controlothers    = conduct_prob_3,
    # emot_fight    = conduct_prob_2,
    # reciproc_positive = reciproc_positve,
    # reciproc_negative = reciproc_negative,
    # neigh_trust   = neigh_trust,
    # neigh_safeviolence    = neigh_safe,
    # neigh_treatworse  = neigh_treat_bad,
    # viol_hurt = violence_1,
    # viol_shots    = violence_2,
    # viol_robbed   = violence_3,
    # viol_familyhurt   = violence_5

Parental support

# Parental support, 2019 vs 2020: mean answer per item, rescaled to [0, 1] and
# drawn as dodged horizontal bars with one bar per survey wave.
s12p %>%
  pivot_longer(
    starts_with(
      c("parent_org_sch_s", "parent_talk_parents_s", "parent_grades_s", "parent_work_s")
    )
  ) %>%
  # Split each column name at its last underscore into item name and wave
  # ("s1" / "s2").
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  mutate(
    # Wave s1 is shifted up by one -- presumably it was coded 0-3 while s2 is
    # coded 1-4 (TODO confirm against the questionnaires).
    value = ifelse(number == "s1", value + 1, value),
    # Anything above 4 is discarded (presumably missing-value codes).
    value = ifelse(value > 4, NA, value)
  ) %>%
  group_by(name, number) %>%
  # (mean - 1) / 3 maps a 1-4 scale onto [0, 1].
  summarise(mean = (mean(value, na.rm = TRUE) - 1) / 3) %>%
  ungroup() %>%
  # Only the value == 1 rows of `labels` are joined (presumably one per item);
  # they supply var_label and var_nr for the axis.
  left_join(labels %>% filter(value == 1)) %>%
  ggplot(aes(
    x = fct_reorder(str_wrap(var_label, 30), var_nr, .desc = TRUE),
    y = mean,
    fill = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020"))
  )) +
  geom_col(position = position_dodge2(reverse = TRUE)) +
  scale_fill_ghibli_d(pal, direction = -1) +
  coord_flip() +
  labs(x = "", y = "", fill = "Anno") +
  # Breaks sit at exact thirds, i.e. where answers 1, 2, 3, 4 land after the
  # (x - 1) / 3 rescaling above.
  scale_y_continuous(
    breaks = (0:3) / 3,
    limits = c(0, 1),
    labels = c("Mai o quasi mai", "A volte", "Spesso", "Sempre")
  ) +
  ggtitle("Supporto dai genitori - variazioni nel tempo")

Educational aspirations

# Share of students who want to go to university, 2019 vs 2020, as one bar
# per survey wave on a fixed 0-100% axis.
s12p %>%
  mutate(
    # Wave s2 indicator: 1 when aspire_school_s2 is 6, NA when it is missing or
    # above 6, 0 otherwise. (Presumably 6 = university and higher codes are
    # non-substantive answers -- TODO confirm against the codebook.)
    aspire_uni_s2 = case_when(
      aspire_school_s2 == 6 ~ 1,
      is.na(aspire_school_s2) ~ NA_real_,
      aspire_school_s2 > 6 ~ NA_real_,
      TRUE ~ 0
    ),
    # Wave s1 indicator already exists; 99 is treated as missing.
    aspire_uni_s1 = ifelse(aspire_uni_s1 == 99, NA, aspire_uni_s1)
  ) %>%
  select(aspire_uni_s2, aspire_uni_s1) %>%
  pivot_longer(aspire_uni_s2:aspire_uni_s1) %>%
  # Split each column name at its last underscore into item name and wave.
  separate(name, into = c("name", "number"), sep = "_(?=[^_]+$)") %>%
  group_by(name, number) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(
    x = factor(number, levels = c("s1", "s2"), labels = c("2019", "2020")),
    y = mean
  )) +
  geom_col(fill = col_base, width = 0.6) +
  labs(x = "", y = "") +
  # Single y scale. The earlier y_percent() call was dropped because this scale
  # replaced it anyway and ggplot2 reported a duplicate y scale.
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  ggtitle("Desidera andare all'università - variazioni nel tempo")