1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

  • Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
  • Social contacts: people and/or groups frequented by participants
  • Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities
VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

home_location <- locations[locations$location_category == 1, ]

## version ggmap
mtl_aoi <- st_bbox(home_location)
names(mtl_aoi) <- c("left", "bottom", "right", "top")
mtl_aoi[["left"]] <- mtl_aoi[["left"]] - .07
mtl_aoi[["right"]] <- mtl_aoi[["right"]] + .07
mtl_aoi[["top"]] <- mtl_aoi[["top"]] + .01
mtl_aoi[["bottom"]] <- mtl_aoi[["bottom"]] - .01

bm <- get_stadiamap(mtl_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Montréal 99
Beaconsfield 2
Dollard-Des Ormeaux 2
Brossard 1
Côte-Saint-Luc 1
Kirkland 1
Laval 1
Longueuil 1
Montréal-Est 1
Pointe-Claire 1

2.1.2 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 80
2 [No] 30

2.1.3 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 10
2 [Both from home and away from home] 45
3 [Away from home only] 25
N/A 30

2.1.4 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 21
2 [No] 89

2.1.6 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 5
2 [Both from home and away from home] 9
3 [Away from home only] 7
N/A 89

2.1.7 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.8 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 110

2.1.9 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 244
6 [Public/farmer’s market] 34
7 [Bakery] 21
8 [Specialty food store] 28
9 [Convenience store/Dépanneur] 5
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.22 2 6
6 [Public/farmer’s market] 0 0.31 0 2
7 [Bakery] 0 0.19 0 3
8 [Specialty food store] 0 0.25 0 5
9 [Convenience store/Dépanneur] 0 0.05 0 2
10 [Liquor store/SAQ] 0 0.00 0 0

2.1.10 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 80
2 [No] 30

2.1.11 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 26
12 [Hair salon/barbershop] 21
13 [Post office] 11
14 [Drugstore] 31
15 [Doctor/healthcare provider] 55
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.24 0 2
12 [Hair salon/barbershop] 0 0.19 0 2
13 [Post office] 0 0.10 0 2
14 [Drugstore] 0 0.28 0 4
15 [Doctor/healthcare provider] 0 0.50 0 3

2.1.12 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 84
2 [No] 26

2.1.13 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.1.14 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 60
2 [No] 50

2.1.15 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.16 Now, we will ask you questions about the people around you.

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 4 5 7 22
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

2.1.16.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.1 2 5
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 20 38.5 100 100 2

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.3 4.8 16
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 50 84.5 71 100 100 2

2.1.16.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 3.4 5 17
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 50 86.6 73.8 100 100 2

2.2 Derived metrics

2.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 81 32 25 24 45 1
4 [School/College/University] 28 5 6 5 20 5
5 [Supermarket] 244 113 111 34 21 1
6 [Public/farmer’s market] 34 14 14 10 3 0
7 [Bakery] 21 3 16 6 5 0
8 [Specialty food store] 28 8 13 8 5 0
9 [Convenience store/Dépanneur] 5 0 3 0 2 0
11 [Bank] 26 4 11 4 7 1
12 [Hair salon/barbershop] 21 4 7 7 4 0
13 [Post office] 11 0 10 1 1 0
14 [Drugstore] 31 10 21 3 7 0
15 [Doctor/healthcare provider] 55 16 17 9 20 1
17 [Leisure-time physical activity] 37 15 14 9 4 1
18 [Park] 76 11 42 29 5 1
19 [Cultural activity] 62 17 14 9 27 3
20 [Volunteering place] 4 0 2 0 0 3
22 [Restaurant, café, bar, etc.] 112 30 49 11 32 2
23 [Take-out] 9 2 4 2 1 0
24 [Walk] 21 7 10 2 2 3
25 [Other place] 228 102 64 33 49 19
26 [Social contact residence] 180 86 50 32 46 3
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 81 29 35.8
4 [School/College/University] 28 9 32.1
5 [Supermarket] 244 128 52.5
6 [Public/farmer’s market] 34 22 64.7
7 [Bakery] 21 3 14.3
8 [Specialty food store] 28 10 35.7
9 [Convenience store/Dépanneur] 5 4 80.0
11 [Bank] 26 5 19.2
12 [Hair salon/barbershop] 21 3 14.3
13 [Post office] 11 0 0.0
14 [Drugstore] 31 7 22.6
15 [Doctor/healthcare provider] 55 10 18.2
17 [Leisure-time physical activity] 37 21 56.8
18 [Park] 76 64 84.2
19 [Cultural activity] 62 47 75.8
20 [Volunteering place] 4 0 0.0
22 [Restaurant, café, bar, etc.] 112 95 84.8
23 [Take-out] 9 5 55.6
24 [Walk] 21 16 76.2
25 [Other place] 228 118 51.8
26 [Social contact residence] 180 141 78.3
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

2.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 81 1 31 10.2 7.0 8.6
4 [School/College/University] 28 1 40 5.8 3.0 8.4
5 [Supermarket] 244 1 30 3.7 3.0 3.9
6 [Public/farmer’s market] 34 1 20 3.9 3.0 3.8
7 [Bakery] 21 1 10 3.1 2.0 2.8
8 [Specialty food store] 28 1 20 2.9 2.0 3.6
9 [Convenience store/Dépanneur] 5 1 5 2.8 3.0 1.5
11 [Bank] 26 1 5 1.7 1.0 1.1
12 [Hair salon/barbershop] 21 1 2 1.1 1.0 0.3
13 [Post office] 11 1 10 2.2 1.0 2.6
14 [Drugstore] 31 1 4 2.0 2.0 0.9
15 [Doctor/healthcare provider] 55 1 30 1.9 1.0 3.9
17 [Leisure-time physical activity] 37 1 20 6.4 4.0 5.8
18 [Park] 76 1 20 4.6 3.0 4.6
19 [Cultural activity] 62 1 10 1.6 1.0 1.5
20 [Volunteering place] 4 2 4 3.2 3.5 1.0
22 [Restaurant, café, bar, etc.] 112 1 10 1.5 1.0 1.2
23 [Take-out] 9 1 4 1.8 1.0 1.3
24 [Walk] 21 1 25 3.8 2.0 5.3
25 [Other place] 228 1 25 2.6 1.0 3.6
26 [Social contact residence] 180 1 30 2.8 2.0 3.8
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'new'

2.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'new'

2.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 4 5.009091 7 22

2.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 3 4.5 5.815217 7.25 22 18

2.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 3.4 5 17

2.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.272727 4.75 16

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

# extract and recode
.home <- veritas_main[c("interact_id", "home_geom_confirmation")] %>% dplyr::rename(home_geom_confirmation_code = home_geom_confirmation)
.home$home_geom_confirmation <- factor(ifelse(.home$home_geom_confirmation_code == 1, "1 [Yes]",
  ifelse(.home$home_geom_confirmation_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .home) +
  geom_histogram(aes(x = home_geom_confirmation), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "home_geom_confirmation")

.home_cnt <- .home %>%
  group_by(home_geom_confirmation) %>%
  dplyr::count() %>%
  arrange(home_geom_confirmation)
kable(.home_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
home_geom_confirmation n
1 [Yes] 234
2 [No] 74

3.1.2 Place your home on map

home_location <- locations[locations$location_category == 1, ] %>%
  inner_join(.home, by = "interact_id")

## version ggmap
mtl_aoi <- st_bbox(home_location)
names(mtl_aoi) <- c("left", "bottom", "right", "top")
mtl_aoi[["left"]] <- mtl_aoi[["left"]] - .07
mtl_aoi[["right"]] <- mtl_aoi[["right"]] + .07
mtl_aoi[["top"]] <- mtl_aoi[["top"]] + .01
mtl_aoi[["bottom"]] <- mtl_aoi[["bottom"]] - .01

bm <- get_stadiamap(mtl_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, aes(color = home_geom_confirmation), size = 1.8, alpha = .5) + # see https://github.com/r-spatial/sf/issues/336
  theme(legend.position = "bottom")

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Montréal 242
Longueuil 21
Laval 17
Brossard 9
Saint-Lambert 6
Beaconsfield 2
Dollard-Des Ormeaux 2
Kirkland 2
Candiac 1
Dorval 1
Mont-Royal 1
Montréal-Ouest 1
Pointe-Claire 1
Rosemère 1
Westmount 1

3.1.3 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 211
2 [No] 97

3.1.4 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 36
2 [Both from home and away from home] 124
3 [Away from home only] 51
N/A 97

3.1.5 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.6 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 25
2 [No] 283

3.1.7 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 9
2 [Both from home and away from home] 10
3 [Away from home only] 6
N/A 283

3.1.8 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.9 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 300
2 [No] 8

3.1.10 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 783
6 [Public/farmer’s market] 126
7 [Bakery] 174
8 [Specialty food store] 235
9 [Convenience store/Dépanneur] 65
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.54 2 10
6 [Public/farmer’s market] 0 0.41 0 3
7 [Bakery] 0 0.56 0 5
8 [Specialty food store] 0 0.76 0 7
9 [Convenience store/Dépanneur] 0 0.21 0 3
10 [Liquor store/SAQ] 0 0.00 0 0

3.1.11 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 233
2 [No] 75

3.1.12 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 92
12 [Hair salon/barbershop] 90
13 [Post office] 79
14 [Drugstore] 201
15 [Doctor/healthcare provider] 179
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.30 0 2
12 [Hair salon/barbershop] 0 0.29 0 1
13 [Post office] 0 0.26 0 2
14 [Drugstore] 0 0.65 1 3
15 [Doctor/healthcare provider] 0 0.58 0 8

3.1.13 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 228
2 [No] 80

3.1.14 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

3.1.15 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 171
2 [No] 137

3.1.16 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.17 Now, we will ask you questions about the people around you

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 3 5 6.8 9 73
# histogram
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

3.1.17.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.3 2 10
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 16.7 31.7 50 100

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 3 3.6 5 22
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.5 33.3 63.6 64.1 100 100

3.1.17.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 2 3 4.6 6 34
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.5 56.7 85 76.5 100 100

3.2 Derived metrics

3.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 224 86 46 85 104 8
4 [School/College/University] 22 3 5 4 16 0
5 [Supermarket] 783 367 379 97 37 15
6 [Public/farmer’s market] 126 45 61 35 10 0
7 [Bakery] 174 36 115 29 12 0
8 [Specialty food store] 235 68 137 38 17 3
9 [Convenience store/Dépanneur] 65 15 54 0 0 0
11 [Bank] 92 31 51 14 5 4
12 [Hair salon/barbershop] 90 35 35 9 15 1
13 [Post office] 79 23 50 7 2 5
14 [Drugstore] 201 57 145 25 6 3
15 [Doctor/healthcare provider] 179 68 52 39 30 9
17 [Leisure-time physical activity] 157 62 50 46 19 6
18 [Park] 278 45 200 62 11 6
19 [Cultural activity] 190 63 52 44 62 4
20 [Volunteering place] 27 11 12 3 3 2
21 [Religious/spiritual activity] 9 5 4 0 1 0
22 [Restaurant, café, bar, etc.] 356 114 176 48 56 7
23 [Take-out] 87 38 41 12 2 4
24 [Walk] 178 30 136 24 16 3
25 [Other place] 829 375 335 97 104 15
26 [Social contact residence] 480 319 80 93 50 5
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 224 95 42.4
4 [School/College/University] 22 3 13.6
5 [Supermarket] 783 463 59.1
6 [Public/farmer’s market] 126 68 54.0
7 [Bakery] 174 105 60.3
8 [Specialty food store] 235 130 55.3
9 [Convenience store/Dépanneur] 65 49 75.4
11 [Bank] 92 68 73.9
12 [Hair salon/barbershop] 90 53 58.9
13 [Post office] 79 63 79.7
14 [Drugstore] 201 140 69.7
15 [Doctor/healthcare provider] 179 77 43.0
17 [Leisure-time physical activity] 157 104 66.2
18 [Park] 278 153 55.0
19 [Cultural activity] 190 125 65.8
20 [Volunteering place] 27 15 55.6
21 [Religious/spiritual activity] 9 6 66.7
22 [Restaurant, café, bar, etc.] 356 269 75.6
23 [Take-out] 87 52 59.8
24 [Walk] 178 104 58.4
25 [Other place] 829 497 60.0
26 [Social contact residence] 480 410 85.4
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

3.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 224 1 32 9.2 8.0 7.3
4 [School/College/University] 22 1 30 5.8 2.5 7.6
5 [Supermarket] 783 -35 68 3.2 2.0 4.7
6 [Public/farmer’s market] 126 0 30 3.5 2.5 4.2
7 [Bakery] 174 -38 20 2.1 2.0 4.9
8 [Specialty food store] 235 -57 25 1.7 2.0 5.9
9 [Convenience store/Dépanneur] 65 -27 710 13.3 2.0 87.9
11 [Bank] 92 -2 5 1.6 1.0 1.1
12 [Hair salon/barbershop] 90 -26 3 0.6 1.0 2.9
13 [Post office] 79 -35 4 0.9 1.0 4.3
14 [Drugstore] 201 -2 25 2.4 2.0 2.4
15 [Doctor/healthcare provider] 179 0 7 1.4 1.0 1.1
17 [Leisure-time physical activity] 157 -3 34 4.8 3.0 5.6
18 [Park] 278 -30 115 4.6 2.0 9.2
19 [Cultural activity] 190 -42 10 1.2 1.0 3.4
20 [Volunteering place] 27 0 30 4.1 2.0 7.4
21 [Religious/spiritual activity] 9 -1 25 5.2 4.0 7.6
22 [Restaurant, café, bar, etc.] 356 -27 25 1.1 1.0 3.1
23 [Take-out] 87 -43 25 1.6 1.0 6.0
24 [Walk] 178 -55 31 3.6 2.0 9.6
25 [Other place] 829 -2 30 3.1 2.0 4.4
26 [Social contact residence] 480 1 30 2.2 1.0 3.0
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'return'

3.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'return'

3.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 3 5 6.844156 9 73

3.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 3 6 7.642066 9 73 37

3.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 2 3 4.571429 6 34

3.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 3 3.590909 5 22