1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

  • Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
  • Social contacts: people and/or groups frequented by participants
  • Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities
VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

home_location <- locations[locations$location_category == 1, ]

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Vancouver 48
Surrey 1

2.1.2 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 40
2 [No] 9

2.1.3 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 2
2 [Both from home and away from home] 26
3 [Away from home only] 12
N/A 9

2.1.4 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 12
2 [No] 37

2.1.6 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 4
2 [Both from home and away from home] 6
3 [Away from home only] 2
N/A 37

2.1.7 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.8 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 49

2.1.9 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 112
6 [Public/farmer’s market] 17
7 [Bakery] 8
8 [Specialty food store] 6
9 [Convenience store/Dépanneur] 6
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 1 2.29 2 6
6 [Public/farmer’s market] 0 0.35 0 2
7 [Bakery] 0 0.16 0 2
8 [Specialty food store] 0 0.12 0 2
9 [Convenience store/Dépanneur] 0 0.12 0 1
10 [Liquor store/SAQ] 0 0.00 0 0

2.1.10 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 39
2 [No] 10

2.1.11 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 11
12 [Hair salon/barbershop] 17
13 [Post office] 6
14 [Drugstore] 20
15 [Doctor/healthcare provider] 25
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.22 0 1
12 [Hair salon/barbershop] 0 0.35 0 1
13 [Post office] 0 0.12 0 1
14 [Drugstore] 0 0.41 0 2
15 [Doctor/healthcare provider] 0 0.51 0 3

2.1.12 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 35
2 [No] 14

2.1.13 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.1.14 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 28
2 [No] 21

2.1.15 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.16 Now, we will ask you questions about the people around you.

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 3 4 6.6 10 24
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

2.1.16.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.4 2 5
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 20.7 35.9 54.2 100 1

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 4.6 6 24
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 53.4 84.5 74.3 100 100 1

2.1.16.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 4.3 6 24
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 50 70.7 68.1 100 100 1

2.2 Derived metrics

2.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 45 19 9 19 27 0
4 [School/College/University] 8 2 1 2 7 0
5 [Supermarket] 112 39 56 36 17 1
6 [Public/farmer’s market] 17 4 10 5 2 0
7 [Bakery] 8 0 8 2 1 0
8 [Specialty food store] 6 0 5 2 0 0
9 [Convenience store/Dépanneur] 6 1 6 2 0 0
11 [Bank] 11 1 9 3 2 0
12 [Hair salon/barbershop] 17 4 4 5 5 0
13 [Post office] 6 0 6 0 0 0
14 [Drugstore] 20 1 17 4 1 0
15 [Doctor/healthcare provider] 25 9 7 7 6 0
17 [Leisure-time physical activity] 27 15 8 9 3 0
18 [Park] 63 12 30 28 5 0
19 [Cultural activity] 16 2 5 4 7 0
20 [Volunteering place] 3 1 1 1 1 0
22 [Restaurant, café, bar, etc.] 93 15 51 23 16 0
23 [Take-out] 23 2 5 16 7 0
24 [Walk] 14 5 6 8 0 1
25 [Other place] 119 23 54 44 30 0
26 [Social contact residence] 73 37 24 20 14 1
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 45 14 31.1
4 [School/College/University] 8 2 25.0
5 [Supermarket] 112 52 46.4
6 [Public/farmer’s market] 17 11 64.7
7 [Bakery] 8 5 62.5
8 [Specialty food store] 6 6 100.0
9 [Convenience store/Dépanneur] 6 4 66.7
11 [Bank] 11 1 9.1
12 [Hair salon/barbershop] 17 0 0.0
13 [Post office] 6 2 33.3
14 [Drugstore] 20 9 45.0
15 [Doctor/healthcare provider] 25 7 28.0
17 [Leisure-time physical activity] 27 23 85.2
18 [Park] 63 50 79.4
19 [Cultural activity] 16 14 87.5
20 [Volunteering place] 3 1 33.3
22 [Restaurant, café, bar, etc.] 93 66 71.0
23 [Take-out] 23 11 47.8
24 [Walk] 14 8 57.1
25 [Other place] 119 61 51.3
26 [Social contact residence] 73 59 80.8
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

2.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 45 1 35 10.7 8.0 8.2
4 [School/College/University] 8 1 28 10.2 7.0 10.3
5 [Supermarket] 112 0 20 3.4 2.0 2.8
6 [Public/farmer’s market] 17 1 4 2.6 3.0 1.1
7 [Bakery] 8 1 10 3.0 2.0 2.9
8 [Specialty food store] 6 1 8 3.0 2.0 2.5
9 [Convenience store/Dépanneur] 6 2 9 4.2 3.5 2.6
11 [Bank] 11 1 3 1.8 2.0 0.6
12 [Hair salon/barbershop] 17 1 1 1.0 1.0 0.0
13 [Post office] 6 1 2 1.3 1.0 0.5
14 [Drugstore] 20 1 10 3.1 2.0 2.8
15 [Doctor/healthcare provider] 25 1 4 1.6 1.0 1.0
17 [Leisure-time physical activity] 27 1 12 3.4 2.0 2.7
18 [Park] 63 1 28 3.7 2.0 4.9
19 [Cultural activity] 16 1 4 1.6 1.0 1.0
20 [Volunteering place] 3 2 16 7.0 3.0 7.8
22 [Restaurant, café, bar, etc.] 93 1 8 1.7 1.0 1.5
23 [Take-out] 23 1 4 1.5 1.0 0.9
24 [Walk] 14 1 3 1.8 2.0 0.8
25 [Other place] 119 1 20 2.8 2.0 3.1
26 [Social contact residence] 73 1 30 3.8 2.0 5.8
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'new'

2.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'new'

2.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 3 4 6.571429 10 24

2.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 3 5 7.227273 10 24 5

2.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 4.346939 6 24

2.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 4.632653 6 24

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

# extract and recode
.home <- veritas_main[c("interact_id", "home_geom_confirmation")] %>% dplyr::rename(home_geom_confirmation_code = home_geom_confirmation)
.home$home_geom_confirmation <- factor(ifelse(.home$home_geom_confirmation_code == 1, "1 [Yes]",
  ifelse(.home$home_geom_confirmation_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .home) +
  geom_histogram(aes(x = home_geom_confirmation), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "home_geom_confirmation")

.home_cnt <- .home %>%
  group_by(home_geom_confirmation) %>%
  dplyr::count() %>%
  arrange(home_geom_confirmation)
kable(.home_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
home_geom_confirmation n
1 [Yes] 85
2 [No] 54

3.1.2 Place your home on map

home_location <- locations[locations$location_category == 1, ] %>%
  inner_join(.home, by = "interact_id")

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, aes(color = home_geom_confirmation), size = 1.8, alpha = .5) + # see https://github.com/r-spatial/sf/issues/336
  theme(legend.position = "bottom")

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Vancouver 138
Cowichan Valley 1

3.1.3 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 82
2 [No] 57

3.1.4 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 22
2 [Both from home and away from home] 38
3 [Away from home only] 22
N/A 57

3.1.5 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.6 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 6
2 [No] 133

3.1.7 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 2
2 [Both from home and away from home] 4
N/A 133

3.1.8 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.9 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 138
2 [No] 1

3.1.10 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 396
6 [Public/farmer’s market] 62
7 [Bakery] 47
8 [Specialty food store] 84
9 [Convenience store/Dépanneur] 12
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.85 3 8
6 [Public/farmer’s market] 0 0.45 0 3
7 [Bakery] 0 0.34 0 3
8 [Specialty food store] 0 0.60 0 4
9 [Convenience store/Dépanneur] 0 0.09 0 2
10 [Liquor store/SAQ] 0 0.00 0 0

3.1.11 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 122
2 [No] 17

3.1.12 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 73
12 [Hair salon/barbershop] 51
13 [Post office] 40
14 [Drugstore] 103
15 [Doctor/healthcare provider] 91
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.53 0 3
12 [Hair salon/barbershop] 0 0.37 0 1
13 [Post office] 0 0.29 0 1
14 [Drugstore] 0 0.74 1 4
15 [Doctor/healthcare provider] 0 0.65 0 5

3.1.13 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 93
2 [No] 46

3.1.14 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

3.1.15 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 85
2 [No] 54

3.1.16 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.17 Now, we will ask you questions about the people around you

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 3 6 7.2 10 32
# histogram
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

3.1.17.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.1 2 6
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 16.7 29.9 48.2 100 1

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 3.7 5 12
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 36.4 61.2 61.5 98.1 100 1

3.1.17.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 4 4.9 6.5 20
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 50 79.3 71.3 100 100 1

3.2 Derived metrics

3.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 94 50 12 27 28 11
4 [School/College/University] 4 1 0 1 2 0
5 [Supermarket] 396 189 200 44 25 3
6 [Public/farmer’s market] 62 32 26 7 1 0
7 [Bakery] 47 18 26 5 1 3
8 [Specialty food store] 84 32 50 7 4 0
9 [Convenience store/Dépanneur] 12 2 9 1 0 1
11 [Bank] 73 21 47 5 2 1
12 [Hair salon/barbershop] 51 22 19 5 7 0
13 [Post office] 40 9 31 2 0 1
14 [Drugstore] 103 31 74 8 5 0
15 [Doctor/healthcare provider] 91 37 28 8 21 7
17 [Leisure-time physical activity] 89 55 30 18 4 0
18 [Park] 132 42 75 28 6 2
19 [Cultural activity] 41 20 9 7 9 0
20 [Volunteering place] 14 8 6 1 1 0
21 [Religious/spiritual activity] 8 4 2 0 0 2
22 [Restaurant, café, bar, etc.] 141 56 64 13 13 4
23 [Take-out] 36 19 17 0 1 0
24 [Walk] 115 29 84 21 5 0
25 [Other place] 357 176 136 50 48 5
26 [Social contact residence] 165 107 41 31 7 1
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 94 35 37.2
4 [School/College/University] 4 2 50.0
5 [Supermarket] 396 200 50.5
6 [Public/farmer’s market] 62 29 46.8
7 [Bakery] 47 24 51.1
8 [Specialty food store] 84 50 59.5
9 [Convenience store/Dépanneur] 12 9 75.0
11 [Bank] 73 42 57.5
12 [Hair salon/barbershop] 51 19 37.3
13 [Post office] 40 28 70.0
14 [Drugstore] 103 61 59.2
15 [Doctor/healthcare provider] 91 40 44.0
17 [Leisure-time physical activity] 89 49 55.1
18 [Park] 132 66 50.0
19 [Cultural activity] 41 30 73.2
20 [Volunteering place] 14 4 28.6
21 [Religious/spiritual activity] 8 5 62.5
22 [Restaurant, café, bar, etc.] 141 90 63.8
23 [Take-out] 36 16 44.4
24 [Walk] 115 55 47.8
25 [Other place] 357 184 51.5
26 [Social contact residence] 165 142 86.1
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

3.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 94 0 30 9.1 5.0 8.0
4 [School/College/University] 4 1 12 4.2 2.0 5.3
5 [Supermarket] 396 0 30 3.5 2.0 3.6
6 [Public/farmer’s market] 62 0 6 2.2 2.0 1.2
7 [Bakery] 47 0 6 1.9 1.0 1.3
8 [Specialty food store] 84 0 8 2.0 1.0 1.6
9 [Convenience store/Dépanneur] 12 0 18 4.2 2.5 5.1
11 [Bank] 73 0 11 2.0 1.0 1.8
12 [Hair salon/barbershop] 51 0 4 1.1 1.0 0.6
13 [Post office] 40 0 15 2.5 1.0 3.3
14 [Drugstore] 103 1 10 2.8 2.0 1.9
15 [Doctor/healthcare provider] 91 0 6 1.4 1.0 0.9
17 [Leisure-time physical activity] 89 1 25 5.7 4.0 5.3
18 [Park] 132 0 30 4.6 2.0 6.1
19 [Cultural activity] 41 1 15 2.0 1.0 2.4
20 [Volunteering place] 14 0 30 5.5 3.0 7.6
21 [Religious/spiritual activity] 8 1 15 5.1 4.0 4.7
22 [Restaurant, café, bar, etc.] 141 0 16 1.9 1.0 2.3
23 [Take-out] 36 0 4 1.2 1.0 0.8
24 [Walk] 115 0 30 6.1 4.0 6.7
25 [Other place] 357 0 30 2.8 2.0 4.1
26 [Social contact residence] 165 1 30 2.8 2.0 3.7
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'return'

3.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'return'

3.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 3 6 7.208633 10 32

3.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 4 6 7.75 10 32 11

3.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 4 4.928058 6.5 20

3.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 3.748201 5 12