1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

  • Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
  • Social contacts: people and/or groups frequented by participants
  • Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities
VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

home_location <- locations[locations$location_category == 1, ]

# Remove particpants too far from Saskatoon (probable fake participants)
home_location <- home_location %>%
  filter(!interact_id %in% c("304176452", "304716030", "304831562", "304736025", "304434437", "304782756", "304313412"))

## version ggmap
skt_aoi <- st_bbox(home_location)
names(skt_aoi) <- c("left", "bottom", "right", "top")
skt_aoi[["left"]] <- skt_aoi[["left"]] - .07
skt_aoi[["right"]] <- skt_aoi[["right"]] + .07
skt_aoi[["top"]] <- skt_aoi[["top"]] + .01
skt_aoi[["bottom"]] <- skt_aoi[["bottom"]] - .01

bm <- get_stadiamap(skt_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Saskatoon 139
Corman Park 3

2.1.2 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 117
2 [No] 32

2.1.3 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 21
2 [Both from home and away from home] 48
3 [Away from home only] 48
N/A 32

2.1.4 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 59
2 [No] 90

2.1.6 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 14
2 [Both from home and away from home] 34
3 [Away from home only] 11
N/A 90

2.1.7 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.8 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 141
2 [No] 8

2.1.9 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 154
6 [Public/farmer’s market] 39
7 [Bakery] 8
8 [Specialty food store] 11
9 [Convenience store/Dépanneur] 13
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 1.03 1 4
6 [Public/farmer’s market] 0 0.26 0 3
7 [Bakery] 0 0.05 0 2
8 [Specialty food store] 0 0.07 0 4
9 [Convenience store/Dépanneur] 0 0.09 0 1
10 [Liquor store/SAQ] 0 0.00 0 0

2.1.10 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 101
2 [No] 48

2.1.11 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 41
12 [Hair salon/barbershop] 21
13 [Post office] 8
14 [Drugstore] 41
15 [Doctor/healthcare provider] 28
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.28 0 2
12 [Hair salon/barbershop] 0 0.14 0 1
13 [Post office] 0 0.05 0 1
14 [Drugstore] 0 0.28 0 1
15 [Doctor/healthcare provider] 0 0.19 0 2

2.1.12 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 60
2 [No] 89

2.1.13 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.1.14 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 52
2 [No] 97

2.1.15 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.16 Now, we will ask you questions about the people around you.

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 2 3 3 19
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

2.1.16.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 1.6 2 9
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 20 100 65.9 100 100

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 1.9 2 9
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 50 75 69.2 100 100

2.1.16.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 2 2 10
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 50 100 76.2 100 100

2.2 Derived metrics

2.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 106 60 19 18 67 3
4 [School/College/University] 63 29 23 3 38 0
5 [Supermarket] 154 100 32 11 50 1
6 [Public/farmer’s market] 39 21 18 13 18 0
7 [Bakery] 8 3 4 2 1 0
8 [Specialty food store] 11 5 4 0 2 0
9 [Convenience store/Dépanneur] 13 7 2 2 8 0
11 [Bank] 41 21 8 5 13 0
12 [Hair salon/barbershop] 21 11 6 5 8 0
13 [Post office] 8 5 4 3 4 0
14 [Drugstore] 41 20 18 7 6 0
15 [Doctor/healthcare provider] 28 21 2 1 7 0
17 [Leisure-time physical activity] 12 7 4 1 3 0
18 [Park] 27 10 14 8 2 0
19 [Cultural activity] 11 5 5 0 1 0
20 [Volunteering place] 2 2 0 0 0 0
21 [Religious/spiritual activity] 2 1 1 0 0 0
22 [Restaurant, café, bar, etc.] 64 40 31 2 8 0
23 [Take-out] 7 5 3 0 0 1
24 [Walk] 14 7 7 1 0 0
25 [Other place] 145 90 27 13 47 0
26 [Social contact residence] 87 65 16 14 14 2
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 106 56 52.8
4 [School/College/University] 63 29 46.0
5 [Supermarket] 154 101 65.6
6 [Public/farmer’s market] 39 29 74.4
7 [Bakery] 8 1 12.5
8 [Specialty food store] 11 5 45.5
9 [Convenience store/Dépanneur] 13 10 76.9
11 [Bank] 41 15 36.6
12 [Hair salon/barbershop] 21 6 28.6
13 [Post office] 8 3 37.5
14 [Drugstore] 41 18 43.9
15 [Doctor/healthcare provider] 28 8 28.6
17 [Leisure-time physical activity] 12 9 75.0
18 [Park] 27 24 88.9
19 [Cultural activity] 11 10 90.9
20 [Volunteering place] 2 0 0.0
21 [Religious/spiritual activity] 2 1 50.0
22 [Restaurant, café, bar, etc.] 64 54 84.4
23 [Take-out] 7 6 85.7
24 [Walk] 14 9 64.3
25 [Other place] 145 89 61.4
26 [Social contact residence] 87 66 75.9
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

2.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 106 1 30 16.7 20.0 8.5
4 [School/College/University] 63 1 40 15.4 15.0 10.0
5 [Supermarket] 154 1 25 3.9 3.0 3.7
6 [Public/farmer’s market] 39 1 20 4.4 4.0 3.0
7 [Bakery] 8 1 20 4.8 3.0 6.2
8 [Specialty food store] 11 1 3 2.1 2.0 0.7
9 [Convenience store/Dépanneur] 13 1 25 8.5 4.0 7.9
11 [Bank] 41 1 35 2.7 2.0 5.3
12 [Hair salon/barbershop] 21 1 3 1.4 1.0 0.6
13 [Post office] 8 1 2 1.4 1.0 0.5
14 [Drugstore] 41 1 5 2.1 2.0 1.0
15 [Doctor/healthcare provider] 28 1 3 1.5 1.0 0.7
17 [Leisure-time physical activity] 12 1 27 6.0 3.5 7.7
18 [Park] 27 1 30 5.3 4.0 5.7
19 [Cultural activity] 11 1 2 1.4 1.0 0.5
20 [Volunteering place] 2 2 3 2.5 2.5 0.7
21 [Religious/spiritual activity] 2 3 5 4.0 4.0 1.4
22 [Restaurant, café, bar, etc.] 64 1 6 1.9 1.0 1.3
23 [Take-out] 7 1 3 1.6 1.0 0.8
24 [Walk] 14 1 25 7.7 4.0 7.8
25 [Other place] 145 1 35 3.1 2.0 4.2
26 [Social contact residence] 87 1 40 3.9 2.0 5.8
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Saskatoon' AND wave_id = 3 AND status = 'new'

2.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Saskatoon' AND wave_id = 3 AND status = 'new'

2.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 2 2.966443 3 19

2.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 2 3 3.84466 4.5 19 46

2.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 2.040268 2 10

2.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 1.885906 2 9

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

# extract and recode
.home <- veritas_main[c("interact_id", "home_geom_confirmation")] %>% dplyr::rename(home_geom_confirmation_code = home_geom_confirmation)
.home$home_geom_confirmation <- factor(ifelse(.home$home_geom_confirmation_code == 1, "1 [Yes]",
  ifelse(.home$home_geom_confirmation_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .home) +
  geom_histogram(aes(x = home_geom_confirmation), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "home_geom_confirmation")

.home_cnt <- .home %>%
  group_by(home_geom_confirmation) %>%
  dplyr::count() %>%
  arrange(home_geom_confirmation)
kable(.home_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
home_geom_confirmation n
NA 134

NB this question has been skipped, due to a bug in the questionnaire branching

3.1.2 Place your home on map

home_location <- locations[locations$location_category == 1, ]

## version ggmap
skt_aoi <- st_bbox(home_location)
names(skt_aoi) <- c("left", "bottom", "right", "top")
skt_aoi[["left"]] <- skt_aoi[["left"]] - .07
skt_aoi[["right"]] <- skt_aoi[["right"]] + .07
skt_aoi[["top"]] <- skt_aoi[["top"]] + .01
skt_aoi[["bottom"]] <- skt_aoi[["bottom"]] - .01

bm <- get_stadiamap(skt_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) + # see https://github.com/r-spatial/sf/issues/336
  theme(legend.position = "bottom")

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Saskatoon 133
Corman Park 1

3.1.3 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 101
2 [No] 33

3.1.4 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I work…
working n
1 [From home only] 15
2 [Both from home and away from home] 36
3 [Away from home only] 50
N/A 33

3.1.5 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.6 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 28
2 [No] 106

3.1.7 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
I study…
studying n
1 [From home only] 7
2 [Both from home and away from home] 16
3 [Away from home only] 5
N/A 106

3.1.8 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.9 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
shopping n
1 [Yes] 130
2 [No] 4

3.1.10 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

  1. Grocery store
  2. Market
  3. Bakery
  4. Specialty store
  5. Convenience store
  6. Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 292
6 [Public/farmer’s market] 12
7 [Bakery] 10
8 [Specialty food store] 24
9 [Convenience store/Dépanneur] 6
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.18 2 5
6 [Public/farmer’s market] 0 0.09 0 2
7 [Bakery] 0 0.07 0 2
8 [Specialty food store] 0 0.18 0 2
9 [Convenience store/Dépanneur] 0 0.04 0 1
10 [Liquor store/SAQ] 0 0.00 0 0

3.1.11 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
services n
1 [Yes] 93
2 [No] 41

3.1.12 Please list the places where you received services in the past month

The following location types are grouped under this question:

  1. Bank
  2. Hair salon/barbershop
  3. Post office
  4. Drugstore
  5. Doctor/healthcare provider
  6. Other service location
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
service locations by categories
location_category n
11 [Bank] 30
12 [Hair salon/barbershop] 36
13 [Post office] 17
14 [Drugstore] 46
15 [Doctor/healthcare provider] 63
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of service locations by participant and category
location_category min mean median max
11 [Bank] 0 0.22 0 1
12 [Hair salon/barbershop] 0 0.27 0 2
13 [Post office] 0 0.13 0 2
14 [Drugstore] 0 0.34 0 1
15 [Doctor/healthcare provider] 0 0.47 0 4

3.1.13 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
visiting_new0_w3 n
1 [Yes] 91
2 [No] 43

3.1.14 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

3.1.15 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
other_w3 n
1 [Yes] 74
2 [No] 60

3.1.16 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.17 Now, we will ask you questions about the people around you

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 2 3 4.2 5 27
# histogram
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

3.1.17.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people in household per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 1.5 2 6
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people in household among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 14.9 50 55.3 100 100

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 1 2.4 4 16
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
8.3 50 69 69.8 100 100

3.1.17.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 2 2.9 4 18
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
8.3 50 100 76.8 100 100

3.2 Derived metrics

3.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car On foot By bike By public transit Other
3 [Work] 105 68 20 27 53 2
4 [School/College/University] 22 14 9 5 13 2
5 [Supermarket] 292 240 50 25 32 1
6 [Public/farmer’s market] 12 5 4 3 1 0
7 [Bakery] 10 7 4 3 0 0
8 [Specialty food store] 24 15 9 1 3 0
9 [Convenience store/Dépanneur] 6 2 3 1 1 0
11 [Bank] 30 22 10 1 1 0
12 [Hair salon/barbershop] 36 24 6 3 5 0
13 [Post office] 17 11 6 4 2 0
14 [Drugstore] 46 26 21 4 6 1
15 [Doctor/healthcare provider] 63 50 9 2 9 0
17 [Leisure-time physical activity] 24 21 5 2 0 0
18 [Park] 37 13 24 5 0 0
19 [Cultural activity] 24 14 11 2 1 1
20 [Volunteering place] 2 1 1 0 0 0
21 [Religious/spiritual activity] 4 3 0 1 0 0
22 [Restaurant, café, bar, etc.] 130 80 41 6 13 2
23 [Take-out] 10 7 3 1 0 0
24 [Walk] 16 6 8 4 0 0
25 [Other place] 260 182 64 13 37 1
26 [Social contact residence] 168 123 34 21 14 2
# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places with someone
description N Visited with someone Visited with someone (%)
3 [Work] 105 45 42.9
4 [School/College/University] 22 11 50.0
5 [Supermarket] 292 169 57.9
6 [Public/farmer’s market] 12 8 66.7
7 [Bakery] 10 7 70.0
8 [Specialty food store] 24 11 45.8
9 [Convenience store/Dépanneur] 6 3 50.0
11 [Bank] 30 9 30.0
12 [Hair salon/barbershop] 36 5 13.9
13 [Post office] 17 3 17.6
14 [Drugstore] 46 17 37.0
15 [Doctor/healthcare provider] 63 11 17.5
17 [Leisure-time physical activity] 24 16 66.7
18 [Park] 37 32 86.5
19 [Cultural activity] 24 24 100.0
20 [Volunteering place] 2 0 0.0
21 [Religious/spiritual activity] 4 3 75.0
22 [Restaurant, café, bar, etc.] 130 102 78.5
23 [Take-out] 10 5 50.0
24 [Walk] 16 12 75.0
25 [Other place] 260 163 62.7
26 [Social contact residence] 168 148 88.1
# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

3.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
3 [Work] 105 0 10000000 95251 15 9.758988e+05
4 [School/College/University] 22 0 100000000000000000 4545454545454566 22 2.132007e+16
5 [Supermarket] 292 1 155 3 3 9.300000e+00
6 [Public/farmer’s market] 12 1 10 3 2 2.500000e+00
7 [Bakery] 10 1 5 2 2 1.400000e+00
8 [Specialty food store] 24 1 6 2 2 1.300000e+00
9 [Convenience store/Dépanneur] 6 3 8 4 5 1.900000e+00
11 [Bank] 30 1 4 1 1 8.000000e-01
12 [Hair salon/barbershop] 36 1 2 1 1 2.000000e-01
13 [Post office] 17 1 4 1 1 9.000000e-01
14 [Drugstore] 46 1 10 2 2 2.000000e+00
15 [Doctor/healthcare provider] 63 1 3 1 1 6.000000e-01
17 [Leisure-time physical activity] 24 1 15 3 3 3.800000e+00
18 [Park] 37 1 30 4 3 5.700000e+00
19 [Cultural activity] 24 1 5 1 1 1.100000e+00
20 [Volunteering place] 2 1 6 3 6 3.500000e+00
21 [Religious/spiritual activity] 4 3 25 9 3 1.040000e+01
22 [Restaurant, café, bar, etc.] 130 1 15 1 1 1.600000e+00
23 [Take-out] 10 1 5 1 2 1.200000e+00
24 [Walk] 16 1 15 4 3 3.700000e+00
25 [Other place] 260 1 60 3 2 5.900000e+00
26 [Social contact residence] 168 1 30 3 2 3.900000e+00
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Saskatoon' AND wave_id = 3 AND status = 'return'

3.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Saskatoon' AND wave_id = 3 AND status = 'return'

3.2.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 2 3 4.238806 5 27

3.2.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
2 2 4 5.213592 6.5 27 31

3.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 2 2.858209 4 18

3.2.5.4 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 1 2.38806 4 16