1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
Social contacts: people and/or groups frequented by participants
Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

home_location <- locations[locations$location_category == 1, ]

## version ggmap
mtl_aoi <- st_bbox(home_location)
names(mtl_aoi) <- c("left", "bottom", "right", "top")
mtl_aoi[["left"]] <- mtl_aoi[["left"]] - .07
mtl_aoi[["right"]] <- mtl_aoi[["right"]] + .07
mtl_aoi[["top"]] <- mtl_aoi[["top"]] + .01
mtl_aoi[["bottom"]] <- mtl_aoi[["bottom"]] - .01

bm <- get_stadiamap(mtl_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of participants by municipalities
NAME	n
Montréal	99
Beaconsfield	2
Dollard-Des Ormeaux	2
Brossard	1
Côte-Saint-Luc	1
Kirkland	1
Laval	1
Longueuil	1
Montréal-Est	1
Pointe-Claire	1

2.1.2 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently working
working	n
1 [Yes]	80
2 [No]	30

2.1.3 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I work…
working	n
1 [From home only]	10
2 [Both from home and away from home]	45
3 [Away from home only]	25
N/A	30

2.1.4 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently studying
studying	n
1 [Yes]	21
2 [No]	89

2.1.6 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I study…
studying	n
1 [From home only]	5
2 [Both from home and away from home]	9
3 [Away from home only]	7
N/A	89

2.1.7 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.8 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

shopping	n
1 [Yes]	110

2.1.9 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

Grocery store
Market
Bakery
Specialty store
Convenience store
Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
5 [Supermarket]	244
6 [Public/farmer’s market]	34
7 [Bakery]	21
8 [Specialty food store]	28
9 [Convenience store/Dépanneur]	5

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	mean	median	max
5 [Supermarket]	2.22	2	6
6 [Public/farmer’s market]	0.31	0	2
7 [Bakery]	0.19	0	3
8 [Specialty food store]	0.25	0	5
9 [Convenience store/Dépanneur]	0.05	0	2
10 [Liquor store/SAQ]	0.00	0	0

2.1.10 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

services	n
1 [Yes]	80
2 [No]	30

2.1.11 Please list the places where you received services in the past month

The following location types are grouped under this question:

Bank
Hair salon/barbershop
Post office
Drugstore
Doctor/healthcare provider
Other service location

serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

service locations by categories
location_category	n
11 [Bank]	26
12 [Hair salon/barbershop]	21
13 [Post office]	11
14 [Drugstore]	31
15 [Doctor/healthcare provider]	55

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of service locations by participant and category
location_category	mean	max
11 [Bank]	0.24	2
12 [Hair salon/barbershop]	0.19	2
13 [Post office]	0.10	2
14 [Drugstore]	0.28	4
15 [Doctor/healthcare provider]	0.50	3

2.1.12 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

visiting_new0_w3	n
1 [Yes]	84
2 [No]	26

2.1.13 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.1.14 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

other_w3	n
1 [Yes]	60
2 [No]	50

2.1.15 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.16 Now, we will ask you questions about the people around you.

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	4	5	7	22

ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

2.1.16.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people in household per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.1	2	5

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people in household among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	20	38.5	100	100	2

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of important people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	2	3.3	4.8	16

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of important people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	50	84.5	71	100	100	2

2.1.16.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people with whom to socialize per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	3.4	5	17

kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people with whom to socialize among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	50	86.6	73.8	100	100	2

2.2 Derived metrics

2.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Transportation mode preferences
description	N	By car	On foot	By bike	By public transit	Other
3 [Work]	81	32	25	24	45	1
4 [School/College/University]	28	5	6	5	20	5
5 [Supermarket]	244	113	111	34	21	1
6 [Public/farmer’s market]	34	14	14	10	3	0
7 [Bakery]	21	3	16	6	5	0
8 [Specialty food store]	28	8	13	8	5	0
9 [Convenience store/Dépanneur]	5	0	3	0	2	0
11 [Bank]	26	4	11	4	7	1
12 [Hair salon/barbershop]	21	4	7	7	4	0
13 [Post office]	11	0	10	1	1	0
14 [Drugstore]	31	10	21	3	7	0
15 [Doctor/healthcare provider]	55	16	17	9	20	1
17 [Leisure-time physical activity]	37	15	14	9	4	1
18 [Park]	76	11	42	29	5	1
19 [Cultural activity]	62	17	14	9	27	3
20 [Volunteering place]	4	0	2	0	0	3
22 [Restaurant, café, bar, etc.]	112	30	49	11	32	2
23 [Take-out]	9	2	4	2	1	0
24 [Walk]	21	7	10	2	2	3
25 [Other place]	228	102	64	33	49	19
26 [Social contact residence]	180	86	50	32	46	3

# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visiting places with someone
description	N	Visited with someone	Visited with someone (%)
3 [Work]	81	29	35.8
4 [School/College/University]	28	9	32.1
5 [Supermarket]	244	128	52.5
6 [Public/farmer’s market]	34	22	64.7
7 [Bakery]	21	3	14.3
8 [Specialty food store]	28	10	35.7
9 [Convenience store/Dépanneur]	5	4	80.0
11 [Bank]	26	5	19.2
12 [Hair salon/barbershop]	21	3	14.3
13 [Post office]	11	0	0.0
14 [Drugstore]	31	7	22.6
15 [Doctor/healthcare provider]	55	10	18.2
17 [Leisure-time physical activity]	37	21	56.8
18 [Park]	76	64	84.2
19 [Cultural activity]	62	47	75.8
20 [Volunteering place]	4	0	0.0
22 [Restaurant, café, bar, etc.]	112	95	84.8
23 [Take-out]	9	5	55.6
24 [Walk]	21	16	76.2
25 [Other place]	228	118	51.8
26 [Social contact residence]	180	141	78.3

# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

2.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visit frequency (expressed in times/year)
description	N	min	max	mean	median	sd
3 [Work]	81	1	31	10.2	7.0	8.6
4 [School/College/University]	28	1	40	5.8	3.0	8.4
5 [Supermarket]	244	1	30	3.7	3.0	3.9
6 [Public/farmer’s market]	34	1	20	3.9	3.0	3.8
7 [Bakery]	21	1	10	3.1	2.0	2.8
8 [Specialty food store]	28	1	20	2.9	2.0	3.6
9 [Convenience store/Dépanneur]	5	1	5	2.8	3.0	1.5
11 [Bank]	26	1	5	1.7	1.0	1.1
12 [Hair salon/barbershop]	21	1	2	1.1	1.0	0.3
13 [Post office]	11	1	10	2.2	1.0	2.6
14 [Drugstore]	31	1	4	2.0	2.0	0.9
15 [Doctor/healthcare provider]	55	1	30	1.9	1.0	3.9
17 [Leisure-time physical activity]	37	1	20	6.4	4.0	5.8
18 [Park]	76	1	20	4.6	3.0	4.6
19 [Cultural activity]	62	1	10	1.6	1.0	1.5
20 [Volunteering place]	4	2	4	3.2	3.5	1.0
22 [Restaurant, café, bar, etc.]	112	1	10	1.5	1.0	1.2
23 [Take-out]	9	1	4	1.8	1.0	1.3
24 [Walk]	21	1	25	3.8	2.0	5.3
25 [Other place]	228	1	25	2.6	1.0	3.6
26 [Social contact residence]	180	1	30	2.8	2.0	3.8

# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'new'

2.2.4.1 Indicators related to the lifestyle

Indicators	Measurement approach
Number of activity places (`n_acti_places`)	Count of activity places
Number of visits to places per week (`n_weekly_vst`)	Number of activity places per individual multiplied by the frequency of visit per week to each location, excluding the residence
Number of activity types (`n_acti_types`)	6 types of activities considered: 1-Residential; 2-Occupation; 3-Shopping activities; 4-Services; 5-Transportation; 6-Leisure activities (NB original categories were as follow: 1-Residential; 2-work; 3-food and other services; 4-transport station/stop; 5-recreational activity; 6-social activity)

LLindic <- ess.tab.camille %>%
  select("interact_id", "n_acti_places", "n_weekly_vst", "n_acti_types")
.llmtx <- as.matrix(summary(LLindic[c("n_acti_places", "n_weekly_vst", "n_acti_types")], digits = 1))
.llmtx <- apply(.llmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.llmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
kable(.llmtx, caption = "Lifestyle statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Lifestyle statistics
	n_acti_places	n_weekly_vst	n_acti_types
Min.	3	0.9	2
1st Qu.	7	5.3	4
Median	11	8.2	4
Mean	13	9.4	4
3rd Qu.	17	12.2	5
Max.	44	31.4	5

2.2.4.2 Indicators related to the geometry of the activity space

Indicators	Measurement approach
Perimeter of the convex hull (`cvx_perimeter`)	Perimeter of the smallest polygon containing all the activity locations of the participant (unit: km)
Surface of the convex hull (`cvx_surface`)	Surface of the smallest polygon containing all the activity locations of the participant (unit: km2)
Minimal road network distance from the residence to an activity place (`min_length`)	Minimal distance from the residence to an activity place using the road network (in meters)
Maximal road network distance from the residence to an activity place (`max_length`)	Maximal distance from the residence to an activity place using the road network (in meters)
Median road network distance from the residence to all activity places (`median_length`)	Median distance from home to all activity places using the road network (in meters)

ASindic <- ess.tab.camille %>%
  select("interact_id", "cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")
.asmtx <- as.matrix(summary(ASindic[c("cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")], digits = 1))
.asmtx <- apply(.asmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.asmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.asmtx[is.na(.asmtx)] <- 0
kable(.asmtx, caption = "Activity space statistics", digits = 1) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Activity space statistics
	cvx_perimeter	cvx_surface	min_length	max_length	median_length
Min.	3	0	1e-01	2e+03	453
1st Qu.	22	15	3e+02	9e+03	1943
Median	41	56	6e+02	2e+04	3239
Mean	101	915	1e+03	4e+04	4589
3rd Qu.	100	254	1e+03	4e+04	5841
Max.	1238	38551	1e+04	2e+05	26434

2.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'new'

2.2.5.1 Number of people in the network (`people_degree`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

people_degree
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	4	5.009091	7	22

2.2.5.2 Simmelian Brokerage (`simmelian`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
2	3	4.5	5.815217	7.25	22	18

2.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	3.4	5	17

2.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	2	3.272727	4.75	16

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

# extract and recode
.home <- veritas_main[c("interact_id", "home_geom_confirmation")] %>% dplyr::rename(home_geom_confirmation_code = home_geom_confirmation)
.home$home_geom_confirmation <- factor(ifelse(.home$home_geom_confirmation_code == 1, "1 [Yes]",
  ifelse(.home$home_geom_confirmation_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .home) +
  geom_histogram(aes(x = home_geom_confirmation), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "home_geom_confirmation")

.home_cnt <- .home %>%
  group_by(home_geom_confirmation) %>%
  dplyr::count() %>%
  arrange(home_geom_confirmation)
kable(.home_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

home_geom_confirmation	n
1 [Yes]	234
2 [No]	74

3.1.2 Place your home on map

home_location <- locations[locations$location_category == 1, ] %>%
  inner_join(.home, by = "interact_id")

## version ggmap
mtl_aoi <- st_bbox(home_location)
names(mtl_aoi) <- c("left", "bottom", "right", "top")
mtl_aoi[["left"]] <- mtl_aoi[["left"]] - .07
mtl_aoi[["right"]] <- mtl_aoi[["right"]] + .07
mtl_aoi[["top"]] <- mtl_aoi[["top"]] + .01
mtl_aoi[["bottom"]] <- mtl_aoi[["bottom"]] - .01

bm <- get_stadiamap(mtl_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, aes(color = home_geom_confirmation), size = 1.8, alpha = .5) + # see https://github.com/r-spatial/sf/issues/336
  theme(legend.position = "bottom")

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of participants by municipalities
NAME	n
Montréal	242
Longueuil	21
Laval	17
Brossard	9
Saint-Lambert	6
Beaconsfield	2
Dollard-Des Ormeaux	2
Kirkland	2
Candiac	1
Dorval	1
Mont-Royal	1
Montréal-Ouest	1
Pointe-Claire	1
Rosemère	1
Westmount	1

3.1.3 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently working
working	n
1 [Yes]	211
2 [No]	97

3.1.4 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I work…
working	n
1 [From home only]	36
2 [Both from home and away from home]	124
3 [Away from home only]	51
N/A	97

3.1.5 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.6 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently studying
studying	n
1 [Yes]	25
2 [No]	283

3.1.7 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I study…
studying	n
1 [From home only]	9
2 [Both from home and away from home]	10
3 [Away from home only]	6
N/A	283

3.1.8 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.9 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

shopping	n
1 [Yes]	300
2 [No]	8

3.1.10 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

Grocery store
Market
Bakery
Specialty store
Convenience store
Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
5 [Supermarket]	783
6 [Public/farmer’s market]	126
7 [Bakery]	174
8 [Specialty food store]	235
9 [Convenience store/Dépanneur]	65

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	mean	median	max
5 [Supermarket]	2.54	2	10
6 [Public/farmer’s market]	0.41	0	3
7 [Bakery]	0.56	0	5
8 [Specialty food store]	0.76	0	7
9 [Convenience store/Dépanneur]	0.21	0	3
10 [Liquor store/SAQ]	0.00	0	0

3.1.11 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

services	n
1 [Yes]	233
2 [No]	75

3.1.12 Please list the places where you received services in the past month

The following location types are grouped under this question:

Bank
Hair salon/barbershop
Post office
Drugstore
Doctor/healthcare provider
Other service location

serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

service locations by categories
location_category	n
11 [Bank]	92
12 [Hair salon/barbershop]	90
13 [Post office]	79
14 [Drugstore]	201
15 [Doctor/healthcare provider]	179

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of service locations by participant and category
location_category	mean	median	max
11 [Bank]	0.30	0	2
12 [Hair salon/barbershop]	0.29	0	1
13 [Post office]	0.26	0	2
14 [Drugstore]	0.65	1	3
15 [Doctor/healthcare provider]	0.58	0	8

3.1.13 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

visiting_new0_w3	n
1 [Yes]	228
2 [No]	80

3.1.14 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

3.1.15 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

other_w3	n
1 [Yes]	171
2 [No]	137

3.1.16 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.17 Now, we will ask you questions about the people around you

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	3	5	6.8	9	73

# histogram
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

3.1.17.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people in household per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.3	2	10

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people in household among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	16.7	31.7	50	100

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of important people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	1	3	3.6	5	22

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of important people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
4.5	33.3	63.6	64.1	100	100

3.1.17.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people with whom to socialize per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	2	3	4.6	6	34

kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people with whom to socialize among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
4.5	56.7	85	76.5	100	100

3.2 Derived metrics

3.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Transportation mode preferences
description	N	By car	On foot	By bike	By public transit	Other
3 [Work]	224	86	46	85	104	8
4 [School/College/University]	22	3	5	4	16	0
5 [Supermarket]	783	367	379	97	37	15
6 [Public/farmer’s market]	126	45	61	35	10	0
7 [Bakery]	174	36	115	29	12	0
8 [Specialty food store]	235	68	137	38	17	3
9 [Convenience store/Dépanneur]	65	15	54	0	0	0
11 [Bank]	92	31	51	14	5	4
12 [Hair salon/barbershop]	90	35	35	9	15	1
13 [Post office]	79	23	50	7	2	5
14 [Drugstore]	201	57	145	25	6	3
15 [Doctor/healthcare provider]	179	68	52	39	30	9
17 [Leisure-time physical activity]	157	62	50	46	19	6
18 [Park]	278	45	200	62	11	6
19 [Cultural activity]	190	63	52	44	62	4
20 [Volunteering place]	27	11	12	3	3	2
21 [Religious/spiritual activity]	9	5	4	0	1	0
22 [Restaurant, café, bar, etc.]	356	114	176	48	56	7
23 [Take-out]	87	38	41	12	2	4
24 [Walk]	178	30	136	24	16	3
25 [Other place]	829	375	335	97	104	15
26 [Social contact residence]	480	319	80	93	50	5

# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visiting places with someone
description	N	Visited with someone	Visited with someone (%)
3 [Work]	224	95	42.4
4 [School/College/University]	22	3	13.6
5 [Supermarket]	783	463	59.1
6 [Public/farmer’s market]	126	68	54.0
7 [Bakery]	174	105	60.3
8 [Specialty food store]	235	130	55.3
9 [Convenience store/Dépanneur]	65	49	75.4
11 [Bank]	92	68	73.9
12 [Hair salon/barbershop]	90	53	58.9
13 [Post office]	79	63	79.7
14 [Drugstore]	201	140	69.7
15 [Doctor/healthcare provider]	179	77	43.0
17 [Leisure-time physical activity]	157	104	66.2
18 [Park]	278	153	55.0
19 [Cultural activity]	190	125	65.8
20 [Volunteering place]	27	15	55.6
21 [Religious/spiritual activity]	9	6	66.7
22 [Restaurant, café, bar, etc.]	356	269	75.6
23 [Take-out]	87	52	59.8
24 [Walk]	178	104	58.4
25 [Other place]	829	497	60.0
26 [Social contact residence]	480	410	85.4

# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

3.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visit frequency (expressed in times/year)
description	N	min	max	mean	median	sd
3 [Work]	224	1	32	9.2	8.0	7.3
4 [School/College/University]	22	1	30	5.8	2.5	7.6
5 [Supermarket]	783	-35	68	3.2	2.0	4.7
6 [Public/farmer’s market]	126	0	30	3.5	2.5	4.2
7 [Bakery]	174	-38	20	2.1	2.0	4.9
8 [Specialty food store]	235	-57	25	1.7	2.0	5.9
9 [Convenience store/Dépanneur]	65	-27	710	13.3	2.0	87.9
11 [Bank]	92	-2	5	1.6	1.0	1.1
12 [Hair salon/barbershop]	90	-26	3	0.6	1.0	2.9
13 [Post office]	79	-35	4	0.9	1.0	4.3
14 [Drugstore]	201	-2	25	2.4	2.0	2.4
15 [Doctor/healthcare provider]	179	0	7	1.4	1.0	1.1
17 [Leisure-time physical activity]	157	-3	34	4.8	3.0	5.6
18 [Park]	278	-30	115	4.6	2.0	9.2
19 [Cultural activity]	190	-42	10	1.2	1.0	3.4
20 [Volunteering place]	27	0	30	4.1	2.0	7.4
21 [Religious/spiritual activity]	9	-1	25	5.2	4.0	7.6
22 [Restaurant, café, bar, etc.]	356	-27	25	1.1	1.0	3.1
23 [Take-out]	87	-43	25	1.6	1.0	6.0
24 [Walk]	178	-55	31	3.6	2.0	9.6
25 [Other place]	829	-2	30	3.1	2.0	4.4
26 [Social contact residence]	480	1	30	2.2	1.0	3.0

# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'return'

3.2.4.1 Indicators related to the lifestyle

Indicators	Measurement approach
Number of activity places (`n_acti_places`)	Count of activity places
Number of visits to places per week (`n_weekly_vst`)	Number of activity places per individual multiplied by the frequency of visit per week to each location, excluding the residence
Number of activity types (`n_acti_types`)	6 types of activities considered: 1-Residential; 2-Occupation; 3-Shopping activities; 4-Services; 5-Transportation; 6-Leisure activities (NB original categories were as follow: 1-Residential; 2-work; 3-food and other services; 4-transport station/stop; 5-recreational activity; 6-social activity)

LLindic <- ess.tab.camille %>%
  select("interact_id", "n_acti_places", "n_weekly_vst", "n_acti_types")
.llmtx <- as.matrix(summary(LLindic[c("n_acti_places", "n_weekly_vst", "n_acti_types")], digits = 1))
.llmtx <- apply(.llmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.llmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.llmtx[is.na(.llmtx)] <- 0
kable(.llmtx, caption = "Lifestyle statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Lifestyle statistics
	n_acti_places	n_weekly_vst	n_acti_types
Min.	2	-61	2
1st Qu.	11	6	4
Median	15	10	4
Mean	17	11	4
3rd Qu.	20	14	5
Max.	61	217	5

3.2.4.2 Indicators related to the geometry of the activity space

Indicators	Measurement approach
Perimeter of the convex hull (`cvx_perimeter`)	Perimeter of the smallest polygon containing all the activity locations of the participant (unit: km)
Surface of the convex hull (`cvx_surface`)	Surface of the smallest polygon containing all the activity locations of the participant (unit: km2)
Minimal road network distance from the residence to an activity place (`min_length`)	Minimal distance from the residence to an activity place using the road network (in meters)
Maximal road network distance from the residence to an activity place (`max_length`)	Maximal distance from the residence to an activity place using the road network (in meters)
Median road network distance from the residence to all activity places (`median_length`)	Median distance from home to all activity places using the road network (in meters)

ASindic <- ess.tab.camille %>%
  select("interact_id", "cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")
.asmtx <- as.matrix(summary(ASindic[c("cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")], digits = 1))
.asmtx <- apply(.asmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.asmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.asmtx[is.na(.asmtx)] <- 0
kable(.asmtx, caption = "Activity space statistics", digits = 1) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Activity space statistics
	cvx_perimeter	cvx_surface	min_length	max_length	median_length
Min.	1	0	8e-02	7e+02	329
1st Qu.	21	18	2e+02	8e+03	1158
Median	47	79	3e+02	2e+04	2022
Mean	217	1496	6e+02	5e+04	3164
3rd Qu.	200	607	6e+02	7e+04	4177
Max.	11412	36237	2e+04	2e+05	29073

3.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Montréal' AND wave_id = 3 AND status = 'return'

3.2.5.1 Number of people in the network (`people_degree`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

people_degree
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	3	5	6.844156	9	73

3.2.5.2 Simmelian Brokerage (`simmelian`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
2	3	6	7.642066	9	73	37

3.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	2	3	4.571429	6	34

3.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	1	3	3.590909	5	22

INTERACT Montréal Participant VERITAS Summary - W3 - new and returning

Benoit THIERRY

04 April, 2024

1 VERITAS dataset description

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

2.1.2 Are you currently working?

2.1.3 Do you work…

2.1.4 Where do you work?

2.1.5 Are you currently a registered student?

2.1.6 Do you study…

2.1.7 Where do you study?

2.1.8 Did you go food shopping at least once in the past month?

2.1.9 Please list the places where you shopped for food in the past month

2.1.10 Did you visit places to receive services at least once in the past month?

2.1.11 Please list the places where you received services in the past month

2.1.12 Did you visit someone at their home at least once in the past month?

2.1.13 Please list the places you visited someone in the past month

2.1.14 Are there other places you visited in the past month that we have not mentioned?

2.1.15 Please list the places you visited in the past month

2.1.16 Now, we will ask you questions about the people around you.

2.1.16.1 Please list the people with whom you live

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

2.1.16.3 Among these people, who do you like to socialize with?

2.2 Derived metrics

2.2.1 Transportation mode preferences

2.2.2 Visiting places with someone

2.2.3 Visit frequency

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

2.2.4.1 Indicators related to the lifestyle

2.2.4.2 Indicators related to the geometry of the activity space

2.2.4.3 Indicators related to the importance of the residential neighborhood

2.2.5 Social indicators: Alexandre Naud’s toolbox

2.2.5.1 Number of people in the network (people_degree)

2.2.5.2 Simmelian Brokerage (simmelian)

2.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

2.2.5.4 Number of people with whom the participant discuss important matters (important_size)

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

3.1.2 Place your home on map

3.1.3 Are you currently working?

3.1.4 Do you work…

3.1.5 Where do you work?

3.1.6 Are you currently a registered student?

3.1.7 Do you study…

3.1.8 Where do you study?

3.1.9 Did you go food shopping at least once in the past month?

3.1.10 Please list the places where you shopped for food in the past month

3.1.11 Did you visit places to receive services at least once in the past month?

3.1.12 Please list the places where you received services in the past month

3.1.13 Did you visit someone at their home at least once in the past month?

3.1.14 Please list the places you visited someone in the past month

3.1.15 Are there other places you visited in the past month that we have not mentioned?

3.1.16 Please list the places you visited in the past month

3.1.17 Now, we will ask you questions about the people around you

3.1.17.1 Please list the people with whom you live

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

3.1.17.3 Among these people, who do you like to socialize with?

3.2 Derived metrics

3.2.1 Transportation mode preferences

3.2.2 Visiting places with someone

3.2.3 Visit frequency

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

3.2.4.1 Indicators related to the lifestyle

3.2.4.2 Indicators related to the geometry of the activity space

3.2.4.3 Indicators related to the importance of the residential neighborhood

3.2.5 Social indicators: Alexandre Naud’s toolbox

3.2.5.1 Number of people in the network (people_degree)

3.2.5.2 Simmelian Brokerage (simmelian)

3.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

3.2.5.4 Number of people with whom the participant discuss important matters (important_size)

2.2.5.1 Number of people in the network (`people_degree`)

2.2.5.2 Simmelian Brokerage (`simmelian`)

2.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

2.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

3.2.5.1 Number of people in the network (`people_degree`)

3.2.5.2 Simmelian Brokerage (`simmelian`)

3.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

3.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)