1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
Social contacts: people and/or groups frequented by participants
Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

home_location <- locations[locations$location_category == 1, ]

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of participants by municipalities
NAME	n
Vancouver	48
Surrey	1

2.1.2 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently working
working	n
1 [Yes]	40
2 [No]	9

2.1.3 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I work…
working	n
1 [From home only]	2
2 [Both from home and away from home]	26
3 [Away from home only]	12
N/A	9

2.1.4 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently studying
studying	n
1 [Yes]	12
2 [No]	37

2.1.6 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I study…
studying	n
1 [From home only]	4
2 [Both from home and away from home]	6
3 [Away from home only]	2
N/A	37

2.1.7 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.8 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

shopping	n
1 [Yes]	49

2.1.9 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

Grocery store
Market
Bakery
Specialty store
Convenience store
Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
5 [Supermarket]	112
6 [Public/farmer’s market]	17
7 [Bakery]	8
8 [Specialty food store]	6
9 [Convenience store/Dépanneur]	6

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	min	mean	median	max
5 [Supermarket]	1	2.29	2	6
6 [Public/farmer’s market]	0	0.35	0	2
7 [Bakery]	0	0.16	0	2
8 [Specialty food store]	0	0.12	0	2
9 [Convenience store/Dépanneur]	0	0.12	0	1
10 [Liquor store/SAQ]	0	0.00	0	0

2.1.10 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

services	n
1 [Yes]	39
2 [No]	10

2.1.11 Please list the places where you received services in the past month

The following location types are grouped under this question:

Bank
Hair salon/barbershop
Post office
Drugstore
Doctor/healthcare provider
Other service location

serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

service locations by categories
location_category	n
11 [Bank]	11
12 [Hair salon/barbershop]	17
13 [Post office]	6
14 [Drugstore]	20
15 [Doctor/healthcare provider]	25

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of service locations by participant and category
location_category	mean	max
11 [Bank]	0.22	1
12 [Hair salon/barbershop]	0.35	1
13 [Post office]	0.12	1
14 [Drugstore]	0.41	2
15 [Doctor/healthcare provider]	0.51	3

2.1.12 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

visiting_new0_w3	n
1 [Yes]	35
2 [No]	14

2.1.13 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.1.14 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

other_w3	n
1 [Yes]	28
2 [No]	21

2.1.15 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.1.16 Now, we will ask you questions about the people around you.

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	3	4	6.6	10	24

ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

2.1.16.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people in household per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.4	2	5

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people in household among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	20.7	35.9	54.2	100	1

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of important people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	3	4.6	6	24

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of important people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	53.4	84.5	74.3	100	100	1

2.1.16.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people with whom to socialize per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	4.3	6	24

kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people with whom to socialize among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	50	70.7	68.1	100	100	1

2.2 Derived metrics

2.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Transportation mode preferences
description	N	By car	On foot	By bike	By public transit	Other
3 [Work]	45	19	9	19	27	0
4 [School/College/University]	8	2	1	2	7	0
5 [Supermarket]	112	39	56	36	17	1
6 [Public/farmer’s market]	17	4	10	5	2	0
7 [Bakery]	8	0	8	2	1	0
8 [Specialty food store]	6	0	5	2	0	0
9 [Convenience store/Dépanneur]	6	1	6	2	0	0
11 [Bank]	11	1	9	3	2	0
12 [Hair salon/barbershop]	17	4	4	5	5	0
13 [Post office]	6	0	6	0	0	0
14 [Drugstore]	20	1	17	4	1	0
15 [Doctor/healthcare provider]	25	9	7	7	6	0
17 [Leisure-time physical activity]	27	15	8	9	3	0
18 [Park]	63	12	30	28	5	0
19 [Cultural activity]	16	2	5	4	7	0
20 [Volunteering place]	3	1	1	1	1	0
22 [Restaurant, café, bar, etc.]	93	15	51	23	16	0
23 [Take-out]	23	2	5	16	7	0
24 [Walk]	14	5	6	8	0	1
25 [Other place]	119	23	54	44	30	0
26 [Social contact residence]	73	37	24	20	14	1

# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visiting places with someone
description	N	Visited with someone	Visited with someone (%)
3 [Work]	45	14	31.1
4 [School/College/University]	8	2	25.0
5 [Supermarket]	112	52	46.4
6 [Public/farmer’s market]	17	11	64.7
7 [Bakery]	8	5	62.5
8 [Specialty food store]	6	6	100.0
9 [Convenience store/Dépanneur]	6	4	66.7
11 [Bank]	11	1	9.1
12 [Hair salon/barbershop]	17	0	0.0
13 [Post office]	6	2	33.3
14 [Drugstore]	20	9	45.0
15 [Doctor/healthcare provider]	25	7	28.0
17 [Leisure-time physical activity]	27	23	85.2
18 [Park]	63	50	79.4
19 [Cultural activity]	16	14	87.5
20 [Volunteering place]	3	1	33.3
22 [Restaurant, café, bar, etc.]	93	66	71.0
23 [Take-out]	23	11	47.8
24 [Walk]	14	8	57.1
25 [Other place]	119	61	51.3
26 [Social contact residence]	73	59	80.8

# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

2.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visit frequency (expressed in times/year)
description	N	min	max	mean	median	sd
3 [Work]	45	1	35	10.7	8.0	8.2
4 [School/College/University]	8	1	28	10.2	7.0	10.3
5 [Supermarket]	112	0	20	3.4	2.0	2.8
6 [Public/farmer’s market]	17	1	4	2.6	3.0	1.1
7 [Bakery]	8	1	10	3.0	2.0	2.9
8 [Specialty food store]	6	1	8	3.0	2.0	2.5
9 [Convenience store/Dépanneur]	6	2	9	4.2	3.5	2.6
11 [Bank]	11	1	3	1.8	2.0	0.6
12 [Hair salon/barbershop]	17	1	1	1.0	1.0	0.0
13 [Post office]	6	1	2	1.3	1.0	0.5
14 [Drugstore]	20	1	10	3.1	2.0	2.8
15 [Doctor/healthcare provider]	25	1	4	1.6	1.0	1.0
17 [Leisure-time physical activity]	27	1	12	3.4	2.0	2.7
18 [Park]	63	1	28	3.7	2.0	4.9
19 [Cultural activity]	16	1	4	1.6	1.0	1.0
20 [Volunteering place]	3	2	16	7.0	3.0	7.8
22 [Restaurant, café, bar, etc.]	93	1	8	1.7	1.0	1.5
23 [Take-out]	23	1	4	1.5	1.0	0.9
24 [Walk]	14	1	3	1.8	2.0	0.8
25 [Other place]	119	1	20	2.8	2.0	3.1
26 [Social contact residence]	73	1	30	3.8	2.0	5.8

# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'new'

2.2.4.1 Indicators related to the lifestyle

Indicators	Measurement approach
Number of activity places (`n_acti_places`)	Count of activity places
Number of visits to places per week (`n_weekly_vst`)	Number of activity places per individual multiplied by the frequency of visit per week to each location, excluding the residence
Number of activity types (`n_acti_types`)	6 types of activities considered: 1-Residential; 2-Occupation; 3-Shopping activities; 4-Services; 5-Transportation; 6-Leisure activities (NB original categories were as follow: 1-Residential; 2-work; 3-food and other services; 4-transport station/stop; 5-recreational activity; 6-social activity)

LLindic <- ess.tab.camille %>%
  select("interact_id", "n_acti_places", "n_weekly_vst", "n_acti_types")
.llmtx <- as.matrix(summary(LLindic[c("n_acti_places", "n_weekly_vst", "n_acti_types")], digits = 1))
.llmtx <- apply(.llmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.llmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
kable(.llmtx, caption = "Lifestyle statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Lifestyle statistics
	n_acti_places	n_weekly_vst	n_acti_types
Min.	5	3	3
1st Qu.	11	7	4
Median	13	11	5
Mean	16	11	5
3rd Qu.	20	14	5
Max.	35	34	5

2.2.4.2 Indicators related to the geometry of the activity space

Indicators	Measurement approach
Perimeter of the convex hull (`cvx_perimeter`)	Perimeter of the smallest polygon containing all the activity locations of the participant (unit: km)
Surface of the convex hull (`cvx_surface`)	Surface of the smallest polygon containing all the activity locations of the participant (unit: km2)
Minimal road network distance from the residence to an activity place (`min_length`)	Minimal distance from the residence to an activity place using the road network (in meters)
Maximal road network distance from the residence to an activity place (`max_length`)	Maximal distance from the residence to an activity place using the road network (in meters)
Median road network distance from the residence to all activity places (`median_length`)	Median distance from home to all activity places using the road network (in meters)

ASindic <- ess.tab.camille %>%
  select("interact_id", "cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")
.asmtx <- as.matrix(summary(ASindic[c("cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")], digits = 1))
.asmtx <- apply(.asmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.asmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.asmtx[is.na(.asmtx)] <- 0
kable(.asmtx, caption = "Activity space statistics", digits = 1) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Activity space statistics
	cvx_perimeter	cvx_surface	min_length	max_length	median_length
Min.	15	7	28	5e+03	912
1st Qu.	29	29	212	1e+04	2078
Median	44	52	416	2e+04	2482
Mean	614	1814	553	3e+04	3169
3rd Qu.	74	178	783	3e+04	3928
Max.	19414	46911	1850	1e+05	7861

2.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'new'

2.2.5.1 Number of people in the network (`people_degree`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

people_degree
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	3	4	6.571429	10	24

2.2.5.2 Simmelian Brokerage (`simmelian`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
2	3	5	7.227273	10	24	5

2.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	4.346939	6	24

2.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	3	4.632653	6	24

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

# extract and recode
.home <- veritas_main[c("interact_id", "home_geom_confirmation")] %>% dplyr::rename(home_geom_confirmation_code = home_geom_confirmation)
.home$home_geom_confirmation <- factor(ifelse(.home$home_geom_confirmation_code == 1, "1 [Yes]",
  ifelse(.home$home_geom_confirmation_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .home) +
  geom_histogram(aes(x = home_geom_confirmation), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "home_geom_confirmation")

.home_cnt <- .home %>%
  group_by(home_geom_confirmation) %>%
  dplyr::count() %>%
  arrange(home_geom_confirmation)
kable(.home_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

home_geom_confirmation	n
1 [Yes]	85
2 [No]	54

3.1.2 Place your home on map

home_location <- locations[locations$location_category == 1, ] %>%
  inner_join(.home, by = "interact_id")

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, aes(color = home_geom_confirmation), size = 1.8, alpha = .5) + # see https://github.com/r-spatial/sf/issues/336
  theme(legend.position = "bottom")

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of participants by municipalities
NAME	n
Vancouver	138
Cowichan Valley	1

3.1.3 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently working
working	n
1 [Yes]	82
2 [No]	57

3.1.4 Do you work…

# extract and recode
.work <- veritas_main[c("interact_id", "working_home")] %>% dplyr::rename(working_code = working_home)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [From home only]",
  ifelse(.work$working_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$working_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "I work...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I work…
working	n
1 [From home only]	22
2 [Both from home and away from home]	38
3 [Away from home only]	22
N/A	57

3.1.5 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.6 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently studying
studying	n
1 [Yes]	6
2 [No]	133

3.1.7 Do you study…

# extract and recode
.work <- veritas_main[c("interact_id", "studying_home")] %>% dplyr::rename(studying_code = studying_home)
.work$studying <- factor(ifelse(.work$studying_code == 1, "1 [From home only]",
  ifelse(.work$studying_code == 2, "2 [Both from home and away from home]", 
         ifelse(.work$studying_code == 3, "3 [Away from home only]", "N/A"))
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "studying")

.work_cnt <- .work %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.work_cnt, caption = "I study...") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

I study…
studying	n
1 [From home only]	2
2 [Both from home and away from home]	4
N/A	133

3.1.8 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.9 Did you go food shopping at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "shopping")] %>% dplyr::rename(shopping_code = shopping)
.transp$shopping <- factor(ifelse(.transp$shopping_code == 1, "1 [Yes]",
  ifelse(.transp$shopping_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = shopping), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "shopping")

.transp_cnt <- .transp %>%
  group_by(shopping) %>%
  dplyr::count() %>%
  arrange(shopping)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

shopping	n
1 [Yes]	138
2 [No]	1

3.1.10 Please list the places where you shopped for food in the past month

The following location types are grouped under this question:

Grocery store
Market
Bakery
Specialty store
Convenience store
Other shopping location

NB Contrary to previous waves, SAQ/liquor store is not listed among the choices.

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
5 [Supermarket]	396
6 [Public/farmer’s market]	62
7 [Bakery]	47
8 [Specialty food store]	84
9 [Convenience store/Dépanneur]	12

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	mean	median	max
5 [Supermarket]	2.85	3	8
6 [Public/farmer’s market]	0.45	0	3
7 [Bakery]	0.34	0	3
8 [Specialty food store]	0.60	0	4
9 [Convenience store/Dépanneur]	0.09	0	2
10 [Liquor store/SAQ]	0.00	0	0

3.1.11 Did you visit places to receive services at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "services")] %>% dplyr::rename(services_code = services)
.transp$services <- factor(ifelse(.transp$services_code == 1, "1 [Yes]",
  ifelse(.transp$services_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = services), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "services")

.transp_cnt <- .transp %>%
  group_by(services) %>%
  dplyr::count() %>%
  arrange(services)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

services	n
1 [Yes]	122
2 [No]	17

3.1.12 Please list the places where you received services in the past month

The following location types are grouped under this question:

Bank
Hair salon/barbershop
Post office
Drugstore
Doctor/healthcare provider
Other service location

serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "service locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

service locations by categories
location_category	n
11 [Bank]	73
12 [Hair salon/barbershop]	51
13 [Post office]	40
14 [Drugstore]	103
15 [Doctor/healthcare provider]	91

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of service locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of service locations by participant and category
location_category	mean	median	max
11 [Bank]	0.53	0	3
12 [Hair salon/barbershop]	0.37	0	1
13 [Post office]	0.29	0	1
14 [Drugstore]	0.74	1	4
15 [Doctor/healthcare provider]	0.65	0	5

3.1.13 Did you visit someone at their home at least once in the past month?

# extract and recode
.transp <- veritas_main[c("interact_id", "visiting_new0_w3")] %>% dplyr::rename(visiting_new0_w3_code = visiting_new0_w3)
.transp$visiting_new0_w3 <- factor(ifelse(.transp$visiting_new0_w3_code == 1, "1 [Yes]",
  ifelse(.transp$visiting_new0_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = visiting_new0_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new0_w3")

.transp_cnt <- .transp %>%
  group_by(visiting_new0_w3) %>%
  dplyr::count() %>%
  arrange(visiting_new0_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

visiting_new0_w3	n
1 [Yes]	93
2 [No]	46

3.1.14 Please list the places you visited someone in the past month

visit_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visit_location), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

3.1.15 Are there other places you visited in the past month that we have not mentioned?

# extract and recode
.transp <- veritas_main[c("interact_id", "other_w3")] %>% dplyr::rename(other_w3_code = other_w3)
.transp$other_w3 <- factor(ifelse(.transp$other_w3_code == 1, "1 [Yes]",
  ifelse(.transp$other_w3_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = other_w3), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_w3")

.transp_cnt <- .transp %>%
  group_by(other_w3) %>%
  dplyr::count() %>%
  arrange(other_w3)
kable(.transp_cnt) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

other_w3	n
1 [Yes]	85
2 [No]	54

3.1.16 Please list the places you visited in the past month

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.1.17 Now, we will ask you questions about the people around you

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	3	6	7.2	10	32

# histogram
ggplot(data = .pl_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count)), position = "dodge") +
  labs(x = "Social network size", fill = element_blank())

3.1.17.1 Please list the people with whom you live

# extract number of household people / participant
.n_household <- household %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_household, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_household = n.y) %>%
  mutate(pct = 100 * n_household / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_household))),
  caption = "Number of people in household per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people in household per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.1	2	6

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of people in household among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people in household among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	16.7	29.9	48.2	100	1

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of important people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	3	3.7	5	12

kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of important people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	36.4	61.2	61.5	98.1	100	1

3.1.17.3 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people with whom to socialize per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	4	4.9	6.5	20

kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people with whom to socialize among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	50	79.3	71.3	100	100	1

3.2 Derived metrics

3.2.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car" = sum(location_tmode2_1),
    "On foot" = sum(location_tmode2_4),
    "By bike" = sum(location_tmode2_5),
    "By public transit" = sum(location_tmode2_6),
    "Other" = sum(location_tmode2_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Transportation mode preferences
description	N	By car	On foot	By bike	By public transit	Other
3 [Work]	94	50	12	27	28	11
4 [School/College/University]	4	1	0	1	2	0
5 [Supermarket]	396	189	200	44	25	3
6 [Public/farmer’s market]	62	32	26	7	1	0
7 [Bakery]	47	18	26	5	1	3
8 [Specialty food store]	84	32	50	7	4	0
9 [Convenience store/Dépanneur]	12	2	9	1	0	1
11 [Bank]	73	21	47	5	2	1
12 [Hair salon/barbershop]	51	22	19	5	7	0
13 [Post office]	40	9	31	2	0	1
14 [Drugstore]	103	31	74	8	5	0
15 [Doctor/healthcare provider]	91	37	28	8	21	7
17 [Leisure-time physical activity]	89	55	30	18	4	0
18 [Park]	132	42	75	28	6	2
19 [Cultural activity]	41	20	9	7	9	0
20 [Volunteering place]	14	8	6	1	1	0
21 [Religious/spiritual activity]	8	4	2	0	0	2
22 [Restaurant, café, bar, etc.]	141	56	64	13	13	4
23 [Take-out]	36	19	17	0	1	0
24 [Walk]	115	29	84	21	5	0
25 [Other place]	357	176	136	50	48	5
26 [Social contact residence]	165	107	41	31	7	1

# graph
.tm1 <- .tm %>%
  filter(location_tmode2_1 == 1) %>%
  mutate(tm = "[1] By car")
.tm4 <- .tm %>%
  filter(location_tmode2_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode2_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode2_6 == 1) %>%
  mutate(tm = "[6] By public transit")
.tm99 <- .tm %>%
  filter(location_tmode2_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- .tm1 %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.2.2 Visiting places with someone

Based on the answers to the question In the past month, were you at this place with someone from your network?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.ppl <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_people_recode = case_when(
    location_people == 1 ~ 1,
    location_people == 2 ~ 0
  ))

.ppl_grouped <- .ppl %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited with someone" = sum(location_people_recode),
    "Visited with someone (%)" = round(sum(location_people_recode) * 100.0 / n(), digits = 1)
  )

kable(.ppl_grouped, caption = "Visiting places with someone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visiting places with someone
description	N	Visited with someone	Visited with someone (%)
3 [Work]	94	35	37.2
4 [School/College/University]	4	2	50.0
5 [Supermarket]	396	200	50.5
6 [Public/farmer’s market]	62	29	46.8
7 [Bakery]	47	24	51.1
8 [Specialty food store]	84	50	59.5
9 [Convenience store/Dépanneur]	12	9	75.0
11 [Bank]	73	42	57.5
12 [Hair salon/barbershop]	51	19	37.3
13 [Post office]	40	28	70.0
14 [Drugstore]	103	61	59.2
15 [Doctor/healthcare provider]	91	40	44.0
17 [Leisure-time physical activity]	89	49	55.1
18 [Park]	132	66	50.0
19 [Cultural activity]	41	30	73.2
20 [Volunteering place]	14	4	28.6
21 [Religious/spiritual activity]	8	5	62.5
22 [Restaurant, café, bar, etc.]	141	90	63.8
23 [Take-out]	36	16	44.4
24 [Walk]	115	55	47.8
25 [Other place]	357	184	51.5
26 [Social contact residence]	165	142	86.1

# histogram of answers
ggplot(data = .ppl) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_people)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("With someone", "Alone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited with someone", x = element_blank()) +
  coord_flip()

3.2.3 Visit frequency

Based on the answers to the question In the past month, I visited this place….

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit2),
    max = max(location_freq_visit2),
    mean = mean(location_freq_visit2),
    median = median(location_freq_visit2),
    sd = sd(location_freq_visit2)
  )

kable(.freq_grouped, 
      caption = "Visit frequency (expressed in times/year)",
      digits = 1) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visit frequency (expressed in times/year)
description	N	min	max	mean	median	sd
3 [Work]	94	0	30	9.1	5.0	8.0
4 [School/College/University]	4	1	12	4.2	2.0	5.3
5 [Supermarket]	396	0	30	3.5	2.0	3.6
6 [Public/farmer’s market]	62	0	6	2.2	2.0	1.2
7 [Bakery]	47	0	6	1.9	1.0	1.3
8 [Specialty food store]	84	0	8	2.0	1.0	1.6
9 [Convenience store/Dépanneur]	12	0	18	4.2	2.5	5.1
11 [Bank]	73	0	11	2.0	1.0	1.8
12 [Hair salon/barbershop]	51	0	4	1.1	1.0	0.6
13 [Post office]	40	0	15	2.5	1.0	3.3
14 [Drugstore]	103	1	10	2.8	2.0	1.9
15 [Doctor/healthcare provider]	91	0	6	1.4	1.0	0.9
17 [Leisure-time physical activity]	89	1	25	5.7	4.0	5.3
18 [Park]	132	0	30	4.6	2.0	6.1
19 [Cultural activity]	41	1	15	2.0	1.0	2.4
20 [Volunteering place]	14	0	30	5.5	3.0	7.6
21 [Religious/spiritual activity]	8	1	15	5.1	4.0	4.7
22 [Restaurant, café, bar, etc.]	141	0	16	1.9	1.0	2.3
23 [Take-out]	36	0	4	1.2	1.0	0.8
24 [Walk]	115	0	30	6.1	4.0	6.7
25 [Other place]	357	0	30	2.8	2.0	4.1
26 [Social contact residence]	165	1	30	2.8	2.0	3.7

# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit2)) +
  scale_y_continuous(limits = c(0, 31)) +
  labs(y = "Visits/month (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'return'

3.2.4.1 Indicators related to the lifestyle

Indicators	Measurement approach
Number of activity places (`n_acti_places`)	Count of activity places
Number of visits to places per week (`n_weekly_vst`)	Number of activity places per individual multiplied by the frequency of visit per week to each location, excluding the residence
Number of activity types (`n_acti_types`)	6 types of activities considered: 1-Residential; 2-Occupation; 3-Shopping activities; 4-Services; 5-Transportation; 6-Leisure activities (NB original categories were as follow: 1-Residential; 2-work; 3-food and other services; 4-transport station/stop; 5-recreational activity; 6-social activity)

LLindic <- ess.tab.camille %>%
  select("interact_id", "n_acti_places", "n_weekly_vst", "n_acti_types")
.llmtx <- as.matrix(summary(LLindic[c("n_acti_places", "n_weekly_vst", "n_acti_types")], digits = 1))
.llmtx <- apply(.llmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.llmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.llmtx[is.na(.llmtx)] <- 0
kable(.llmtx, caption = "Lifestyle statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Lifestyle statistics
	n_acti_places	n_weekly_vst	n_acti_types
Min.	3	0.5	2
1st Qu.	11	6.8	4
Median	16	10.6	4
Mean	17	12.0	4
3rd Qu.	21	15.9	5
Max.	37	35.8	5

3.2.4.2 Indicators related to the geometry of the activity space

Indicators	Measurement approach
Perimeter of the convex hull (`cvx_perimeter`)	Perimeter of the smallest polygon containing all the activity locations of the participant (unit: km)
Surface of the convex hull (`cvx_surface`)	Surface of the smallest polygon containing all the activity locations of the participant (unit: km2)
Minimal road network distance from the residence to an activity place (`min_length`)	Minimal distance from the residence to an activity place using the road network (in meters)
Maximal road network distance from the residence to an activity place (`max_length`)	Maximal distance from the residence to an activity place using the road network (in meters)
Median road network distance from the residence to all activity places (`median_length`)	Median distance from home to all activity places using the road network (in meters)

ASindic <- ess.tab.camille %>%
  select("interact_id", "cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")
.asmtx <- as.matrix(summary(ASindic[c("cvx_perimeter", "cvx_surface", "min_length", "max_length", "median_length")], digits = 1))
.asmtx <- apply(.asmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.asmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ", "N/A.")
.asmtx[is.na(.asmtx)] <- 0
kable(.asmtx, caption = "Activity space statistics", digits = 1) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Activity space statistics
	cvx_perimeter	cvx_surface	min_length	max_length	median_length
Min.	2	0e+00	6e-02	8e+02	378
1st Qu.	17	1e+01	2e+02	7e+03	1188
Median	29	4e+01	3e+02	1e+04	1804
Mean	530	1e+05	4e+02	2e+04	2361
3rd Qu.	62	1e+02	6e+02	2e+04	2982
Max.	21083	2e+07	4e+03	1e+05	12353
N/A.	0	0	1	1	1

3.2.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 3 AND status = 'return'

3.2.5.1 Number of people in the network (`people_degree`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

people_degree
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	3	6	7.208633	10	32

3.2.5.2 Simmelian Brokerage (`simmelian`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
2	4	6	7.75	10	32	11

3.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	4	4.928058	6.5	20

3.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	3	3.748201	5	12

INTERACT Vancouver Participant VERITAS Summary - W3 - new and returning

Benoit THIERRY

04 April, 2024

1 VERITAS dataset description

2 Basic descriptive statistics for new participants

2.1 Main questionnaire

2.1.1 Please indicate on the map your place of residence

2.1.2 Are you currently working?

2.1.3 Do you work…

2.1.4 Where do you work?

2.1.5 Are you currently a registered student?

2.1.6 Do you study…

2.1.7 Where do you study?

2.1.8 Did you go food shopping at least once in the past month?

2.1.9 Please list the places where you shopped for food in the past month

2.1.10 Did you visit places to receive services at least once in the past month?

2.1.11 Please list the places where you received services in the past month

2.1.12 Did you visit someone at their home at least once in the past month?

2.1.13 Please list the places you visited someone in the past month

2.1.14 Are there other places you visited in the past month that we have not mentioned?

2.1.15 Please list the places you visited in the past month

2.1.16 Now, we will ask you questions about the people around you.

2.1.16.1 Please list the people with whom you live

2.1.16.2 Please list the people with whom you discuss important matters with (in person, or virtually)

2.1.16.3 Among these people, who do you like to socialize with?

2.2 Derived metrics

2.2.1 Transportation mode preferences

2.2.2 Visiting places with someone

2.2.3 Visit frequency

2.2.4 Spatial indicators: Camille Perchoux’s toolbox

2.2.4.1 Indicators related to the lifestyle

2.2.4.2 Indicators related to the geometry of the activity space

2.2.4.3 Indicators related to the importance of the residential neighborhood

2.2.5 Social indicators: Alexandre Naud’s toolbox

2.2.5.1 Number of people in the network (people_degree)

2.2.5.2 Simmelian Brokerage (simmelian)

2.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

2.2.5.4 Number of people with whom the participant discuss important matters (important_size)

3 Basic descriptive statistics for returning participants

3.1 Main questionnaire

3.1.1 Now, let’s start with your home. Do you confirm this is your address?

3.1.2 Place your home on map

3.1.3 Are you currently working?

3.1.4 Do you work…

3.1.5 Where do you work?

3.1.6 Are you currently a registered student?

3.1.7 Do you study…

3.1.8 Where do you study?

3.1.9 Did you go food shopping at least once in the past month?

3.1.10 Please list the places where you shopped for food in the past month

3.1.11 Did you visit places to receive services at least once in the past month?

3.1.12 Please list the places where you received services in the past month

3.1.13 Did you visit someone at their home at least once in the past month?

3.1.14 Please list the places you visited someone in the past month

3.1.15 Are there other places you visited in the past month that we have not mentioned?

3.1.16 Please list the places you visited in the past month

3.1.17 Now, we will ask you questions about the people around you

3.1.17.1 Please list the people with whom you live

3.1.17.2 Please list the people with whom you discuss important matters with (in person, or virtually)

3.1.17.3 Among these people, who do you like to socialize with?

3.2 Derived metrics

3.2.1 Transportation mode preferences

3.2.2 Visiting places with someone

3.2.3 Visit frequency

3.2.4 Spatial indicators: Camille Perchoux’s toolbox

3.2.4.1 Indicators related to the lifestyle

3.2.4.2 Indicators related to the geometry of the activity space

3.2.4.3 Indicators related to the importance of the residential neighborhood

3.2.5 Social indicators: Alexandre Naud’s toolbox

3.2.5.1 Number of people in the network (people_degree)

3.2.5.2 Simmelian Brokerage (simmelian)

3.2.5.3 Number of people with whom the participant like to socialize (socialize_size)

3.2.5.4 Number of people with whom the participant discuss important matters (important_size)

2.2.5.1 Number of people in the network (`people_degree`)

2.2.5.2 Simmelian Brokerage (`simmelian`)

2.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

2.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)

3.2.5.1 Number of people in the network (`people_degree`)

3.2.5.2 Simmelian Brokerage (`simmelian`)

3.2.5.3 Number of people with whom the participant like to socialize (`socialize_size`)

3.2.5.4 Number of people with whom the participant discuss important matters (`important_size`)