1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

  • Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
  • Social contacts: people and/or groups frequented by participants
  • Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities
VERITAS entities

New participants and returning participants are presented separately below, as they were presented two slightly different question flows.

2 Basic descriptive statistics for new participants

2.1 Section 1: Residence and Neighbourhood

2.1.1 Now, let’s start with your home. What is your address?

home_location <- locations[locations$location_category == 1, ]

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) # see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Vancouver 105

2.1.2 If you were asked to draw the boundaries of your neighbourhood, what would they be?

prn <- poly_geom[poly_geom$area_type == "neighborhood", ]

## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE, fill = alpha("blue", 0.05), color = alpha("blue", 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$geom)
prn$area_m2 <- st_area(prn$geom)
kable(t(as.matrix(summary(prn$area_m2))),
  caption = "Area (in square meters) of the perceived residential neighborhood",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived residential neighborhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
8188.4 1040135 2251171 19138319 3741706 1508170656

NB only 95 valid neighborhoods were collected, as many participants struggled to draw polygons on the map.

2.1.3 How attached are you to your neighbourhood?

# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99, c("interact_id", "neighbourhood_attach")] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, "1 [Not attached at all]",
  ifelse(.ngh_att$neighbourhood_attach_code == 6, "6 [Very attached]",
    .ngh_att$neighbourhood_attach_code
  )
))

# histogram of attachment
ggplot(data = .ngh_att) +
  geom_histogram(aes(x = neighbourhood_attach), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>%
  group_by(neighbourhood_attach) %>%
  dplyr::count() %>%
  arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Neigbourhood attachment
neighbourhood_attach n
1 [Not attached at all] 1
2 7
3 6
4 20
5 31
6 [Very attached] 39

2.1.4 On average, how many hours per day do you spend outside of your home?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))),
  caption = "Hours/day outside home",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside home
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 4.7 6 24

2.1.5 Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))),
  caption = "Hours/day outside neighbourhood",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside neighbourhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 2.3 3 10

2.1.6 Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

# extract and recode
.unsafe <- veritas_main[c("interact_id", "unsafe_area")] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, "1 [Yes]",
  ifelse(.unsafe$unsafe_area_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .unsafe) +
  geom_histogram(aes(x = unsafe_area), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>%
  group_by(unsafe_area) %>%
  dplyr::count() %>%
  arrange(unsafe_area)
kable(.unsafe_cnt, caption = "Unsafe areas") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Unsafe areas
unsafe_area n
1 [Yes] 13
2 [No] 92
# map
unsafe <- poly_geom[poly_geom$area_type == "unsafe area", ]

## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$geom)
kable(t(as.matrix(summary(unsafe$area_m2))),
  caption = "Area (in square meters) of the perceived unsafe area",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived unsafe area
Min. 1st Qu. Median Mean 3rd Qu. Max.
12580.6 73237.7 125212.8 2412351 227064.4 25172400

2.1.7 Do you spend the night somewhere other than your home at least once per week?

# extract and recode
.o_res <- veritas_main[c("interact_id", "other_resid")] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, "1 [Yes]",
  ifelse(.o_res$other_resid_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .o_res) +
  geom_histogram(aes(x = other_resid), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_resid")

.o_res_cnt <- .o_res %>%
  group_by(other_resid) %>%
  dplyr::count() %>%
  arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other residence
other_resid n
1 [Yes] 21
2 [No] 84

2.2 Section 2: Occupation

2.2.1 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 64
2 [No] 41

2.2.2 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.2.3 On average, how many hours per week do you work?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$working == 1, ]) +
  geom_histogram(aes(x = work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))),
  caption = "Work hours/week",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Work hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
4 24 35 33.2 44.2 65

2.2.4 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 11
2 [No] 94

2.2.5 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.2.6 On average, how many hours per week do you study?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$studying == 1, ]) +
  geom_histogram(aes(x = study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))),
  caption = "study hours/week",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
study hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
5 11.5 20 21.8 30 50

2.3 Section 3: Shopping activities

The following questions are used to generate the locations grouped into this section:

  1. Do you shop for groceries at a supermarket at least once per month?
  2. Do you shop at a public/farmer’s market at least once per month?
  3. Do you shop at a bakery at least once per month?
  4. Do you go to a specialty food store at least once per month? For example: a cheese shop, fruit and vegetable store, butcher’s shop, natural and health food store.
  5. Do you go to a convenience store at least once per month?
  6. Do you go to a liquor store at least once per month?
shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 303
6 [Public/farmer’s market] 41
7 [Bakery] 86
8 [Specialty food store] 87
9 [Convenience store/Dépanneur] 27
10 [Liquor store/SAQ] 91
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.89 3 5
6 [Public/farmer’s market] 0 0.39 0 3
7 [Bakery] 0 0.82 0 5
8 [Specialty food store] 0 0.83 0 5
9 [Convenience store/Dépanneur] 0 0.26 0 3
10 [Liquor store/SAQ] 0 0.87 1 4

2.4 Section 4: Services

The following questions are used to generate the locations grouped into this section:

  1. Where is the bank you go to most often located?
  2. Where is the hair salon or barber shop you go to most often?
  3. Where is the post office where you go to most often?
  4. Where is the drugstore you go to most often?
  5. If you need to visit a doctor or other healthcare provider, where do you go most often?
serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
11 [Bank] 76
12 [Hair salon/barbershop] 65
13 [Post office] 67
14 [Drugstore] 80
15 [Doctor/healthcare provider] 88
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
11 [Bank] 0 0.72 1 1
12 [Hair salon/barbershop] 0 0.62 1 1
13 [Post office] 0 0.64 1 1
14 [Drugstore] 0 0.76 1 1
15 [Doctor/healthcare provider] 0 0.84 1 3

2.5 Section 5: Transportation

2.5.1 Do you use public transit from your home?

# extract and recode
.transp <- veritas_main[c("interact_id", "public_transit")] %>% dplyr::rename(public_transit_code = public_transit)
.transp$public_transit <- factor(ifelse(.transp$public_transit_code == 1, "1 [Yes]",
  ifelse(.transp$public_transit_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = public_transit), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "public_transit")

.transp_cnt <- .transp %>%
  group_by(public_transit) %>%
  dplyr::count() %>%
  arrange(public_transit)
kable(.transp_cnt, caption = "Use public transit") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Use public transit
public_transit n
1 [Yes] 27
2 [No] 78

2.5.2 Where are the public transit stops that you access from your home?

transp_location <- locations[locations$location_category == 16, ]

bm + geom_sf(data = transp_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.6 Section 6: Leisure activities

The following questions are used to generate the locations grouped into this section:

  1. Do you participate in any (individual or group) sports or leisure-time physical activities at least once per month?
  2. Do you visit a park at least once per month?
  3. Do you participate in or attend as a spectator a cultural or non-sport leisure activity at least once per month? For example: singing or drawing lessons, book or poker club, concert or play.
  4. Do you volunteer at least once per month?
  5. Do you engage in any religious or spiritual activities at least once per month?
  6. Do you go to a restaurant, café, bar or other food and drink establishment at least once per month?
  7. Do you get take-out food at least once per month?
  8. Do you regularly go for walks?
leisure_lut <- data.frame(
  location_category_code = c(17, 18, 19, 20, 21, 22, 23, 24),
  location_category = factor(c(
    "17 [Leisure-time physical activity]",
    "18 [Park]",
    "19 [Cultural activity]",
    "20 [Volunteering place]",
    "21 [Religious or spiritual activity]",
    "22 [Restaurant, café, bar, etc. ]",
    "23 [Take-out]",
    "24 [Walk]"
  ))
)
leisure_location <- locations[locations$location_category %in% leisure_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(leisure_lut, by = "location_category_code")

# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = leisure_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
17 [Leisure-time physical activity] 104
18 [Park] 219
19 [Cultural activity] 4
20 [Volunteering place] 24
21 [Religious or spiritual activity] 15
22 [Restaurant, café, bar, etc. ] 137
23 [Take-out] 166
24 [Walk] 208
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = leisure_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)

.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of leisure locations by participant and category
location_category min mean median max
17 [Leisure-time physical activity] 0 0.99 1 5
18 [Park] 0 2.09 1 5
19 [Cultural activity] 0 0.04 0 2
20 [Volunteering place] 0 0.23 0 4
21 [Religious or spiritual activity] 0 0.14 0 1
22 [Restaurant, café, bar, etc. ] 0 1.30 1 5
23 [Take-out] 0 1.58 1 5
24 [Walk] 0 1.98 1 5

2.7 Section 7: Other places/activities

2.7.1 Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

# extract and recode
.other <- veritas_main[c("interact_id", "other")] %>% dplyr::rename(other_code = other)
.other$other <- factor(ifelse(.other$other_code == 1, "1 [Yes]",
  ifelse(.other$other_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .other) +
  geom_histogram(aes(x = other), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other")

.other_cnt <- .other %>%
  group_by(other) %>%
  dplyr::count() %>%
  arrange(other)
kable(.other_cnt, caption = "Other places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other places
other n
1 [Yes] 42
2 [No] 63

2.7.2 Can you locate this place?

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.8 Section 8: Areas of change

2.8.1 Can you locate areas where you have noticed an improvement of the urban environment?

# extract and recode
.improv <- veritas_main[c("interact_id", "improvement_none")] %>% dplyr::rename(improvement_none_code = improvement_none)
.improv$improvement_none <- factor(ifelse(.improv$improvement_none_code == 1, "1 [TRUE]",
  ifelse(.improv$improvement_none_code == 0, "0 [FALSE]", "N/A")
))

# histogram of answers
ggplot(data = .improv) +
  geom_histogram(aes(x = improvement_none), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "improvement_none")

.improv_cnt <- .improv %>%
  group_by(improvement_none) %>%
  dplyr::count() %>%
  arrange(improvement_none)
kable(.improv_cnt, caption = "No area of improvement") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
No area of improvement
improvement_none n
0 [FALSE] 41
1 [TRUE] 64
# polgon extraction
improv <- poly_geom[poly_geom$area_type == "improvement", ]

# Map
bm + geom_sf(data = improv, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
improv <- improv %>%
  mutate(area_m2 = st_area(.))
kable(t(as.matrix(summary(improv$area_m2))),
  caption = "Area (in square meters) of the perceived improvement areas",
  digits = 1
) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived improvement areas
Min. 1st Qu. Median Mean 3rd Qu. Max.
3695.1 27384.5 100153.8 551179.7 277806.7 13691632

2.8.2 Can you locate areas where you have noticed a deterioration of the urban environment?

# extract and recode
.deter <- veritas_main[c("interact_id", "deterioration_none")] %>% dplyr::rename(deterioration_none_code = deterioration_none)
.deter$deterioration_none <- factor(ifelse(.deter$deterioration_none_code == 1, "1 [TRUE]",
  ifelse(.deter$deterioration_none_code == 0, "0 [FALSE]", "N/A")
))

# histogram of answers
ggplot(data = .deter) +
  geom_histogram(aes(x = deterioration_none), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "deterioration_none")

.deter_cnt <- .deter %>%
  group_by(deterioration_none) %>%
  dplyr::count() %>%
  arrange(deterioration_none)
kable(.deter_cnt, caption = "No area of deterioration") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
No area of deterioration
deterioration_none n
0 [FALSE] 31
1 [TRUE] 74
# polgon extraction
deter <- poly_geom[poly_geom$area_type == "deterioration", ]

# Map
bm + geom_sf(data = deter, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
deter <- deter %>%
  mutate(area_m2 = st_area(.))
kable(t(as.matrix(summary(deter$area_m2))),
  caption = "Area (in square meters) of the perceived deterioration areas",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived deterioration areas
Min. 1st Qu. Median Mean 3rd Qu. Max.
5650.9 28673.4 111075.7 488635.7 401606.7 4210374

2.9 Section 9: Social contact

2.9.1 Do you visit anyone at his or her home at least once per month?

# extract and recode
.visiting <- veritas_main[c("interact_id", "visiting")] %>% dplyr::rename(visiting_code = visiting)
.visiting$visiting <- factor(ifelse(.visiting$visiting_code == 1, "1 [Yes]",
  ifelse(.visiting$visiting_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .visiting) +
  geom_histogram(aes(x = visiting), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting")

.visiting_cnt <- .visiting %>%
  group_by(visiting) %>%
  dplyr::count() %>%
  arrange(visiting)
kable(.visiting_cnt, caption = "Social contact") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Social contact
visiting n
1 [Yes] 45
2 [No] 60

2.9.2 Great, we are almost done completing this questionnaire. You have documented all your activity places on a map, and specified with whom you generally do these activities. These last few questions concern the people you documented earlier.

# compute statistics on groups / participant
# > one needs to account for participants who did not report any group
.gr_iid_cnt <- as.data.frame(group[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_gr_iid <- anti_join(veritas_main[c("interact_id")], .gr_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.gr_iid_cnt <- bind_rows(.gr_iid_cnt, .no_gr_iid)

kable(t(as.matrix(summary(.gr_iid_cnt$n))), caption = "Number of groups per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of groups per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0.6857143 1 6
# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))), caption = "Number of people per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 4.190476 6 31
# histogram
.sc_iid_cnt <- .pl_iid_cnt %>% mutate(soc_type = "people")
.sc_iid_cnt <- .gr_iid_cnt %>%
  mutate(soc_type = "group") %>%
  bind_rows(.sc_iid_cnt)

ggplot(data = .sc_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count), fill = soc_type), position = "dodge") +
  labs(x = "Social network size by element type", fill = element_blank())

2.9.2.1 Among these people, who do you discuss important matters with?

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 2.3 3 10
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 47.2 85.7 70.9 100 100 10

2.9.2.2 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.2 5 18
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 71.4 100 81.8 100 100 10

2.9.2.3 Among these people, who do you meet often with but do not necessarily feel close to?

# extract number of important people / participant
.n_not_close <- not_close %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_not_close <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_not_close, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_not_close = n.y) %>%
  mutate(pct = 100 * n_not_close / n_people)

kable(t(as.matrix(summary(.n_people_not_close$n_not_close))),
  caption = "Number of not so close people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of not so close people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0.5 0 10
kable(t(as.matrix(summary(.n_people_not_close$pct))),
  caption = "% of not so close people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of not so close people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 0 9.6 4.8 100 10

2.9.2.4 Among these people, who knows whom?

# extract number of who knows who relationships
.n_relat <- relationship %>%
  filter(relationship_type == 1) %>%
  dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_relat <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_relat, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_relat = n.y) %>%
  mutate(pct = 100 * n_relat * 2 / (n_people * (n_people - 1))) # potential number of relationships = N x (N -1) / 2

kable(t(as.matrix(summary(.n_people_relat$n_relat))),
  caption = "Number of relationships « who knows who » per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 8.7 9 112
kable(t(as.matrix(summary(.n_people_relat$pct))),
  caption = "% of relationships « who knows who » per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 50 80 71.8 100 100 40

2.10 Derived metrics

2.10.1 Existence of improvement and deterioration areas by participant

Combination of improvement and/or deterioration areas per participant

# cross tab of improvement vs deteriation areas
.improv <- improv[c("interact_id")] %>%
  mutate(improv = "Improvement")
.deter <- deter[c("interact_id")] %>%
  mutate(deter = "Deterioration")
.ct_impr_deter <- veritas_main[c("interact_id")] %>%
  transmute(interact_id = as.character(interact_id)) %>%
  left_join(.improv, by = "interact_id") %>%
  left_join(.deter, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), "N/A"))

kable(table(.ct_impr_deter$improv, .ct_impr_deter$deter), caption = "Improvement vs. deterioration") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left", row_label_position = "r") %>%
  column_spec(1, bold = T)
Improvement vs. deterioration
Deterioration N/A
Improvement 11 25
N/A 18 51

2.10.2 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

# code  en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99    Other

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car (driver)" = sum(location_tmode_1),
    "By car (passenger)" = sum(location_tmode_2),
    "By taxi/Uber" = sum(location_tmode_3),
    "On foot" = sum(location_tmode_4),
    "By bike" = sum(location_tmode_5),
    "By bus" = sum(location_tmode_6),
    "By train" = sum(location_tmode_7),
    "Other" = sum(location_tmode_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car (driver) By car (passenger) By taxi/Uber On foot By bike By bus By train Other
2 [Other residence] 24 9 9 0 5 3 4 3 3
3 [Work] 84 23 5 3 22 17 11 6 28
4 [School/College/University] 13 0 0 0 4 1 3 1 8
5 [Supermarket] 303 125 24 1 177 22 7 3 1
6 [Public/farmer’s market] 41 17 3 0 15 7 1 0 1
7 [Bakery] 86 26 3 0 58 7 2 0 0
8 [Specialty food store] 87 25 7 1 59 7 3 0 1
9 [Convenience store/Dépanneur] 27 4 2 0 21 2 1 2 2
10 [Liquor store/SAQ] 91 32 5 0 53 4 2 0 2
11 [Bank] 76 17 1 0 61 6 3 1 1
12 [Hair salon/barbershop] 65 28 2 0 26 16 6 5 1
13 [Post office] 67 10 0 0 54 7 0 0 0
14 [Drugstore] 80 16 4 0 61 8 3 0 0
15 [Doctor/healthcare provider] 88 42 5 0 31 13 3 2 2
16 [Public transit stop] 54 0 1 0 45 3 7 8 0
17 [Leisure-time physical activity] 104 36 10 0 44 20 3 2 8
18 [Park] 219 37 15 0 156 41 4 0 2
19 [Cultural activity] 4 2 0 0 1 0 0 0 1
20 [Volunteering place] 24 8 0 0 8 3 1 1 10
21 [Religious/spiritual activity] 15 4 1 0 2 1 0 1 8
22 [Restaurant, café, bar, etc.] 137 34 19 3 78 8 4 4 4
23 [Take-out] 166 48 28 0 75 3 2 1 18
24 [Walk] 208 30 13 0 163 18 6 2 4
25 [Other place] 75 33 6 0 19 19 4 4 1
# graph
.tm1 <- .tm %>%
  filter(location_tmode_1 == 1) %>%
  mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
  filter(location_tmode_2 == 1) %>%
  mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
  filter(location_tmode_3 == 1) %>%
  mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
  filter(location_tmode_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode_6 == 1) %>%
  mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
  filter(location_tmode_7 == 1) %>%
  mutate(tm = "[7] By train")
.tm99 <- .tm %>%
  filter(location_tmode_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
  bind_rows(.tm3) %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm7) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.10.3 Visiting places alone

Based on the answers to the question Do you usually go to this place alone or with other people?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.alone <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_alone_recode = case_when(
    location_alone2 == 1 ~ 1,
    location_alone2 == 2 ~ 0
  ))

.alone_grouped <- .alone %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited alone" = sum(location_alone_recode),
    "Visited alone (%)" = round(sum(location_alone_recode) * 100.0 / n(), digits = 1)
  )

kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places alone
description N Visited alone Visited alone (%)
2 [Other residence] 24 3 12.5
3 [Work] 84 39 46.4
4 [School/College/University] 13 9 69.2
5 [Supermarket] 303 217 71.6
6 [Public/farmer’s market] 41 21 51.2
7 [Bakery] 86 64 74.4
8 [Specialty food store] 87 67 77.0
9 [Convenience store/Dépanneur] 27 22 81.5
10 [Liquor store/SAQ] 91 76 83.5
11 [Bank] 76 70 92.1
12 [Hair salon/barbershop] 65 62 95.4
13 [Post office] 67 63 94.0
14 [Drugstore] 80 66 82.5
15 [Doctor/healthcare provider] 88 77 87.5
16 [Public transit stop] 54 50 92.6
17 [Leisure-time physical activity] 104 62 59.6
18 [Park] 219 89 40.6
19 [Cultural activity] 4 2 50.0
20 [Volunteering place] 24 11 45.8
21 [Religious/spiritual activity] 15 10 66.7
22 [Restaurant, café, bar, etc.] 137 38 27.7
23 [Take-out] 166 93 56.0
24 [Walk] 208 108 51.9
25 [Other place] 75 31 41.3
# histogram of answers
ggplot(data = .alone) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_alone2)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("Alone", "With someone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited alone", x = element_blank()) +
  coord_flip()

2.10.4 Visit frequency

Based on the answers to the question How often do you go there?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit),
    max = max(location_freq_visit),
    mean = mean(location_freq_visit),
    median = median(location_freq_visit),
    sd = sd(location_freq_visit)
  )

kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
2 [Other residence] 24 1 364 84 52 1.017032e+02
3 [Work] 84 0 365 202 208 1.311800e+02
4 [School/College/University] 13 0 365 205 260 1.671449e+02
5 [Supermarket] 303 1 364 50 36 5.420799e+01
6 [Public/farmer’s market] 41 1 104 30 24 2.363637e+01
7 [Bakery] 86 1 364 31 24 4.459628e+01
8 [Specialty food store] 87 3 208 40 24 4.005311e+01
9 [Convenience store/Dépanneur] 27 12 260 46 24 5.858191e+01
10 [Liquor store/SAQ] 91 1 156 22 12 2.431764e+01
11 [Bank] 76 1 260 18 12 3.330390e+01
12 [Hair salon/barbershop] 65 0 12 5 4 3.905002e+00
13 [Post office] 67 1 104 16 12 1.660865e+01
14 [Drugstore] 80 3 312 36 24 4.253930e+01
15 [Doctor/healthcare provider] 88 1 52 7 3 1.045485e+01
16 [Public transit stop] 54 1 728 89 36 1.235449e+02
17 [Leisure-time physical activity] 104 3 520000000000000 5000000000115 104 5.099020e+13
18 [Park] 219 1 780 86 36 1.119249e+02
19 [Cultural activity] 4 3 312 83 6 1.525459e+02
20 [Volunteering place] 24 4 364 57 24 8.437132e+01
21 [Religious/spiritual activity] 15 12 364 126 52 1.420959e+02
22 [Restaurant, café, bar, etc.] 137 1 364 28 12 4.832638e+01
23 [Take-out] 166 0 156 18 12 1.939343e+01
24 [Walk] 208 1 728 108 52 1.251722e+02
25 [Other place] 75 1 260 40 24 6.204869e+01
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
  scale_y_continuous(limits = c(0, 365)) +
  labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.10.5 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 2 AND status = 'new'

2.10.6 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 2 AND status = 'new'

2.10.6.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 4.190476 6 31

2.10.6.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
1 2 3.666667 4.82141 6.05 27.27222 26

2.10.6.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.228571 5 18

2.10.6.4 Weekly face-to-face interactions among people with whom the participant like to socialize (socialize_meet)

ggplot(filter(ess.tab.alex, socialize_meet < 100)) +
  geom_histogram(aes(x = socialize_meet)) +
  annotate(geom = "text", x = 75, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_meet))), caption = "socialize_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_meet
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 104 364 2313.552 573 156064

2.10.6.5 Weekly ICT interactions among people with whom the participant like to socialize (socialize_chat)

ggplot(filter(ess.tab.alex, socialize_chat < 100)) +
  geom_histogram(aes(x = socialize_chat)) +
  annotate(geom = "text", x = 55, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_chat))), caption = "socialize_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_chat
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 208 468 1583.714 1180 52000

2.10.6.6 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 1 2.323809 3 10

2.10.6.7 Number of people in all groups (group_degree)

ggplot(filter(ess.tab.alex, group_degree < 100)) +
  geom_histogram(aes(x = group_degree)) +
  annotate(geom = "text", x = 20, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$group_degree))), caption = "group_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
group_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 2.2 3 18

3 Basic descriptive statistics for returning participants

3.1 Section 1: Residence and Neighbourhood

3.1.1 Now, let’s start with your home. What is your address?

home_location <- locations[locations$location_category == 1, ]

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .07
van_aoi[["right"]] <- van_aoi[["right"]] + .07
van_aoi[["top"]] <- van_aoi[["top"]] + .01
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .01

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) # see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- as.data.frame(home_by_municipalites) %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Vancouver 114

3.1.2 If you were asked to draw the boundaries of your neighbourhood, what would they be?

prn <- poly_geom[poly_geom$area_type == "neighborhood", ]

## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE, fill = alpha("blue", 0.05), color = alpha("blue", 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$geom)
kable(t(as.matrix(summary(prn$area_m2))),
  caption = "Area (in square meters) of the perceived residential neighborhood",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived residential neighborhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
56113.6 1036930 1980461 3996700 4076921 72732060

NB only 102 valid neighborhoods were collected, as many participants struggled to draw polygons on the map.

3.1.3 How attached are you to your neighbourhood?

# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99, c("interact_id", "neighbourhood_attach")] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, "1 [Not attached at all]",
  ifelse(.ngh_att$neighbourhood_attach_code == 6, "6 [Very attached]",
    .ngh_att$neighbourhood_attach_code
  )
))

# histogram of attachment
ggplot(data = .ngh_att) +
  geom_histogram(aes(x = neighbourhood_attach), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>%
  group_by(neighbourhood_attach) %>%
  dplyr::count() %>%
  arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Neigbourhood attachment
neighbourhood_attach n
2 3
3 6
4 14
5 43
6 [Very attached] 45

3.1.4 On average, how many hours per day do you spend outside of your home?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))),
  caption = "Hours/day outside home",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside home
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 4.7 6 24

3.1.5 Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))),
  caption = "Hours/day outside neighbourhood",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside neighbourhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 2.8 5 24

3.1.6 Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

# extract and recode
.unsafe <- veritas_main[c("interact_id", "unsafe_area")] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, "1 [Yes]",
  ifelse(.unsafe$unsafe_area_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .unsafe) +
  geom_histogram(aes(x = unsafe_area), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>%
  group_by(unsafe_area) %>%
  dplyr::count() %>%
  arrange(unsafe_area)
kable(.unsafe_cnt, caption = "Unsafe areas") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Unsafe areas
unsafe_area n
1 [Yes] 8
2 [No] 106
# map
unsafe <- poly_geom[poly_geom$area_type == "unsafe area", ]

## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$geom)
kable(t(as.matrix(summary(unsafe$area_m2))),
  caption = "Area (in square meters) of the perceived unsafe area",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived unsafe area
Min. 1st Qu. Median Mean 3rd Qu. Max.
22353.1 165889.9 394019.2 399435.1 584906.1 907372.4

3.1.7 Do you spend the night somewhere other than your home at least once per week?

# extract and recode
.o_res <- veritas_main[c("interact_id", "other_resid")] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, "1 [Yes]",
  ifelse(.o_res$other_resid_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .o_res) +
  geom_histogram(aes(x = other_resid), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_resid")

.o_res_cnt <- .o_res %>%
  group_by(other_resid) %>%
  dplyr::count() %>%
  arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other residence
other_resid n
1 [Yes] 14
2 [No] 100

3.2 Section 2: Occupation

3.2.1 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 59
2 [No] 55

3.2.2 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.2.3 On average, how many hours per week do you work?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$working == 1, ]) +
  geom_histogram(aes(x = work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))),
  caption = "Work hours/week",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Work hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 25 35 36.5 40 240

3.2.4 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 5
2 [No] 109

3.2.5 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.2.6 On average, how many hours per week do you study?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$studying == 1, ]) +
  geom_histogram(aes(x = study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))),
  caption = "study hours/week",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
study hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
4 4 7 24 35 70

3.3 Section 3: Shopping activities

3.3.1 In Date of Previous Data Collection Wave, you reported shopping at these locations. Do you still visit these places?

shop_lut <- data.frame(
  location_category_code = c(5, 6, 7, 8, 9, 10),
  location_category = factor(c(
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]"
  ))
)
shop_location <- locations[locations$location_category %in% shop_lut$location_category_code, ] %>%
  filter(location_current == 1) %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(shop_lut, by = "location_category_code")

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 271
6 [Public/farmer’s market] 32
7 [Bakery] 51
8 [Specialty food store] 80
9 [Convenience store/Dépanneur] 17
10 [Liquor store/SAQ] 74
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE to build list of all combination iid/shopping categ
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = shop_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.38 2 7
6 [Public/farmer’s market] 0 0.28 0 2
7 [Bakery] 0 0.45 0 4
8 [Specialty food store] 0 0.70 0 4
9 [Convenience store/Dépanneur] 0 0.15 0 3
10 [Liquor store/SAQ] 0 0.65 1 3

3.3.2 Thinking about the places where you shop, are there other supermarkets, farmers markets, bakeries, specialty stores, convenience stores or liquor stores you visit at least once per month?

# extract and recode
.grp_shopping <- veritas_main[c("interact_id", "grp_shopping_new")] %>% dplyr::rename(grp_shopping_new_code = grp_shopping_new)
.grp_shopping$grp_shopping_new <- factor(ifelse(.grp_shopping$grp_shopping_new_code == 1, "1 [Yes]",
  ifelse(.grp_shopping$grp_shopping_new_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .grp_shopping) +
  geom_histogram(aes(x = grp_shopping_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "grp_shopping_new")

.grp_shopping_cnt <- .grp_shopping %>%
  group_by(grp_shopping_new) %>%
  dplyr::count() %>%
  arrange(grp_shopping_new)
kable(.grp_shopping_cnt, caption = "New shopping places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
New shopping places
grp_shopping_new n
1 [Yes] 86
2 [No] 28

3.4 Section 4: Services

3.4.1 In Date of Previous Data Collection Wave, you reported using services at these locations. Do you still visit these places?

serv_lut <- data.frame(
  location_category_code = c(11, 12, 13, 14, 15),
  location_category = factor(c(
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 Doctor/healthcare provider]"
  ))
)
serv_location <- locations[locations$location_category %in% serv_lut$location_category_code, ] %>%
  filter(location_current == 1) %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(serv_lut, by = "location_category_code")

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Service locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
11 [Bank] 52
12 [Hair salon/barbershop] 35
13 [Post office] 39
14 [Drugstore] 90
15 Doctor/healthcare provider] 30
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = serv_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
11 [Bank] 0 0.46 0 1
12 [Hair salon/barbershop] 0 0.31 0 1
13 [Post office] 0 0.34 0 1
14 [Drugstore] 0 0.79 1 1
15 Doctor/healthcare provider] 0 0.26 0 2

3.4.2 Thinking about the places where you use services, are there other banks, hair salons, post offices, drugstores, doctors or other healthcare providers you visit at least once per month?

NB: Variable grp_services_new has not been properly recorded in Vancouver wave 2 for returning participants.

# extract and recode
.grp_services <- veritas_main[c("interact_id", "grp_services_new")] %>% dplyr::rename(grp_services_new_code = grp_services_new)
.grp_services$grp_services_new <- factor(ifelse(.grp_services$grp_services_new_code == 1, "1 [Yes]",
  ifelse(.grp_services$grp_services_new_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .grp_services) +
  geom_histogram(aes(x = grp_services_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "grp_services_new")

.grp_services_cnt <- .grp_services %>%
  group_by(grp_services_new) %>%
  dplyr::count() %>%
  arrange(grp_services_new)
kable(.grp_services_cnt, caption = "New services places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

3.5 Section 5: Transportation

3.5.1 In Date of Previous Data Collection Wave, you reported accessing these public transit stops from your home. Do you still access these places?

transp_location <- locations[locations$location_category == 16, ] %>% filter(location_current == 1)

bm + geom_sf(data = transp_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.5.2 Are there other public transit stops you access from your home at least once per month?

NB: Variable grp_ptransit_new has not been properly recorded in Vancouver wave 2 for returning participants.

# extract and recode
.grp_ptransit <- veritas_main[c("interact_id", "grp_ptransit_new")] %>% dplyr::rename(grp_ptransit_new_code = grp_ptransit_new)
.grp_ptransit$grp_ptransit_new <- factor(ifelse(.grp_ptransit$grp_ptransit_new_code == 1, "1 [Yes]",
  ifelse(.grp_ptransit$grp_ptransit_new_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .grp_ptransit) +
  geom_histogram(aes(x = grp_ptransit_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "grp_ptransit_new")

.grp_ptransit_cnt <- .grp_ptransit %>%
  group_by(grp_ptransit_new) %>%
  dplyr::count() %>%
  arrange(grp_ptransit_new)
kable(.grp_ptransit_cnt, caption = "New transit places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

3.6 Section 6: Leisure activities

3.6.1 In Date of Previous Data Collection Wave, you reported doing leisure activities at these locations. Do you still visit these places?

leisure_lut <- data.frame(
  location_category_code = c(17, 18, 19, 20, 21, 22, 23, 24),
  location_category = factor(c(
    "17 [Leisure-time physical activity]",
    "18 [Park]",
    "19 [Cultural activity]",
    "20 [Volunteering place]",
    "21 [Religious or spiritual activity]",
    "22 [Restaurant, café, bar, etc. ]",
    "23 [Take-out]",
    "24 [Walk]"
  ))
)
leisure_location <- locations[locations$location_category %in% leisure_lut$location_category_code, ] %>%
  dplyr::rename(location_category_code = location_category) %>%
  inner_join(leisure_lut, by = "location_category_code")

# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = leisure_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
17 [Leisure-time physical activity] 64
18 [Park] 113
19 [Cultural activity] 8
20 [Volunteering place] 17
21 [Religious or spiritual activity] 3
22 [Restaurant, café, bar, etc. ] 57
23 [Take-out] 16
24 [Walk] 128
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data_frame(
  interact_id = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- data_frame(
    interact_id = as.character(iid),
    location_category = leisure_lut$location_category
  )
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)

.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of leisure locations by participant and category
location_category min mean median max
17 [Leisure-time physical activity] 0 0.56 0 4
18 [Park] 0 0.99 1 5
19 [Cultural activity] 0 0.07 0 1
20 [Volunteering place] 0 0.15 0 2
21 [Religious or spiritual activity] 0 0.03 0 1
22 [Restaurant, café, bar, etc. ] 0 0.50 0 4
23 [Take-out] 0 0.14 0 3
24 [Walk] 0 1.12 1 5

3.6.2 Thinking about the places where you do leisure activities, are there other parks, gyms, movie theaters, concert halls, churchs, temples, restaurants, cafés, bars or any places where you do leisure activities and that you visit at least once per month?

NB: Variable grp_leisure_new has not been properly recorded in Vancouver wave 2 for returning participants.

# extract and recode
.grp_leisure <- veritas_main[c("interact_id", "grp_leisure_new")] %>% dplyr::rename(grp_leisure_new_code = grp_leisure_new)
.grp_leisure$grp_leisure_new <- factor(ifelse(.grp_leisure$grp_leisure_new_code == 1, "1 [Yes]",
  ifelse(.grp_leisure$grp_leisure_new_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .grp_leisure) +
  geom_histogram(aes(x = grp_leisure_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "grp_leisure_new")

.grp_leisure_cnt <- .grp_leisure %>%
  group_by(grp_leisure_new) %>%
  dplyr::count() %>%
  arrange(grp_leisure_new)
kable(.grp_leisure_cnt, caption = "New leisure places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

3.7 Section 7: Other places/activities

3.7.1 Here are the other places you reported regularly visiting in Date of Previous Data Collection Wave. Do you still visit these places?

other_location <- locations[locations$location_category == 25, ] %>% filter(location_current == 1)

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.7.2 Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

NB: Variable other_new has not been properly recorded in Vancouver wave 2 for returning participants.

# extract and recode
.other <- veritas_main[c("interact_id", "other_new")] %>% dplyr::rename(other_new_code = other_new)
.other$other_new <- factor(ifelse(.other$other_new_code == 1, "1 [Yes]",
  ifelse(.other$other_new_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .other) +
  geom_histogram(aes(x = other_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_new")

.other_cnt <- .other %>%
  group_by(other_new) %>%
  dplyr::count() %>%
  arrange(other_new)
kable(.other_cnt, caption = "New other places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

3.8 Section 8: Areas of change

3.8.1 Can you locate areas where you have noticed an improvement of the urban environment?

# extract and recode
.improv <- veritas_main[c("interact_id", "improvement_none")] %>% dplyr::rename(improvement_none_code = improvement_none)
.improv$improvement_none <- factor(ifelse(.improv$improvement_none_code == 1, "1 [TRUE]",
  ifelse(.improv$improvement_none_code == 0, "0 [FALSE]", "N/A")
))

# histogram of answers
ggplot(data = .improv) +
  geom_histogram(aes(x = improvement_none), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "improvement_none")

.improv_cnt <- .improv %>%
  group_by(improvement_none) %>%
  dplyr::count() %>%
  arrange(improvement_none)
kable(.improv_cnt, caption = "No area of improvement") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
No area of improvement
improvement_none n
0 [FALSE] 34
1 [TRUE] 80
# polgon extraction
improv <- poly_geom[poly_geom$area_type == "improvement", ]

# Map
bm + geom_sf(data = improv, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
improv <- improv %>%
  mutate(area_m2 = st_area(.))
kable(t(as.matrix(summary(improv$area_m2))),
  caption = "Area (in square meters) of the perceived improvement areas",
  digits = 1
) %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived improvement areas
Min. 1st Qu. Median Mean 3rd Qu. Max.
395.2 8388.1 50788 254214.4 244756.9 1795921

3.8.2 Can you locate areas where you have noticed a deterioration of the urban environment?

# extract and recode
.deter <- veritas_main[c("interact_id", "deterioration_none")] %>% dplyr::rename(deterioration_none_code = deterioration_none)
.deter$deterioration_none <- factor(ifelse(.deter$deterioration_none_code == 1, "1 [TRUE]",
  ifelse(.deter$deterioration_none_code == 0, "0 [FALSE]", "N/A")
))

# histogram of answers
ggplot(data = .deter) +
  geom_histogram(aes(x = deterioration_none), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "deterioration_none")

.deter_cnt <- .deter %>%
  group_by(deterioration_none) %>%
  dplyr::count() %>%
  arrange(deterioration_none)
kable(.deter_cnt, caption = "No area of deterioration") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
No area of deterioration
deterioration_none n
0 [FALSE] 25
1 [TRUE] 89
# polgon extraction
deter <- poly_geom[poly_geom$area_type == "deterioration", ]

# Map
bm + geom_sf(data = deter, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
deter <- deter %>%
  mutate(area_m2 = st_area(.))
kable(t(as.matrix(summary(deter$area_m2))),
  caption = "Area (in square meters) of the perceived deterioration areas",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived deterioration areas
Min. 1st Qu. Median Mean 3rd Qu. Max.
3376.6 26423.4 149723.4 418591 507291.5 2187502

3.9 Section 9: Social contact

3.9.1 In Date of Previous Data Collection Wave, you reported visiting people at their home. Do you still visit these places?

visiting_location <- locations[locations$location_category == 26, ] %>% filter(location_current == 1)

bm + geom_sf(data = visiting_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

3.9.2 Do you visit anyone else at his or her home at least once per month?

NB: Variable visiting_new has not been properly recorded in Vancouver wave 2 for returning participants.

# extract and recode
.visiting <- veritas_main[c("interact_id", "visiting_new")] %>% dplyr::rename(visiting_code = visiting_new)
.visiting$visiting_new <- factor(ifelse(.visiting$visiting_code == 1, "1 [Yes]",
  ifelse(.visiting$visiting_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .visiting) +
  geom_histogram(aes(x = visiting_new), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting_new")

.visiting_cnt <- .visiting %>%
  group_by(visiting_new) %>%
  dplyr::count() %>%
  arrange(visiting_new)
kable(.visiting_cnt, caption = "Social contact") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

3.9.3 Great, we are almost done completing this questionnaire. You have documented all your activity places on a map, and specified with whom you generally do these activities. These last few questions concern the people you documented earlier.

# compute statistics on groups / participant
# > one needs to account for participants who did not report any group
.gr_iid_cnt <- as.data.frame(group[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_gr_iid <- anti_join(veritas_main[c("interact_id")], .gr_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.gr_iid_cnt <- bind_rows(.gr_iid_cnt, .no_gr_iid)

kable(t(as.matrix(summary(.gr_iid_cnt$n))),
  caption = "Number of groups per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of groups per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.9 3 8
# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))),
  caption = "Number of people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 5 5.9 8 28
# histogram
.sc_iid_cnt <- .pl_iid_cnt %>% mutate(soc_type = "people")
.sc_iid_cnt <- .gr_iid_cnt %>%
  mutate(soc_type = "group") %>%
  bind_rows(.sc_iid_cnt)

ggplot(data = .sc_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count), fill = soc_type), position = "dodge") +
  labs(x = "Social network size by element type", fill = element_blank())

3.9.3.1 Among these people, who do you discuss important matters with?

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))),
  caption = "Number of important people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.2 4 19
kable(t(as.matrix(summary(.n_people_imp$pct))),
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 33.3 52.3 57.8 87.5 100 6

3.9.3.2 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))),
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3.5 4.2 6 19
kable(t(as.matrix(summary(.n_people_soc$pct))),
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 51.8 83.3 73.7 100 100 6

3.9.3.3 Among these people, who do you meet often with but do not necessarily feel close to?

# extract number of important people / participant
.n_not_close <- not_close %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_not_close <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_not_close, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_not_close = n.y) %>%
  mutate(pct = 100 * n_not_close / n_people)

kable(t(as.matrix(summary(.n_people_not_close$n_not_close))),
  caption = "Number of not so close people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of not so close people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0.8 1 9
kable(t(as.matrix(summary(.n_people_not_close$pct))),
  caption = "% of not so close people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of not so close people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 0 11.1 14.9 100 6

3.9.3.4 Among these people, who knows whom?

# extract number of who knows who relationships
.n_relat <- relationship %>%
  filter(relationship_type == 1) %>%
  dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_relat <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_relat, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_relat = n.y) %>%
  mutate(pct = 100 * n_relat * 2 / (n_people * (n_people - 1))) # potential number of relationships = N x (N -1) / 2

kable(t(as.matrix(summary(.n_people_relat$n_relat))),
  caption = "Number of relationships « who knows who » per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 6 11.7 14 199
kable(t(as.matrix(summary(.n_people_relat$pct))),
  caption = "% of relationships « who knows who » per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 36.7 60.7 61.9 100 100 17

3.10 Derived metrics

3.10.1 Existence of improvement and deterioration areas by participant

Combination of improvement and/or deterioration areas per participant

# cross tab of improvement vs deteriation areas
.improv <- improv[c("interact_id")] %>%
  mutate(improv = "Improvement")
.deter <- deter[c("interact_id")] %>%
  mutate(deter = "Deterioration")
.ct_impr_deter <- veritas_main[c("interact_id")] %>%
  transmute(interact_id = as.character(interact_id)) %>%
  left_join(.improv, by = "interact_id") %>%
  left_join(.deter, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), "N/A"))

kable(table(.ct_impr_deter$improv, .ct_impr_deter$deter), caption = "Improvement vs. deterioration") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left", row_label_position = "r") %>%
  column_spec(1, bold = T)
Improvement vs. deterioration
Deterioration N/A
Improvement 6 24
N/A 15 69

3.10.2 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

# code  en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99    Other

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1 & location_current == 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car (driver)" = sum(location_tmode_1),
    "By car (passenger)" = sum(location_tmode_2),
    "By taxi/Uber" = sum(location_tmode_3),
    "On foot" = sum(location_tmode_4),
    "By bike" = sum(location_tmode_5),
    "By bus" = sum(location_tmode_6),
    "By train" = sum(location_tmode_7),
    "Other" = sum(location_tmode_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car (driver) By car (passenger) By taxi/Uber On foot By bike By bus By train Other
2 [Other residence] 15 6 5 0 0 1 0 0 8
3 [Work] 95 28 6 1 20 27 11 7 25
4 [School/College/University] 6 1 0 0 3 1 0 0 1
5 [Supermarket] 271 99 18 0 162 34 10 0 0
6 [Public/farmer’s market] 32 7 5 0 20 8 0 0 0
7 [Bakery] 51 14 3 0 36 9 1 0 0
8 [Specialty food store] 80 25 0 0 56 21 2 0 0
9 [Convenience store/Dépanneur] 17 6 0 0 13 0 0 0 0
10 [Liquor store/SAQ] 74 26 2 0 50 12 1 0 1
11 [Bank] 52 11 1 0 39 8 0 0 0
12 [Hair salon/barbershop] 35 14 3 0 19 5 3 0 1
13 [Post office] 39 6 0 0 35 5 0 0 0
14 [Drugstore] 90 24 4 0 67 13 3 0 0
15 [Doctor/healthcare provider] 30 9 2 0 15 3 5 1 1
16 [Public transit stop] 41 0 0 0 36 2 4 0 0
17 [Leisure-time physical activity] 64 22 8 0 26 12 0 0 4
18 [Park] 113 14 7 0 77 25 3 0 1
19 [Cultural activity] 8 6 3 0 0 1 0 0 1
20 [Volunteering place] 17 4 1 0 10 1 1 0 2
21 [Religious/spiritual activity] 3 1 1 0 1 0 0 0 0
22 [Restaurant, café, bar, etc.] 57 19 7 1 28 12 2 0 0
23 [Take-out] 16 11 1 0 6 1 0 0 0
24 [Walk] 128 18 9 0 94 20 1 0 0
25 [Other place] 84 28 1 1 46 14 7 0 2
26 [Social contact residence] 38 20 5 0 11 11 3 0 1
# graph
.tm1 <- .tm %>%
  filter(location_tmode_1 == 1) %>%
  mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
  filter(location_tmode_2 == 1) %>%
  mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
  filter(location_tmode_3 == 1) %>%
  mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
  filter(location_tmode_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode_6 == 1) %>%
  mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
  filter(location_tmode_7 == 1) %>%
  mutate(tm = "[7] By train")
.tm99 <- .tm %>%
  filter(location_tmode_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
  bind_rows(.tm3) %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm7) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

3.10.3 Visiting places alone

Based on the answers to the question Do you usually go to this place alone or with other people?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.alone <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1 & location_current == 1) %>%
  left_join(loc_labels) %>%
  mutate(location_alone_recode = case_when(
    location_alone2 == 1 ~ 1,
    location_alone2 == 2 ~ 0
  ))

.alone_grouped <- .alone %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited alone" = sum(location_alone_recode),
    "Visited alone (%)" = round(sum(location_alone_recode) * 100.0 / n(), digits = 1)
  )

kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places alone
description N Visited alone Visited alone (%)
2 [Other residence] 15 0 0.0
3 [Work] 95 38 40.0
4 [School/College/University] 6 3 50.0
5 [Supermarket] 271 202 74.5
6 [Public/farmer’s market] 32 17 53.1
7 [Bakery] 51 38 74.5
8 [Specialty food store] 80 64 80.0
9 [Convenience store/Dépanneur] 17 15 88.2
10 [Liquor store/SAQ] 74 62 83.8
11 [Bank] 52 49 94.2
12 [Hair salon/barbershop] 35 31 88.6
13 [Post office] 39 34 87.2
14 [Drugstore] 90 72 80.0
15 [Doctor/healthcare provider] 30 26 86.7
16 [Public transit stop] 41 40 97.6
17 [Leisure-time physical activity] 64 26 40.6
18 [Park] 113 52 46.0
19 [Cultural activity] 8 2 25.0
20 [Volunteering place] 17 7 41.2
21 [Religious/spiritual activity] 3 2 66.7
22 [Restaurant, café, bar, etc.] 57 17 29.8
23 [Take-out] 16 13 81.2
24 [Walk] 128 60 46.9
25 [Other place] 84 56 66.7
26 [Social contact residence] 38 NA NA
# histogram of answers
ggplot(data = .alone) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_alone2)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("N/A", "Alone", "With someone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited alone", x = element_blank()) +
  coord_flip()

3.10.4 Visit frequency

Based on the answers to the question How often do you go there?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1 & location_current == 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit),
    max = max(location_freq_visit),
    mean = mean(location_freq_visit),
    median = median(location_freq_visit),
    sd = sd(location_freq_visit)
  )

kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
2 [Other residence] 15 5 364 89.266667 12 135.896214
3 [Work] 95 0 5200 234.063158 208 532.336496
4 [School/College/University] 6 1 364 251.500000 364 175.028855
5 [Supermarket] 271 1 260 50.822878 36 52.035013
6 [Public/farmer’s market] 32 1 104 30.343750 24 22.176141
7 [Bakery] 51 2 240 30.117647 24 42.757758
8 [Specialty food store] 80 1 156 38.462500 24 34.587604
9 [Convenience store/Dépanneur] 17 8 260 52.941176 48 64.614695
10 [Liquor store/SAQ] 74 3 156 27.148649 12 26.640006
11 [Bank] 52 0 104 17.500000 12 16.589035
12 [Hair salon/barbershop] 35 2 24 9.771429 12 5.636466
13 [Post office] 39 2 120 35.538461 24 34.217452
14 [Drugstore] 90 2 208 39.600000 24 37.380762
15 [Doctor/healthcare provider] 30 1 104 12.133333 12 18.969546
16 [Public transit stop] 41 12 260 51.317073 24 64.788286
17 [Leisure-time physical activity] 64 12 364 109.250000 52 106.136480
18 [Park] 113 0 364 77.610619 36 88.053498
19 [Cultural activity] 8 12 260 76.500000 12 108.104447
20 [Volunteering place] 17 12 365 148.764706 104 129.303678
21 [Religious/spiritual activity] 3 12 364 142.666667 52 192.720869
22 [Restaurant, café, bar, etc.] 57 1 208 25.157895 12 32.086262
23 [Take-out] 16 12 52 22.000000 18 13.145341
24 [Walk] 128 1 1456 110.085938 52 173.575282
25 [Other place] 84 2 260 29.500000 24 33.098429
26 [Social contact residence] 38 2 365 47.684210 24 66.682428
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
  scale_y_continuous(limits = c(0, 365)) +
  labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

3.10.5 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 2 AND status = 'return'

3.10.6 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 2 AND status = 'return'

3.10.6.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 5 5.587719 8 27

3.10.6.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
1 3.05 6.380952 8.296198 11.85268 31.64865 8

3.10.6.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3.5 4.18421 6 19

3.10.6.4 Weekly face-to-face interactions among people with whom the participant like to socialize (socialize_meet)

ggplot(filter(ess.tab.alex, socialize_meet < 100)) +
  geom_histogram(aes(x = socialize_meet)) +
  annotate(geom = "text", x = 75, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_meet))), caption = "socialize_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_meet
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 117 364 1253.711 658.25 52046

3.10.6.5 Weekly ICT interactions among people with whom the participant like to socialize (socialize_chat)

ggplot(filter(ess.tab.alex, socialize_chat < 100)) +
  geom_histogram(aes(x = socialize_chat)) +
  annotate(geom = "text", x = 55, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_chat))), caption = "socialize_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_chat
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 156 416 732.2456 871 8008

3.10.6.6 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 3.22807 4 19

3.10.6.7 Number of people in all groups (group_degree)

ggplot(filter(ess.tab.alex, group_degree < 100)) +
  geom_histogram(aes(x = group_degree)) +
  annotate(geom = "text", x = 20, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$group_degree))), caption = "group_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
group_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 4 6.008772 9 35