Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:
The diagram below illustrates the various entities collected throught the VERITAS questionnaire:
home_location <- locations[locations$location_category == 1, ]
## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .02
van_aoi[["right"]] <- van_aoi[["right"]] + .01
van_aoi[["top"]] <- van_aoi[["top"]] + .018
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .018
bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) # see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their original position to protect privacy.
# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"]) %>%
as.data.frame()
home_by_mun_cnt <- home_by_municipalites %>%
group_by(NAME) %>%
dplyr::count() %>%
arrange(desc(n), NAME)
home_by_mun_cnt$geom <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| NAME | n |
|---|---|
| Vancouver | 208 |
| Burnaby | 1 |
| Greater Vancouver | 1 |
# N of addresses by date of move
year_of_move <- veritas_main[c("interact_id", "home_move_date")]
year_of_move$home_move_date <- year(ymd(year_of_move$home_move_date))
ggplot(data = year_of_move) +
geom_histogram(aes(x = home_move_date))

# recode date of move
year_of_move$home_move_date_recode <- as.character(year_of_move$home_move_date)
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2005] <- "2005 - 2001"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2000] <- "2000 - 1991"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 1990] <- paste("1990 -", min(year_of_move$home_move_date))
year_of_move_cnt <- year_of_move %>%
group_by(home_move_date_recode) %>%
dplyr::count() %>%
arrange(desc(home_move_date_recode))
kable(year_of_move_cnt, caption = "Year of move to current address") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| home_move_date_recode | n |
|---|---|
| 2018 | 17 |
| 2017 | 8 |
| 2016 | 17 |
| 2015 | 7 |
| 2014 | 7 |
| 2013 | 8 |
| 2012 | 6 |
| 2011 | 6 |
| 2010 | 7 |
| 2009 | 7 |
| 2008 | 3 |
| 2007 | 7 |
| 2006 | 3 |
| 2005 - 2001 | 35 |
| 2000 - 1991 | 46 |
| 1990 - 1950 | 26 |
prn <- poly_geom[poly_geom$area_type == "neighborhood", ]
## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE, fill = alpha("blue", 0.05), color = alpha("blue", 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$geom)
kable(t(as.matrix(summary(prn$area_m2))),
caption = "Area (in square meters) of the perceived residential neighborhood",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 738.9 | 780272.9 | 1846898 | 3372615 | 4217316 | 26812746 |
NB only 179 valid neighborhoods were collected, as many participants struggled to draw polygons on the map.
# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99, c("interact_id", "neighbourhood_attach")] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, "1 [Not attached at all]",
ifelse(.ngh_att$neighbourhood_attach_code == 6, "6 [Very attached]",
.ngh_att$neighbourhood_attach_code
)
))
# histogram of attachment
ggplot(data = .ngh_att) +
geom_histogram(aes(x = neighbourhood_attach), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>%
group_by(neighbourhood_attach) %>%
dplyr::count() %>%
arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| neighbourhood_attach | n |
|---|---|
| 1 [Not attached at all] | 1 |
| 2 | 6 |
| 3 | 16 |
| 4 | 31 |
| 5 | 88 |
| 6 [Very attached] | 65 |
# histogram of n hours out
ggplot(data = veritas_main) +
geom_histogram(aes(x = hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))),
caption = "Hours/day outside home",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 1 | 4 | 6 | 7 | 10 | 16 |
# histogram of n hours out
ggplot(data = veritas_main) +
geom_histogram(aes(x = hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))),
caption = "Hours/day outside neighbourhood",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 0 | 2 | 3 | 4.5 | 8 | 15 |
# extract and recode
.unsafe <- veritas_main[c("interact_id", "unsafe_area")] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, "1 [Yes]",
ifelse(.unsafe$unsafe_area_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .unsafe) +
geom_histogram(aes(x = unsafe_area), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>%
group_by(unsafe_area) %>%
dplyr::count() %>%
arrange(unsafe_area)
kable(.unsafe_cnt, caption = "unsafe_area") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| unsafe_area | n |
|---|---|
| 1 [Yes] | 9 |
| 2 [No] | 201 |
# map
unsafe <- poly_geom[poly_geom$area_type == "unsafe area", ]
## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$geom)
kable(t(as.matrix(summary(unsafe$area_m2))),
caption = "Area (in square meters) of the perceived unsafe area",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 8831.8 | 113728.2 | 204269.7 | 258345.4 | 318770.9 | 831151.6 |
# extract and recode
.o_res <- veritas_main[c("interact_id", "other_resid")] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, "1 [Yes]",
ifelse(.o_res$other_resid_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .o_res) +
geom_histogram(aes(x = other_resid), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "other_resid")

.o_res_cnt <- .o_res %>%
group_by(other_resid) %>%
dplyr::count() %>%
arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| other_resid | n |
|---|---|
| 1 [Yes] | 11 |
| 2 [No] | 199 |
# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
ifelse(.work$working_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .work) +
geom_histogram(aes(x = working), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "working")

.work_cnt <- .work %>%
group_by(working) %>%
dplyr::count() %>%
arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| working | n |
|---|---|
| 1 [Yes] | 128 |
| 2 [No] | 82 |
work_location <- locations[locations$location_category == 3, ]
bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$working == 1, ]) +
geom_histogram(aes(x = work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))),
caption = "Work hours/week",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 2 | 24.8 | 37 | 32.7 | 40 | 84 |
# extract and recode
.work_pa <- veritas_main[veritas_main$working == 1, c("interact_id", "work_pa")] %>% dplyr::rename(work_pa_code = work_pa)
.work_pa$work_pa <- factor(ifelse(.work_pa$work_pa_code == 1, "1 [Mainly sitting with slight arm movements]",
ifelse(.work_pa$work_pa_code == 2, "2 [Sitting and standing with some walking]",
ifelse(.work_pa$work_pa_code == 3, "3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)]",
ifelse(.work_pa$work_pa_code == 4, "4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)]", "N/A")
)
)
))
# histogram of answers
ggplot(data = .work_pa) +
geom_histogram(aes(x = work_pa), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Physical activity at work")

.work_pa_cnt <- .work_pa %>%
group_by(work_pa) %>%
dplyr::count() %>%
arrange(work_pa)
kable(.work_pa_cnt, caption = "Physical activity at work") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| work_pa | n |
|---|---|
| 1 [Mainly sitting with slight arm movements] | 55 |
| 2 [Sitting and standing with some walking] | 57 |
| 3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)] | 15 |
| 4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)] | 1 |
# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .study) +
geom_histogram(aes(x = studying), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Studying")

.study_cnt <- .study %>%
group_by(studying) %>%
dplyr::count() %>%
arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| studying | n |
|---|---|
| 1 [Yes] | 17 |
| 2 [No] | 193 |
study_location <- locations[locations$location_category == 4, ]
bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$studying == 1, ]) +
geom_histogram(aes(x = study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))),
caption = "study hours/week",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|
| 2 | 12 | 20 | 23.6 | 30 | 60 |
The following questions are used to generate the locations grouped into this section:
shop_location <- locations[locations$location_category %in% c(5, 6, 7, 8, 9, 10), ] %>% dplyr::rename(location_category_code = location_category)
shop_location$location_category <- factor(ifelse(shop_location$location_category_code == 5, " 5 [Supermarket]",
ifelse(shop_location$location_category_code == 6, " 6 [Public/farmer’s market]",
ifelse(shop_location$location_category_code == 7, " 7 [Bakery]",
ifelse(shop_location$location_category_code == 8, " 8 [Specialty food store]",
ifelse(shop_location$location_category_code == 9, " 9 [Convenience store/Dépanneur]", "10 [Liquor store/SAQ]")
)
)
)
))
# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | n |
|---|---|
| 5 [Supermarket] | 549 |
| 6 [Public/farmer’s market] | 109 |
| 7 [Bakery] | 117 |
| 8 [Specialty food store] | 193 |
| 9 [Convenience store/Dépanneur] | 50 |
| 10 [Liquor store/SAQ] | 185 |
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | min | mean | median | max |
|---|---|---|---|---|
| 5 [Supermarket] | 0 | 2.61 | 3 | 5 |
| 6 [Public/farmer’s market] | 0 | 0.52 | 0 | 5 |
| 7 [Bakery] | 0 | 0.56 | 0 | 4 |
| 8 [Specialty food store] | 0 | 0.92 | 0 | 5 |
| 9 [Convenience store/Dépanneur] | 0 | 0.24 | 0 | 4 |
| 10 [Liquor store/SAQ] | 0 | 0.88 | 1 | 5 |
The following questions are used to generate the locations grouped into this section:
serv_location <- locations[locations$location_category %in% c(11, 12, 13, 14, 15), ] %>% dplyr::rename(location_category_code = location_category)
serv_location$location_category <- factor(ifelse(serv_location$location_category_code == 11, "11 [Bank]",
ifelse(serv_location$location_category_code == 12, "12 [Hair salon/barbershop]",
ifelse(serv_location$location_category_code == 13, "13 [Post office]",
ifelse(serv_location$location_category_code == 14, "14 [Drugstore]", "15 Doctor/healthcare provider]")
)
)
))
# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | n |
|---|---|
| 11 [Bank] | 159 |
| 12 [Hair salon/barbershop] | 149 |
| 13 [Post office] | 139 |
| 14 [Drugstore] | 192 |
| 15 Doctor/healthcare provider] | 211 |
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | min | mean | median | max |
|---|---|---|---|---|
| 11 [Bank] | 0 | 0.76 | 1 | 1 |
| 12 [Hair salon/barbershop] | 0 | 0.71 | 1 | 1 |
| 13 [Post office] | 0 | 0.66 | 1 | 1 |
| 14 [Drugstore] | 0 | 0.91 | 1 | 1 |
| 15 Doctor/healthcare provider] | 0 | 1.00 | 1 | 4 |
# extract and recode
.transp <- veritas_main[c("interact_id", "public_transit")] %>% dplyr::rename(public_transit_code = public_transit)
.transp$public_transit <- factor(ifelse(.transp$public_transit_code == 1, "1 [Yes]",
ifelse(.transp$public_transit_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .transp) +
geom_histogram(aes(x = public_transit), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "public_transit")

.transp_cnt <- .transp %>%
group_by(public_transit) %>%
dplyr::count() %>%
arrange(public_transit)
kable(.transp_cnt, caption = "Use public transit") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| public_transit | n |
|---|---|
| 1 [Yes] | 179 |
| 2 [No] | 31 |
transp_location <- locations[locations$location_category == 16, ]
bm + geom_sf(data = transp_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

The following questions are used to generate the locations grouped into this section:
leisure_location <- locations[locations$location_category %in% c(17, 18, 19, 20, 21, 22, 23, 24), ] %>% dplyr::rename(location_category_code = location_category)
leisure_location$location_category <- factor(ifelse(leisure_location$location_category_code == 17, "17 [Leisure-time physical activity]",
ifelse(leisure_location$location_category_code == 18, "18 [Park]",
ifelse(leisure_location$location_category_code == 19, "19 [Cultural activity]",
ifelse(leisure_location$location_category_code == 20, "20 [Volunteering place]",
ifelse(leisure_location$location_category_code == 21, "21 [Religious or spiritual activity]",
ifelse(leisure_location$location_category_code == 22, "22 [Restaurant, café, bar, etc. ]",
ifelse(leisure_location$location_category_code == 23, "23 [Take-out]", "24 [Walk]")
)
)
)
)
)
))
# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = leisure_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | n |
|---|---|
| 17 [Leisure-time physical activity] | 301 |
| 18 [Park] | 348 |
| 19 [Cultural activity] | 169 |
| 20 [Volunteering place] | 138 |
| 21 [Religious or spiritual activity] | 46 |
| 22 [Restaurant, café, bar, etc. ] | 440 |
| 23 [Take-out] | 132 |
| 24 [Walk] | 364 |
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| location_category | min | mean | median | max |
|---|---|---|---|---|
| 17 [Leisure-time physical activity] | 0 | 1.43 | 1 | 5 |
| 18 [Park] | 0 | 1.66 | 1 | 5 |
| 19 [Cultural activity] | 0 | 0.80 | 0 | 5 |
| 20 [Volunteering place] | 0 | 0.66 | 0 | 5 |
| 21 [Religious or spiritual activity] | 0 | 0.22 | 0 | 3 |
| 22 [Restaurant, café, bar, etc. ] | 0 | 2.10 | 2 | 5 |
| 23 [Take-out] | 0 | 0.63 | 0 | 5 |
| 24 [Walk] | 0 | 1.73 | 1 | 5 |
# extract and recode
.other <- veritas_main[c("interact_id", "other")] %>% dplyr::rename(other_code = other)
.other$other <- factor(ifelse(.other$other_code == 1, "1 [Yes]",
ifelse(.other$other_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .other) +
geom_histogram(aes(x = other), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "other")

.other_cnt <- .other %>%
group_by(other) %>%
dplyr::count() %>%
arrange(other)
kable(.other_cnt, caption = "Other places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| other | n |
|---|---|
| 1 [Yes] | 85 |
| 2 [No] | 125 |
other_location <- locations[locations$location_category == 25, ]
bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

Based on the answers to the question Usually, how do you go there? (Check all that apply.).
# code en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99 Other
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.tm <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels)
.tm_grouped <- .tm %>%
group_by(description) %>%
dplyr::summarise(
N = n(), "By car (driver)" = sum(location_tmode_1),
"By car (passenger)" = sum(location_tmode_2),
"By taxi/Uber" = sum(location_tmode_3),
"On foot" = sum(location_tmode_4),
"By bike" = sum(location_tmode_5),
"By bus" = sum(location_tmode_6),
"By train" = sum(location_tmode_7),
"Other" = sum(location_tmode_99)
)
kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| description | N | By car (driver) | By car (passenger) | By taxi/Uber | On foot | By bike | By bus | By train | Other |
|---|---|---|---|---|---|---|---|---|---|
| 2 [Other residence] | 11 | 3 | 5 | 1 | 2 | 3 | 5 | 1 | 1 |
| 3 [Work] | 176 | 71 | 4 | 1 | 45 | 53 | 41 | 13 | 17 |
| 4 [School/College/University] | 24 | 5 | 0 | 0 | 8 | 2 | 15 | 3 | 4 |
| 5 [Supermarket] | 549 | 231 | 50 | 0 | 298 | 87 | 39 | 3 | 4 |
| 6 [Public/farmer’s market] | 109 | 34 | 8 | 0 | 58 | 35 | 9 | 0 | 0 |
| 7 [Bakery] | 117 | 27 | 3 | 0 | 85 | 18 | 5 | 0 | 1 |
| 8 [Specialty food store] | 193 | 46 | 8 | 0 | 130 | 37 | 17 | 2 | 1 |
| 9 [Convenience store/Dépanneur] | 50 | 13 | 1 | 0 | 40 | 1 | 1 | 0 | 0 |
| 10 [Liquor store/SAQ] | 185 | 80 | 13 | 0 | 98 | 27 | 6 | 0 | 0 |
| 11 [Bank] | 159 | 34 | 3 | 0 | 124 | 21 | 9 | 2 | 2 |
| 12 [Hair salon/barbershop] | 149 | 59 | 6 | 0 | 76 | 23 | 26 | 2 | 3 |
| 13 [Post office] | 139 | 23 | 0 | 0 | 121 | 21 | 3 | 0 | 0 |
| 14 [Drugstore] | 192 | 59 | 6 | 0 | 145 | 26 | 8 | 0 | 0 |
| 15 [Doctor/healthcare provider] | 211 | 88 | 7 | 0 | 93 | 34 | 49 | 7 | 2 |
| 16 [Public transit stop] | 422 | 3 | 3 | 0 | 397 | 5 | 0 | 0 | 26 |
| 17 [Leisure-time physical activity] | 301 | 109 | 22 | 0 | 141 | 87 | 17 | 6 | 10 |
| 18 [Park] | 348 | 75 | 31 | 0 | 224 | 97 | 18 | 1 | 6 |
| 19 [Cultural activity] | 169 | 55 | 34 | 2 | 50 | 26 | 56 | 4 | 1 |
| 20 [Volunteering place] | 138 | 62 | 7 | 0 | 63 | 21 | 21 | 3 | 5 |
| 21 [Religious/spiritual activity] | 46 | 22 | 4 | 0 | 18 | 8 | 9 | 1 | 0 |
| 22 [Restaurant, café, bar, etc.] | 440 | 129 | 39 | 2 | 260 | 53 | 61 | 3 | 10 |
| 23 [Take-out] | 132 | 42 | 8 | 0 | 74 | 6 | 9 | 1 | 15 |
| 24 [Walk] | 364 | 51 | 15 | 0 | 313 | 39 | 10 | 0 | 5 |
| 25 [Other place] | 146 | 67 | 13 | 0 | 63 | 30 | 28 | 9 | 7 |
| 26 [Social contact residence] | 158 | 79 | 26 | 0 | 50 | 28 | 26 | 6 | 1 |
# graph
.tm1 <- .tm %>%
filter(location_tmode_1 == 1) %>%
mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
filter(location_tmode_2 == 1) %>%
mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
filter(location_tmode_3 == 1) %>%
mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
filter(location_tmode_4 == 1) %>%
mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
filter(location_tmode_5 == 1) %>%
mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
filter(location_tmode_6 == 1) %>%
mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
filter(location_tmode_7 == 1) %>%
mutate(tm = "[7] By train")
.tm99 <- .tm %>%
filter(location_tmode_99 == 1) %>%
mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
bind_rows(.tm3) %>%
bind_rows(.tm4) %>%
bind_rows(.tm5) %>%
bind_rows(.tm6) %>%
bind_rows(.tm7) %>%
bind_rows(.tm99)
# histogram of answers
ggplot(data = .tm) +
geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
scale_fill_brewer(palette = "Set3", name = "Transport modes") +
scale_y_continuous(labels = percent) +
labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
coord_flip() +
theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
guides(fill = guide_legend(nrow = 3))

Based on the answers to the question Do you usually go to this place alone or with other people?.
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.alone <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels) %>%
mutate(location_alone_recode = case_when(
location_alone == 1 ~ 1,
location_alone == 2 ~ 0
))
.alone_grouped <- .alone %>%
group_by(description) %>%
dplyr::summarise(
N = n(), "Visited alone" = sum(location_alone_recode),
"Visited alone (%)" = round(sum(location_alone_recode) * 100.0 / n(), digits = 1)
)
kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| description | N | Visited alone | Visited alone (%) |
|---|---|---|---|
| 2 [Other residence] | 11 | NA | NA |
| 3 [Work] | 176 | 72 | 40.9 |
| 4 [School/College/University] | 24 | 20 | 83.3 |
| 5 [Supermarket] | 549 | 425 | 77.4 |
| 6 [Public/farmer’s market] | 109 | 62 | 56.9 |
| 7 [Bakery] | 117 | 89 | 76.1 |
| 8 [Specialty food store] | 193 | 156 | 80.8 |
| 9 [Convenience store/Dépanneur] | 50 | 45 | 90.0 |
| 10 [Liquor store/SAQ] | 185 | 144 | 77.8 |
| 11 [Bank] | 159 | 149 | 93.7 |
| 12 [Hair salon/barbershop] | 149 | 140 | 94.0 |
| 13 [Post office] | 139 | 132 | 95.0 |
| 14 [Drugstore] | 192 | 170 | 88.5 |
| 15 [Doctor/healthcare provider] | 211 | 194 | 91.9 |
| 16 [Public transit stop] | 422 | 352 | 83.4 |
| 17 [Leisure-time physical activity] | 301 | 150 | 49.8 |
| 18 [Park] | 348 | 148 | 42.5 |
| 19 [Cultural activity] | 169 | 44 | 26.0 |
| 20 [Volunteering place] | 138 | 57 | 41.3 |
| 21 [Religious/spiritual activity] | 46 | 15 | 32.6 |
| 22 [Restaurant, café, bar, etc.] | 440 | 96 | 21.8 |
| 23 [Take-out] | 132 | 65 | 49.2 |
| 24 [Walk] | 364 | 189 | 51.9 |
| 25 [Other place] | 146 | 82 | 56.2 |
| 26 [Social contact residence] | 158 | 70 | 44.3 |
# histogram of answers
ggplot(data = .alone) +
geom_bar(aes(x = fct_rev(description), fill = factor(location_alone)), position = "fill") +
scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("N/A", "Alone", "With someone")) +
scale_y_continuous(labels = percent) +
labs(y = "Proportion of places visited alone", x = element_blank()) +
coord_flip()

Based on the answers to the question How often do you go there?.
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.freq <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels)
.freq_grouped <- .freq %>%
group_by(description) %>%
dplyr::summarise(
N = n(), min = min(location_freq_visit),
max = max(location_freq_visit),
mean = mean(location_freq_visit),
median = median(location_freq_visit),
sd = sd(location_freq_visit)
)
kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
| description | N | min | max | mean | median | sd |
|---|---|---|---|---|---|---|
| 2 [Other residence] | 11 | 2 | 208 | 75.454545 | 52 | 62.110166 |
| 3 [Work] | 176 | 1 | 520 | 177.369318 | 208 | 115.107031 |
| 4 [School/College/University] | 24 | 4 | 364 | 154.000000 | 130 | 102.007672 |
| 5 [Supermarket] | 549 | 2 | 1040 | 62.704918 | 52 | 73.032792 |
| 6 [Public/farmer’s market] | 109 | 1 | 364 | 44.963303 | 24 | 62.284432 |
| 7 [Bakery] | 117 | 3 | 260 | 37.179487 | 24 | 41.135314 |
| 8 [Specialty food store] | 193 | 3 | 1040 | 50.668394 | 24 | 86.380842 |
| 9 [Convenience store/Dépanneur] | 50 | 1 | 520 | 59.740000 | 24 | 83.901986 |
| 10 [Liquor store/SAQ] | 185 | 1 | 208 | 31.843243 | 24 | 34.100431 |
| 11 [Bank] | 159 | 1 | 260 | 30.666667 | 24 | 36.516282 |
| 12 [Hair salon/barbershop] | 149 | 1 | 36 | 6.953020 | 6 | 5.344024 |
| 13 [Post office] | 139 | 2 | 104 | 14.589928 | 8 | 19.129541 |
| 14 [Drugstore] | 192 | 1 | 208 | 42.151042 | 24 | 44.589613 |
| 15 [Doctor/healthcare provider] | 211 | 1 | 104 | 5.535545 | 3 | 8.809757 |
| 16 [Public transit stop] | 422 | 1 | 364 | 61.715640 | 24 | 83.875231 |
| 17 [Leisure-time physical activity] | 301 | 2 | 364 | 102.458472 | 104 | 86.560590 |
| 18 [Park] | 348 | 1 | 1560 | 74.514368 | 33 | 119.288874 |
| 19 [Cultural activity] | 169 | 1 | 208 | 17.698225 | 6 | 31.598637 |
| 20 [Volunteering place] | 138 | 1 | 5200 | 116.913044 | 52 | 445.811938 |
| 21 [Religious/spiritual activity] | 46 | 4 | 364 | 76.260870 | 52 | 102.167930 |
| 22 [Restaurant, café, bar, etc.] | 440 | 1 | 364 | 26.950000 | 12 | 43.894544 |
| 23 [Take-out] | 132 | 2 | 364 | 25.037879 | 12 | 39.906648 |
| 24 [Walk] | 364 | 1 | 1560 | 99.156593 | 52 | 136.668166 |
| 25 [Other place] | 146 | 1 | 520 | 42.164384 | 24 | 63.740749 |
| 26 [Social contact residence] | 158 | 3 | 312 | 36.645570 | 24 | 46.663325 |
# graph
ggplot(data = .freq) +
geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
scale_y_continuous(limits = c(0, 365)) +
labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
coord_flip()

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).
-- Reading Camille tbx indics from Essence table
SELECT interact_id,
n_acti_places, n_weekly_vst, n_acti_types,
cvx_perimeter, cvx_surface,
min_length, max_length, median_length,
pct_visits_neighb,
n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 1
2.9.5 Social indicators: Alexandre Naud’s toolbox
See Alex’s document for a more comprehensive presentation of the social indicators.
2.9.5.1 Number of people in the network (
people_degree)2.9.5.2 Simmelian Brokerage (
simmelian)2.9.5.3 Number of people with whom the participant like to socialize (
socialize_size)2.9.5.4 Weekly face-to-face interactions among people with whom the participant like to socialize (
socialize_meet)2.9.5.5 Weekly ICT interactions among people with whom the participant like to socialize (
socialize_chat)2.9.5.6 Number of people with whom the participant discuss important matters (
important_size)2.9.5.7 Number of people in all groups (
group_degree)