Section 1: Residence
and Neighbourhood
Now, let’s start
with your home. What is your address?
home_location <- locations[locations$location_category == 1, ]
## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .02
van_aoi[["right"]] <- van_aoi[["right"]] + .01
van_aoi[["top"]] <- van_aoi[["top"]] + .018
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .018
bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) # see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their
original position to protect privacy.
# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"]) %>%
as.data.frame()
home_by_mun_cnt <- home_by_municipalites %>%
group_by(NAME) %>%
dplyr::count() %>%
arrange(desc(n), NAME)
home_by_mun_cnt$geom <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME
|
n
|
Vancouver
|
208
|
Burnaby
|
1
|
Greater Vancouver
|
1
|
When did you move
to your current address?
# N of addresses by date of move
year_of_move <- veritas_main[c("interact_id", "home_move_date")]
year_of_move$home_move_date <- year(ymd(year_of_move$home_move_date))
ggplot(data = year_of_move) +
geom_histogram(aes(x = home_move_date))

# recode date of move
year_of_move$home_move_date_recode <- as.character(year_of_move$home_move_date)
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2005] <- "2005 - 2001"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2000] <- "2000 - 1991"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 1990] <- paste("1990 -", min(year_of_move$home_move_date))
year_of_move_cnt <- year_of_move %>%
group_by(home_move_date_recode) %>%
dplyr::count() %>%
arrange(desc(home_move_date_recode))
kable(year_of_move_cnt, caption = "Year of move to current address") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Year of move to current address
home_move_date_recode
|
n
|
2018
|
17
|
2017
|
8
|
2016
|
17
|
2015
|
7
|
2014
|
7
|
2013
|
8
|
2012
|
6
|
2011
|
6
|
2010
|
7
|
2009
|
7
|
2008
|
3
|
2007
|
7
|
2006
|
3
|
2005 - 2001
|
35
|
2000 - 1991
|
46
|
1990 - 1950
|
26
|
If you were asked
to draw the boundaries of your neighbourhood, what would they be?
prn <- poly_geom[poly_geom$area_type == "neighborhood", ]
## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE, fill = alpha("blue", 0.05), color = alpha("blue", 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$geom)
kable(t(as.matrix(summary(prn$area_m2))),
caption = "Area (in square meters) of the perceived residential neighborhood",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived residential neighborhood
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
738.9
|
780272.9
|
1846898
|
3372615
|
4217316
|
26812746
|
NB only 179 valid neighborhoods were collected, as many
participants struggled to draw polygons on the map.
How attached are
you to your neighbourhood?
# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99, c("interact_id", "neighbourhood_attach")] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, "1 [Not attached at all]",
ifelse(.ngh_att$neighbourhood_attach_code == 6, "6 [Very attached]",
.ngh_att$neighbourhood_attach_code
)
))
# histogram of attachment
ggplot(data = .ngh_att) +
geom_histogram(aes(x = neighbourhood_attach), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>%
group_by(neighbourhood_attach) %>%
dplyr::count() %>%
arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Neigbourhood attachment
neighbourhood_attach
|
n
|
1 [Not attached at all]
|
1
|
2
|
6
|
3
|
16
|
4
|
31
|
5
|
88
|
6 [Very attached]
|
65
|
On average, how
many hours per day do you spend outside of your home?
# histogram of n hours out
ggplot(data = veritas_main) +
geom_histogram(aes(x = hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))),
caption = "Hours/day outside home",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside home
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
1
|
4
|
6
|
7
|
10
|
16
|
Of this time spent
outside your home, on average how many hours do you spend outside your
neighbourhood?
# histogram of n hours out
ggplot(data = veritas_main) +
geom_histogram(aes(x = hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))),
caption = "Hours/day outside neighbourhood",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside neighbourhood
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
2
|
3
|
4.5
|
8
|
15
|
Are there one or
more areas close to where you live that you tend to avoid because you do
not feel safe there (for any reason)?
# extract and recode
.unsafe <- veritas_main[c("interact_id", "unsafe_area")] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, "1 [Yes]",
ifelse(.unsafe$unsafe_area_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .unsafe) +
geom_histogram(aes(x = unsafe_area), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>%
group_by(unsafe_area) %>%
dplyr::count() %>%
arrange(unsafe_area)
kable(.unsafe_cnt, caption = "unsafe_area") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
unsafe_area
unsafe_area
|
n
|
1 [Yes]
|
9
|
2 [No]
|
201
|
# map
unsafe <- poly_geom[poly_geom$area_type == "unsafe area", ]
## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$geom)
kable(t(as.matrix(summary(unsafe$area_m2))),
caption = "Area (in square meters) of the perceived unsafe area",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived unsafe area
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
8831.8
|
113728.2
|
204269.7
|
258345.4
|
318770.9
|
831151.6
|
Do you spend the
night somewhere other than your home at least once per week?
# extract and recode
.o_res <- veritas_main[c("interact_id", "other_resid")] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, "1 [Yes]",
ifelse(.o_res$other_resid_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .o_res) +
geom_histogram(aes(x = other_resid), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "other_resid")

.o_res_cnt <- .o_res %>%
group_by(other_resid) %>%
dplyr::count() %>%
arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other residence
other_resid
|
n
|
1 [Yes]
|
11
|
2 [No]
|
199
|
Section 2:
Occupation
Are you currently
working?
# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
ifelse(.work$working_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .work) +
geom_histogram(aes(x = working), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "working")

.work_cnt <- .work %>%
group_by(working) %>%
dplyr::count() %>%
arrange(working)
kable(.work_cnt, caption = "Currently working") %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working
|
n
|
1 [Yes]
|
128
|
2 [No]
|
82
|
Where do you
work?
work_location <- locations[locations$location_category == 3, ]
bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

On average, how
many hours per week do you work?
# histogram of n hours out
ggplot(data = veritas_main[veritas_main$working == 1, ]) +
geom_histogram(aes(x = work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))),
caption = "Work hours/week",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Work hours/week
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
2
|
24.8
|
37
|
32.7
|
40
|
84
|
Which of the
following categories best describes the amount of physical activity
required for your job?
# extract and recode
.work_pa <- veritas_main[veritas_main$working == 1, c("interact_id", "work_pa")] %>% dplyr::rename(work_pa_code = work_pa)
.work_pa$work_pa <- factor(ifelse(.work_pa$work_pa_code == 1, "1 [Mainly sitting with slight arm movements]",
ifelse(.work_pa$work_pa_code == 2, "2 [Sitting and standing with some walking]",
ifelse(.work_pa$work_pa_code == 3, "3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)]",
ifelse(.work_pa$work_pa_code == 4, "4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)]", "N/A")
)
)
))
# histogram of answers
ggplot(data = .work_pa) +
geom_histogram(aes(x = work_pa), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Physical activity at work")

.work_pa_cnt <- .work_pa %>%
group_by(work_pa) %>%
dplyr::count() %>%
arrange(work_pa)
kable(.work_pa_cnt, caption = "Physical activity at work") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Physical activity at work
work_pa
|
n
|
1 [Mainly sitting with slight arm movements]
|
55
|
2 [Sitting and standing with some walking]
|
57
|
3 [Walking, with some handling of materials generally weighing less than
25 kg (55 lbs)]
|
15
|
4 [Walking and heavy manual work often requiring handling of materials
weighing over 25 kg (50 lbs)]
|
1
|
Are you currently a
registered student?
# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))
# histogram of answers
ggplot(data = .study) +
geom_histogram(aes(x = studying), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Studying")

.study_cnt <- .study %>%
group_by(studying) %>%
dplyr::count() %>%
arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying
|
n
|
1 [Yes]
|
17
|
2 [No]
|
193
|
Where do you
study?
study_location <- locations[locations$location_category == 4, ]
bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

On average, how
many hours per week do you study?
# histogram of n hours out
ggplot(data = veritas_main[veritas_main$studying == 1, ]) +
geom_histogram(aes(x = study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))),
caption = "study hours/week",
digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
study hours/week
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
2
|
12
|
20
|
23.6
|
30
|
60
|
Section 3: Shopping
activities
The following questions are used to generate the locations grouped
into this section:
- Do you shop for groceries at a supermarket at least once per
month?
- Do you shop at a public/farmer’s market at least once per
month?
- Do you shop at a bakery at least once per month?
- Do you go to a specialty food store at least once per month? For
example: a cheese shop, fruit and vegetable store, butcher’s shop,
natural and health food store.
- Do you go to a convenience store at least once per month?
- Do you go to a liquor store at least once per month?
shop_location <- locations[locations$location_category %in% c(5, 6, 7, 8, 9, 10), ] %>% dplyr::rename(location_category_code = location_category)
shop_location$location_category <- factor(ifelse(shop_location$location_category_code == 5, " 5 [Supermarket]",
ifelse(shop_location$location_category_code == 6, " 6 [Public/farmer’s market]",
ifelse(shop_location$location_category_code == 7, " 7 [Bakery]",
ifelse(shop_location$location_category_code == 8, " 8 [Specialty food store]",
ifelse(shop_location$location_category_code == 9, " 9 [Convenience store/Dépanneur]", "10 [Liquor store/SAQ]")
)
)
)
))
# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category
|
n
|
5 [Supermarket]
|
549
|
6 [Public/farmer’s market]
|
109
|
7 [Bakery]
|
117
|
8 [Specialty food store]
|
193
|
9 [Convenience store/Dépanneur]
|
50
|
10 [Liquor store/SAQ]
|
185
|
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category
|
min
|
mean
|
median
|
max
|
5 [Supermarket]
|
0
|
2.61
|
3
|
5
|
6 [Public/farmer’s market]
|
0
|
0.52
|
0
|
5
|
7 [Bakery]
|
0
|
0.56
|
0
|
4
|
8 [Specialty food store]
|
0
|
0.92
|
0
|
5
|
9 [Convenience store/Dépanneur]
|
0
|
0.24
|
0
|
4
|
10 [Liquor store/SAQ]
|
0
|
0.88
|
1
|
5
|
Section 4:
Services
The following questions are used to generate the locations grouped
into this section:
- Where is the bank you go to most often located?
- Where is the hair salon or barber shop you go to most often?
- Where is the post office where you go to most often?
- Where is the drugstore you go to most often?
- If you need to visit a doctor or other healthcare provider, where do
you go most often?
serv_location <- locations[locations$location_category %in% c(11, 12, 13, 14, 15), ] %>% dplyr::rename(location_category_code = location_category)
serv_location$location_category <- factor(ifelse(serv_location$location_category_code == 11, "11 [Bank]",
ifelse(serv_location$location_category_code == 12, "12 [Hair salon/barbershop]",
ifelse(serv_location$location_category_code == 13, "13 [Post office]",
ifelse(serv_location$location_category_code == 14, "14 [Drugstore]", "15 Doctor/healthcare provider]")
)
)
))
# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category
|
n
|
11 [Bank]
|
159
|
12 [Hair salon/barbershop]
|
149
|
13 [Post office]
|
139
|
14 [Drugstore]
|
192
|
15 Doctor/healthcare provider]
|
211
|
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category
|
min
|
mean
|
median
|
max
|
11 [Bank]
|
0
|
0.76
|
1
|
1
|
12 [Hair salon/barbershop]
|
0
|
0.71
|
1
|
1
|
13 [Post office]
|
0
|
0.66
|
1
|
1
|
14 [Drugstore]
|
0
|
0.91
|
1
|
1
|
15 Doctor/healthcare provider]
|
0
|
1.00
|
1
|
4
|
Section 6: Leisure
activities
The following questions are used to generate the locations grouped
into this section:
- Do you participate in any (individual or group) sports or
leisure-time physical activities at least once per month?
- Do you visit a park at least once per month?
- Do you participate in or attend as a spectator a cultural or
non-sport leisure activity at least once per month? For example: singing
or drawing lessons, book or poker club, concert or play.
- Do you volunteer at least once per month?
- Do you engage in any religious or spiritual activities at least once
per month?
- Do you go to a restaurant, café, bar or other food and drink
establishment at least once per month?
- Do you get take-out food at least once per month?
- Do you regularly go for walks?
leisure_location <- locations[locations$location_category %in% c(17, 18, 19, 20, 21, 22, 23, 24), ] %>% dplyr::rename(location_category_code = location_category)
leisure_location$location_category <- factor(ifelse(leisure_location$location_category_code == 17, "17 [Leisure-time physical activity]",
ifelse(leisure_location$location_category_code == 18, "18 [Park]",
ifelse(leisure_location$location_category_code == 19, "19 [Cultural activity]",
ifelse(leisure_location$location_category_code == 20, "20 [Volunteering place]",
ifelse(leisure_location$location_category_code == 21, "21 [Religious or spiritual activity]",
ifelse(leisure_location$location_category_code == 22, "22 [Restaurant, café, bar, etc. ]",
ifelse(leisure_location$location_category_code == 23, "23 [Take-out]", "24 [Walk]")
)
)
)
)
)
))
# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
scale_color_brewer(palette = "Accent") +
theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = leisure_location) +
geom_histogram(aes(x = location_category), stat = "count") +
scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c("location_category")]) %>%
group_by(location_category) %>%
dplyr::count() %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category
|
n
|
17 [Leisure-time physical activity]
|
301
|
18 [Park]
|
348
|
19 [Cultural activity]
|
169
|
20 [Volunteering place]
|
138
|
21 [Religious or spiritual activity]
|
46
|
22 [Restaurant, café, bar, etc. ]
|
440
|
23 [Take-out]
|
132
|
24 [Walk]
|
364
|
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c("interact_id", "location_category")]) %>%
group_by(interact_id, location_category) %>%
dplyr::count()
# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
interact_iid = character(),
location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
.dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
.dmy$interact_id <- as.character(iid)
.dummy <- rbind(.dummy, .dmy)
}
# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)
.location_category_cnt <- .loc_iid_category_cnt %>%
group_by(location_category) %>%
dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of leisure locations by participant and category
location_category
|
min
|
mean
|
median
|
max
|
17 [Leisure-time physical activity]
|
0
|
1.43
|
1
|
5
|
18 [Park]
|
0
|
1.66
|
1
|
5
|
19 [Cultural activity]
|
0
|
0.80
|
0
|
5
|
20 [Volunteering place]
|
0
|
0.66
|
0
|
5
|
21 [Religious or spiritual activity]
|
0
|
0.22
|
0
|
3
|
22 [Restaurant, café, bar, etc. ]
|
0
|
2.10
|
2
|
5
|
23 [Take-out]
|
0
|
0.63
|
0
|
5
|
24 [Walk]
|
0
|
1.73
|
1
|
5
|
Derived metrics
Transportation mode
preferences
Based on the answers to the question Usually, how do you go
there? (Check all that apply.).
# code en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99 Other
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.tm <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels)
.tm_grouped <- .tm %>%
group_by(description) %>%
dplyr::summarise(
N = n(), "By car (driver)" = sum(location_tmode_1),
"By car (passenger)" = sum(location_tmode_2),
"By taxi/Uber" = sum(location_tmode_3),
"On foot" = sum(location_tmode_4),
"By bike" = sum(location_tmode_5),
"By bus" = sum(location_tmode_6),
"By train" = sum(location_tmode_7),
"Other" = sum(location_tmode_99)
)
kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description
|
N
|
By car (driver)
|
By car (passenger)
|
By taxi/Uber
|
On foot
|
By bike
|
By bus
|
By train
|
Other
|
2 [Other residence]
|
11
|
3
|
5
|
1
|
2
|
3
|
5
|
1
|
1
|
3 [Work]
|
176
|
71
|
4
|
1
|
45
|
53
|
41
|
13
|
17
|
4 [School/College/University]
|
24
|
5
|
0
|
0
|
8
|
2
|
15
|
3
|
4
|
5 [Supermarket]
|
549
|
231
|
50
|
0
|
298
|
87
|
39
|
3
|
4
|
6 [Public/farmer’s market]
|
109
|
34
|
8
|
0
|
58
|
35
|
9
|
0
|
0
|
7 [Bakery]
|
117
|
27
|
3
|
0
|
85
|
18
|
5
|
0
|
1
|
8 [Specialty food store]
|
193
|
46
|
8
|
0
|
130
|
37
|
17
|
2
|
1
|
9 [Convenience store/Dépanneur]
|
50
|
13
|
1
|
0
|
40
|
1
|
1
|
0
|
0
|
10 [Liquor store/SAQ]
|
185
|
80
|
13
|
0
|
98
|
27
|
6
|
0
|
0
|
11 [Bank]
|
159
|
34
|
3
|
0
|
124
|
21
|
9
|
2
|
2
|
12 [Hair salon/barbershop]
|
149
|
59
|
6
|
0
|
76
|
23
|
26
|
2
|
3
|
13 [Post office]
|
139
|
23
|
0
|
0
|
121
|
21
|
3
|
0
|
0
|
14 [Drugstore]
|
192
|
59
|
6
|
0
|
145
|
26
|
8
|
0
|
0
|
15 [Doctor/healthcare provider]
|
211
|
88
|
7
|
0
|
93
|
34
|
49
|
7
|
2
|
16 [Public transit stop]
|
422
|
3
|
3
|
0
|
397
|
5
|
0
|
0
|
26
|
17 [Leisure-time physical activity]
|
301
|
109
|
22
|
0
|
141
|
87
|
17
|
6
|
10
|
18 [Park]
|
348
|
75
|
31
|
0
|
224
|
97
|
18
|
1
|
6
|
19 [Cultural activity]
|
169
|
55
|
34
|
2
|
50
|
26
|
56
|
4
|
1
|
20 [Volunteering place]
|
138
|
62
|
7
|
0
|
63
|
21
|
21
|
3
|
5
|
21 [Religious/spiritual activity]
|
46
|
22
|
4
|
0
|
18
|
8
|
9
|
1
|
0
|
22 [Restaurant, café, bar, etc.]
|
440
|
129
|
39
|
2
|
260
|
53
|
61
|
3
|
10
|
23 [Take-out]
|
132
|
42
|
8
|
0
|
74
|
6
|
9
|
1
|
15
|
24 [Walk]
|
364
|
51
|
15
|
0
|
313
|
39
|
10
|
0
|
5
|
25 [Other place]
|
146
|
67
|
13
|
0
|
63
|
30
|
28
|
9
|
7
|
26 [Social contact residence]
|
158
|
79
|
26
|
0
|
50
|
28
|
26
|
6
|
1
|
# graph
.tm1 <- .tm %>%
filter(location_tmode_1 == 1) %>%
mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
filter(location_tmode_2 == 1) %>%
mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
filter(location_tmode_3 == 1) %>%
mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
filter(location_tmode_4 == 1) %>%
mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
filter(location_tmode_5 == 1) %>%
mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
filter(location_tmode_6 == 1) %>%
mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
filter(location_tmode_7 == 1) %>%
mutate(tm = "[7] By train")
.tm99 <- .tm %>%
filter(location_tmode_99 == 1) %>%
mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
bind_rows(.tm3) %>%
bind_rows(.tm4) %>%
bind_rows(.tm5) %>%
bind_rows(.tm6) %>%
bind_rows(.tm7) %>%
bind_rows(.tm99)
# histogram of answers
ggplot(data = .tm) +
geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
scale_fill_brewer(palette = "Set3", name = "Transport modes") +
scale_y_continuous(labels = percent) +
labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
coord_flip() +
theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
guides(fill = guide_legend(nrow = 3))

Visiting places
alone
Based on the answers to the question Do you usually go to this
place alone or with other people?.
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.alone <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels) %>%
mutate(location_alone_recode = case_when(
location_alone == 1 ~ 1,
location_alone == 2 ~ 0
))
.alone_grouped <- .alone %>%
group_by(description) %>%
dplyr::summarise(
N = n(), "Visited alone" = sum(location_alone_recode),
"Visited alone (%)" = round(sum(location_alone_recode) * 100.0 / n(), digits = 1)
)
kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places alone
description
|
N
|
Visited alone
|
Visited alone (%)
|
2 [Other residence]
|
11
|
NA
|
NA
|
3 [Work]
|
176
|
72
|
40.9
|
4 [School/College/University]
|
24
|
20
|
83.3
|
5 [Supermarket]
|
549
|
425
|
77.4
|
6 [Public/farmer’s market]
|
109
|
62
|
56.9
|
7 [Bakery]
|
117
|
89
|
76.1
|
8 [Specialty food store]
|
193
|
156
|
80.8
|
9 [Convenience store/Dépanneur]
|
50
|
45
|
90.0
|
10 [Liquor store/SAQ]
|
185
|
144
|
77.8
|
11 [Bank]
|
159
|
149
|
93.7
|
12 [Hair salon/barbershop]
|
149
|
140
|
94.0
|
13 [Post office]
|
139
|
132
|
95.0
|
14 [Drugstore]
|
192
|
170
|
88.5
|
15 [Doctor/healthcare provider]
|
211
|
194
|
91.9
|
16 [Public transit stop]
|
422
|
352
|
83.4
|
17 [Leisure-time physical activity]
|
301
|
150
|
49.8
|
18 [Park]
|
348
|
148
|
42.5
|
19 [Cultural activity]
|
169
|
44
|
26.0
|
20 [Volunteering place]
|
138
|
57
|
41.3
|
21 [Religious/spiritual activity]
|
46
|
15
|
32.6
|
22 [Restaurant, café, bar, etc.]
|
440
|
96
|
21.8
|
23 [Take-out]
|
132
|
65
|
49.2
|
24 [Walk]
|
364
|
189
|
51.9
|
25 [Other place]
|
146
|
82
|
56.2
|
26 [Social contact residence]
|
158
|
70
|
44.3
|
# histogram of answers
ggplot(data = .alone) +
geom_bar(aes(x = fct_rev(description), fill = factor(location_alone)), position = "fill") +
scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("N/A", "Alone", "With someone")) +
scale_y_continuous(labels = percent) +
labs(y = "Proportion of places visited alone", x = element_blank()) +
coord_flip()

Visit
frequency
Based on the answers to the question How often do you go
there?.
loc_labels <- data.frame(location_category = c(2:26), description = c(
" 2 [Other residence]",
" 3 [Work]",
" 4 [School/College/University]",
" 5 [Supermarket]",
" 6 [Public/farmer’s market]",
" 7 [Bakery]",
" 8 [Specialty food store]",
" 9 [Convenience store/Dépanneur]",
"10 [Liquor store/SAQ]",
"11 [Bank]",
"12 [Hair salon/barbershop]",
"13 [Post office]",
"14 [Drugstore]",
"15 [Doctor/healthcare provider]",
"16 [Public transit stop]",
"17 [Leisure-time physical activity]",
"18 [Park]",
"19 [Cultural activity]",
"20 [Volunteering place]",
"21 [Religious/spiritual activity]",
"22 [Restaurant, café, bar, etc.]",
"23 [Take-out]",
"24 [Walk]",
"25 [Other place]",
"26 [Social contact residence]"
))
# extract and summary stats
.freq <- locations %>%
st_set_geometry(NULL) %>%
filter(location_category != 1) %>%
left_join(loc_labels)
.freq_grouped <- .freq %>%
group_by(description) %>%
dplyr::summarise(
N = n(), min = min(location_freq_visit),
max = max(location_freq_visit),
mean = mean(location_freq_visit),
median = median(location_freq_visit),
sd = sd(location_freq_visit)
)
kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description
|
N
|
min
|
max
|
mean
|
median
|
sd
|
2 [Other residence]
|
11
|
2
|
208
|
75.454545
|
52
|
62.110166
|
3 [Work]
|
176
|
1
|
520
|
177.369318
|
208
|
115.107031
|
4 [School/College/University]
|
24
|
4
|
364
|
154.000000
|
130
|
102.007672
|
5 [Supermarket]
|
549
|
2
|
1040
|
62.704918
|
52
|
73.032792
|
6 [Public/farmer’s market]
|
109
|
1
|
364
|
44.963303
|
24
|
62.284432
|
7 [Bakery]
|
117
|
3
|
260
|
37.179487
|
24
|
41.135314
|
8 [Specialty food store]
|
193
|
3
|
1040
|
50.668394
|
24
|
86.380842
|
9 [Convenience store/Dépanneur]
|
50
|
1
|
520
|
59.740000
|
24
|
83.901986
|
10 [Liquor store/SAQ]
|
185
|
1
|
208
|
31.843243
|
24
|
34.100431
|
11 [Bank]
|
159
|
1
|
260
|
30.666667
|
24
|
36.516282
|
12 [Hair salon/barbershop]
|
149
|
1
|
36
|
6.953020
|
6
|
5.344024
|
13 [Post office]
|
139
|
2
|
104
|
14.589928
|
8
|
19.129541
|
14 [Drugstore]
|
192
|
1
|
208
|
42.151042
|
24
|
44.589613
|
15 [Doctor/healthcare provider]
|
211
|
1
|
104
|
5.535545
|
3
|
8.809757
|
16 [Public transit stop]
|
422
|
1
|
364
|
61.715640
|
24
|
83.875231
|
17 [Leisure-time physical activity]
|
301
|
2
|
364
|
102.458472
|
104
|
86.560590
|
18 [Park]
|
348
|
1
|
1560
|
74.514368
|
33
|
119.288874
|
19 [Cultural activity]
|
169
|
1
|
208
|
17.698225
|
6
|
31.598637
|
20 [Volunteering place]
|
138
|
1
|
5200
|
116.913044
|
52
|
445.811938
|
21 [Religious/spiritual activity]
|
46
|
4
|
364
|
76.260870
|
52
|
102.167930
|
22 [Restaurant, café, bar, etc.]
|
440
|
1
|
364
|
26.950000
|
12
|
43.894544
|
23 [Take-out]
|
132
|
2
|
364
|
25.037879
|
12
|
39.906648
|
24 [Walk]
|
364
|
1
|
1560
|
99.156593
|
52
|
136.668166
|
25 [Other place]
|
146
|
1
|
520
|
42.164384
|
24
|
63.740749
|
26 [Social contact residence]
|
158
|
3
|
312
|
36.645570
|
24
|
46.663325
|
# graph
ggplot(data = .freq) +
geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
scale_y_continuous(limits = c(0, 365)) +
labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
coord_flip()

Spatial indicators:
Camille Perchoux’s toolbox
Below is a list of indicators proposed by Camille Perchoux in her
paper Assessing
patterns of spatial behavior in health studies: Their socio-demographic
determinants and associations with transportation modes (the RECORD
Cohort Study).
-- Reading Camille tbx indics from Essence table
SELECT interact_id,
n_acti_places, n_weekly_vst, n_acti_types,
cvx_perimeter, cvx_surface,
min_length, max_length, median_length,
pct_visits_neighb,
n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 1
Social indicators:
Alexandre Naud’s toolbox
See Alex’s document for
a more comprehensive presentation of the social indicators.
-- Reading Alex tbx indics from Essence table
SELECT interact_id,
people_degree,
socialize_size, socialize_meet, socialize_chat,
important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 1
Number of people
in the network (people_degree
)
ggplot(ess.tab.alex) +
geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
1
|
3
|
4.257143
|
6
|
24
|
Simmelian
Brokerage (simmelian
)
ggplot(ess.tab.alex) +
geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
1
|
2.763889
|
5.416667
|
8.721272
|
12.69375
|
42.61905
|
27
|
Number of people
with whom the participant like to socialize
(socialize_size
)
ggplot(ess.tab.alex) +
geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
1
|
3
|
3.528571
|
5
|
19
|
Weekly
face-to-face interactions among people with whom the participant like to
socialize (socialize_meet
)
ggplot(filter(ess.tab.alex, socialize_meet < 100)) +
geom_histogram(aes(x = socialize_meet)) +
annotate(geom = "text", x = 75, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_meet))), caption = "socialize_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_meet
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
106
|
364
|
750.6
|
930.5
|
11068
|
Weekly ICT
interactions among people with whom the participant like to socialize
(socialize_chat
)
ggplot(filter(ess.tab.alex, socialize_chat < 100)) +
geom_histogram(aes(x = socialize_chat)) +
annotate(geom = "text", x = 55, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_chat))), caption = "socialize_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_chat
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
88
|
364
|
248547.8
|
647
|
5.2e+07
|
Number of people
with whom the participant discuss important matters
(important_size
)
ggplot(ess.tab.alex) +
geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
1
|
2
|
2.714286
|
4
|
19
|
Number of people
in all groups (group_degree
)
ggplot(filter(ess.tab.alex, group_degree < 100)) +
geom_histogram(aes(x = group_degree)) +
annotate(geom = "text", x = 20, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$group_degree))), caption = "group_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
group_degree
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
0
|
0
|
4
|
7.12381
|
11
|
63
|
2.9.5 Social indicators: Alexandre Naud’s toolbox
See Alex’s document for a more comprehensive presentation of the social indicators.
2.9.5.1 Number of people in the network (
people_degree
)2.9.5.2 Simmelian Brokerage (
simmelian
)2.9.5.3 Number of people with whom the participant like to socialize (
socialize_size
)2.9.5.4 Weekly face-to-face interactions among people with whom the participant like to socialize (
socialize_meet
)2.9.5.5 Weekly ICT interactions among people with whom the participant like to socialize (
socialize_chat
)2.9.5.6 Number of people with whom the participant discuss important matters (
important_size
)2.9.5.7 Number of people in all groups (
group_degree
)