1 VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

  • Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
  • Social contacts: people and/or groups frequented by participants
  • Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

VERITAS entities
VERITAS entities

2 Basic descriptive statistics

2.1 Section 1: Residence and Neighbourhood

2.1.1 Now, let’s start with your home. What is your address?

home_location <- locations[locations$location_category == 1, ]

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c("left", "bottom", "right", "top")
van_aoi[["left"]] <- van_aoi[["left"]] - .02
van_aoi[["right"]] <- van_aoi[["right"]] + .01
van_aoi[["top"]] <- van_aoi[["top"]] + .018
van_aoi[["bottom"]] <- van_aoi[["bottom"]] - .018

bm <- get_stadiamap(van_aoi, zoom = 11, maptype = "stamen_toner_lite") %>%
  ggmap(extent = "device")
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3) # see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"]) %>%
  as.data.frame()
home_by_mun_cnt <- home_by_municipalites %>%
  group_by(NAME) %>%
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$geom <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of participants by municipalities
NAME n
Vancouver 208
Burnaby 1
Greater Vancouver 1

2.1.2 When did you move to your current address?

# N of addresses by date of move
year_of_move <- veritas_main[c("interact_id", "home_move_date")]
year_of_move$home_move_date <- year(ymd(year_of_move$home_move_date))
ggplot(data = year_of_move) +
  geom_histogram(aes(x = home_move_date))

# recode date of move
year_of_move$home_move_date_recode <- as.character(year_of_move$home_move_date)
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2005] <- "2005 - 2001"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2000] <- "2000 - 1991"
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 1990] <- paste("1990 -", min(year_of_move$home_move_date))

year_of_move_cnt <- year_of_move %>%
  group_by(home_move_date_recode) %>%
  dplyr::count() %>%
  arrange(desc(home_move_date_recode))
kable(year_of_move_cnt, caption = "Year of move to current address") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Year of move to current address
home_move_date_recode n
2018 17
2017 8
2016 17
2015 7
2014 7
2013 8
2012 6
2011 6
2010 7
2009 7
2008 3
2007 7
2006 3
2005 - 2001 35
2000 - 1991 46
1990 - 1950 26

2.1.3 If you were asked to draw the boundaries of your neighbourhood, what would they be?

prn <- poly_geom[poly_geom$area_type == "neighborhood", ]

## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE, fill = alpha("blue", 0.05), color = alpha("blue", 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$geom)
kable(t(as.matrix(summary(prn$area_m2))), 
  caption = "Area (in square meters) of the perceived residential neighborhood",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived residential neighborhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
738.9 780272.9 1846898 3372615 4217316 26812746

NB only 179 valid neighborhoods were collected, as many participants struggled to draw polygons on the map.

2.1.4 How attached are you to your neighbourhood?

# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99, c("interact_id", "neighbourhood_attach")] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, "1 [Not attached at all]",
  ifelse(.ngh_att$neighbourhood_attach_code == 6, "6 [Very attached]",
    .ngh_att$neighbourhood_attach_code
  )
))

# histogram of attachment
ggplot(data = .ngh_att) +
  geom_histogram(aes(x = neighbourhood_attach), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>%
  group_by(neighbourhood_attach) %>%
  dplyr::count() %>%
  arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Neigbourhood attachment
neighbourhood_attach n
1 [Not attached at all] 1
2 6
3 16
4 31
5 88
6 [Very attached] 65

2.1.5 On average, how many hours per day do you spend outside of your home?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))), 
  caption = "Hours/day outside home",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside home
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 4 6 7 10 16

2.1.6 Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

# histogram of n hours out
ggplot(data = veritas_main) +
  geom_histogram(aes(x = hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))), 
  caption = "Hours/day outside neighbourhood",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Hours/day outside neighbourhood
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 2 3 4.5 8 15

2.1.7 Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

# extract and recode
.unsafe <- veritas_main[c("interact_id", "unsafe_area")] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, "1 [Yes]",
  ifelse(.unsafe$unsafe_area_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .unsafe) +
  geom_histogram(aes(x = unsafe_area), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>%
  group_by(unsafe_area) %>%
  dplyr::count() %>%
  arrange(unsafe_area)
kable(.unsafe_cnt, caption = "unsafe_area") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
unsafe_area
unsafe_area n
1 [Yes] 9
2 [No] 201
# map
unsafe <- poly_geom[poly_geom$area_type == "unsafe area", ]

## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE, fill = alpha("blue", 0.3), color = alpha("blue", 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$geom)
kable(t(as.matrix(summary(unsafe$area_m2))), 
  caption = "Area (in square meters) of the perceived unsafe area",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Area (in square meters) of the perceived unsafe area
Min. 1st Qu. Median Mean 3rd Qu. Max.
8831.8 113728.2 204269.7 258345.4 318770.9 831151.6

2.1.8 Do you spend the night somewhere other than your home at least once per week?

# extract and recode
.o_res <- veritas_main[c("interact_id", "other_resid")] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, "1 [Yes]",
  ifelse(.o_res$other_resid_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .o_res) +
  geom_histogram(aes(x = other_resid), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_resid")

.o_res_cnt <- .o_res %>%
  group_by(other_resid) %>%
  dplyr::count() %>%
  arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other residence
other_resid n
1 [Yes] 11
2 [No] 199

2.2 Section 2: Occupation

2.2.1 Are you currently working?

# extract and recode
.work <- veritas_main[c("interact_id", "working")] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, "1 [Yes]",
  ifelse(.work$working_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .work) +
  geom_histogram(aes(x = working), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>%
  group_by(working) %>%
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently working
working n
1 [Yes] 128
2 [No] 82

2.2.2 Where do you work?

work_location <- locations[locations$location_category == 3, ]

bm + geom_sf(data = work_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.2.3 On average, how many hours per week do you work?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$working == 1, ]) +
  geom_histogram(aes(x = work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))), 
  caption = "Work hours/week",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Work hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
2 24.8 37 32.7 40 84

2.2.4 Which of the following categories best describes the amount of physical activity required for your job?

# extract and recode
.work_pa <- veritas_main[veritas_main$working == 1, c("interact_id", "work_pa")] %>% dplyr::rename(work_pa_code = work_pa)
.work_pa$work_pa <- factor(ifelse(.work_pa$work_pa_code == 1, "1 [Mainly sitting with slight arm movements]",
  ifelse(.work_pa$work_pa_code == 2, "2 [Sitting and standing with some walking]",
    ifelse(.work_pa$work_pa_code == 3, "3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)]",
      ifelse(.work_pa$work_pa_code == 4, "4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)]", "N/A")
    )
  )
))

# histogram of answers
ggplot(data = .work_pa) +
  geom_histogram(aes(x = work_pa), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Physical activity at work")

.work_pa_cnt <- .work_pa %>%
  group_by(work_pa) %>%
  dplyr::count() %>%
  arrange(work_pa)
kable(.work_pa_cnt, caption = "Physical activity at work") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Physical activity at work
work_pa n
1 [Mainly sitting with slight arm movements] 55
2 [Sitting and standing with some walking] 57
3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)] 15
4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)] 1

2.2.5 Are you currently a registered student?

# extract and recode
.study <- veritas_main[c("interact_id", "studying")] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, "1 [Yes]",
  ifelse(.study$studying_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .study) +
  geom_histogram(aes(x = studying), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>%
  group_by(studying) %>%
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Currently studying
studying n
1 [Yes] 17
2 [No] 193

2.2.6 Where do you study?

study_location <- locations[locations$location_category == 4, ]

bm + geom_sf(data = study_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.2.7 On average, how many hours per week do you study?

# histogram of n hours out
ggplot(data = veritas_main[veritas_main$studying == 1, ]) +
  geom_histogram(aes(x = study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))), 
  caption = "study hours/week",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
study hours/week
Min. 1st Qu. Median Mean 3rd Qu. Max.
2 12 20 23.6 30 60

2.3 Section 3: Shopping activities

The following questions are used to generate the locations grouped into this section:

  1. Do you shop for groceries at a supermarket at least once per month?
  2. Do you shop at a public/farmer’s market at least once per month?
  3. Do you shop at a bakery at least once per month?
  4. Do you go to a specialty food store at least once per month? For example: a cheese shop, fruit and vegetable store, butcher’s shop, natural and health food store.
  5. Do you go to a convenience store at least once per month?
  6. Do you go to a liquor store at least once per month?
shop_location <- locations[locations$location_category %in% c(5, 6, 7, 8, 9, 10), ] %>% dplyr::rename(location_category_code = location_category)
shop_location$location_category <- factor(ifelse(shop_location$location_category_code == 5, " 5 [Supermarket]",
  ifelse(shop_location$location_category_code == 6, " 6 [Public/farmer’s market]",
    ifelse(shop_location$location_category_code == 7, " 7 [Bakery]",
      ifelse(shop_location$location_category_code == 8, " 8 [Specialty food store]",
        ifelse(shop_location$location_category_code == 9, " 9 [Convenience store/Dépanneur]", "10 [Liquor store/SAQ]")
      )
    )
  )
))

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = shop_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
5 [Supermarket] 549
6 [Public/farmer’s market] 109
7 [Bakery] 117
8 [Specialty food store] 193
9 [Convenience store/Dépanneur] 50
10 [Liquor store/SAQ] 185
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
  interact_iid = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
5 [Supermarket] 0 2.61 3 5
6 [Public/farmer’s market] 0 0.52 0 5
7 [Bakery] 0 0.56 0 4
8 [Specialty food store] 0 0.92 0 5
9 [Convenience store/Dépanneur] 0 0.24 0 4
10 [Liquor store/SAQ] 0 0.88 1 5

2.4 Section 4: Services

The following questions are used to generate the locations grouped into this section:

  1. Where is the bank you go to most often located?
  2. Where is the hair salon or barber shop you go to most often?
  3. Where is the post office where you go to most often?
  4. Where is the drugstore you go to most often?
  5. If you need to visit a doctor or other healthcare provider, where do you go most often?
serv_location <- locations[locations$location_category %in% c(11, 12, 13, 14, 15), ] %>% dplyr::rename(location_category_code = location_category)
serv_location$location_category <- factor(ifelse(serv_location$location_category_code == 11, "11 [Bank]",
  ifelse(serv_location$location_category_code == 12, "12 [Hair salon/barbershop]",
    ifelse(serv_location$location_category_code == 13, "13 [Post office]",
      ifelse(serv_location$location_category_code == 14, "14 [Drugstore]", "15 Doctor/healthcare provider]")
    )
  )
))

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = serv_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
11 [Bank] 159
12 [Hair salon/barbershop] 149
13 [Post office] 139
14 [Drugstore] 192
15 Doctor/healthcare provider] 211
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
  interact_iid = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of shopping locations by participant and category
location_category min mean median max
11 [Bank] 0 0.76 1 1
12 [Hair salon/barbershop] 0 0.71 1 1
13 [Post office] 0 0.66 1 1
14 [Drugstore] 0 0.91 1 1
15 Doctor/healthcare provider] 0 1.00 1 4

2.5 Section 5: Transportation

2.5.1 Do you use public transit from your home?

# extract and recode
.transp <- veritas_main[c("interact_id", "public_transit")] %>% dplyr::rename(public_transit_code = public_transit)
.transp$public_transit <- factor(ifelse(.transp$public_transit_code == 1, "1 [Yes]",
  ifelse(.transp$public_transit_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .transp) +
  geom_histogram(aes(x = public_transit), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "public_transit")

.transp_cnt <- .transp %>%
  group_by(public_transit) %>%
  dplyr::count() %>%
  arrange(public_transit)
kable(.transp_cnt, caption = "Use public transit") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Use public transit
public_transit n
1 [Yes] 179
2 [No] 31

2.5.2 Where are the public transit stops that you access from your home?

transp_location <- locations[locations$location_category == 16, ]

bm + geom_sf(data = transp_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.6 Section 6: Leisure activities

The following questions are used to generate the locations grouped into this section:

  1. Do you participate in any (individual or group) sports or leisure-time physical activities at least once per month?
  2. Do you visit a park at least once per month?
  3. Do you participate in or attend as a spectator a cultural or non-sport leisure activity at least once per month? For example: singing or drawing lessons, book or poker club, concert or play.
  4. Do you volunteer at least once per month?
  5. Do you engage in any religious or spiritual activities at least once per month?
  6. Do you go to a restaurant, café, bar or other food and drink establishment at least once per month?
  7. Do you get take-out food at least once per month?
  8. Do you regularly go for walks?
leisure_location <- locations[locations$location_category %in% c(17, 18, 19, 20, 21, 22, 23, 24), ] %>% dplyr::rename(location_category_code = location_category)
leisure_location$location_category <- factor(ifelse(leisure_location$location_category_code == 17, "17 [Leisure-time physical activity]",
  ifelse(leisure_location$location_category_code == 18, "18 [Park]",
    ifelse(leisure_location$location_category_code == 19, "19 [Cultural activity]",
      ifelse(leisure_location$location_category_code == 20, "20 [Volunteering place]",
        ifelse(leisure_location$location_category_code == 21, "21 [Religious or spiritual activity]",
          ifelse(leisure_location$location_category_code == 22, "22 [Restaurant, café, bar, etc. ]",
            ifelse(leisure_location$location_category_code == 23, "23 [Take-out]", "24 [Walk]")
          )
        )
      )
    )
  )
))

# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE, aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size = 8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data = leisure_location) +
  geom_histogram(aes(x = location_category), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c("location_category")]) %>%
  group_by(location_category) %>%
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Shopping locations by categories
location_category n
17 [Leisure-time physical activity] 301
18 [Park] 348
19 [Cultural activity] 169
20 [Volunteering place] 138
21 [Religious or spiritual activity] 46
22 [Restaurant, café, bar, etc. ] 440
23 [Take-out] 132
24 [Walk] 364
# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c("interact_id", "location_category")]) %>%
  group_by(interact_id, location_category) %>%
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(
  interact_iid = character(),
  location_category = character()
)
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c("location_category")])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy, .loc_iid_category_cnt[c("location_category", "interact_id")]) %>%
  mutate(n = 0)

.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of leisure locations by participant and category
location_category min mean median max
17 [Leisure-time physical activity] 0 1.43 1 5
18 [Park] 0 1.66 1 5
19 [Cultural activity] 0 0.80 0 5
20 [Volunteering place] 0 0.66 0 5
21 [Religious or spiritual activity] 0 0.22 0 3
22 [Restaurant, café, bar, etc. ] 0 2.10 2 5
23 [Take-out] 0 0.63 0 5
24 [Walk] 0 1.73 1 5

2.7 Section 7: Other places/activities

2.7.1 Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

# extract and recode
.other <- veritas_main[c("interact_id", "other")] %>% dplyr::rename(other_code = other)
.other$other <- factor(ifelse(.other$other_code == 1, "1 [Yes]",
  ifelse(.other$other_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .other) +
  geom_histogram(aes(x = other), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other")

.other_cnt <- .other %>%
  group_by(other) %>%
  dplyr::count() %>%
  arrange(other)
kable(.other_cnt, caption = "Other places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Other places
other n
1 [Yes] 85
2 [No] 125

2.7.2 Can you locate this place?

other_location <- locations[locations$location_category == 25, ]

bm + geom_sf(data = other_location, inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

2.8 Section 8: Social contact

2.8.1 Do you visit anyone at his or her home at least once per month?

# extract and recode
.visiting <- veritas_main[c("interact_id", "visiting")] %>% dplyr::rename(visiting_code = visiting)
.visiting$visiting <- factor(ifelse(.visiting$visiting_code == 1, "1 [Yes]",
  ifelse(.visiting$visiting_code == 2, "2 [No]", "N/A")
))

# histogram of answers
ggplot(data = .visiting) +
  geom_histogram(aes(x = visiting), stat = "count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting")

.visiting_cnt <- .visiting %>%
  group_by(visiting) %>%
  dplyr::count() %>%
  arrange(visiting)
kable(.visiting_cnt, caption = "Social contact") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Social contact
visiting n
1 [Yes] 115
2 [No] 95

2.8.2 Where does this person live?

visiting_location <- locations[locations$location_category == 26, ]

bm + geom_sf(data = st_jitter(visiting_location, .008), inherit.aes = FALSE, color = "blue", size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

2.8.3 Great, we are almost done completing this questionnaire. You have documented all your activity places on a map, and specified with whom you generally do these activities. These last few questions concern the people you documented earlier.

# compute statistics on groups / participant
# > one needs to account for participants who did not report any group
.gr_iid_cnt <- as.data.frame(group[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_gr_iid <- anti_join(veritas_main[c("interact_id")], .gr_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.gr_iid_cnt <- bind_rows(.gr_iid_cnt, .no_gr_iid)

kable(t(as.matrix(summary(.gr_iid_cnt$n))), 
  caption = "Number of groups per participant",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of groups per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 1 1.5 2 8
# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c("interact_id")]) %>%
  group_by(interact_id) %>%
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c("interact_id")], .pl_iid_cnt, by = "interact_id") %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))), 
  caption = "Number of people per participant",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 4.3 6 24
# histogram
.sc_iid_cnt <- .pl_iid_cnt %>% mutate(soc_type = "people")
.sc_iid_cnt <- .gr_iid_cnt %>%
  mutate(soc_type = "group") %>%
  bind_rows(.sc_iid_cnt)

ggplot(data = .sc_iid_cnt) +
  geom_histogram(aes(x = n, y = stat(count), fill = soc_type), position = "dodge") +
  labs(x = "Social network size by element type", fill = element_blank())

2.8.3.1 Among these people, who do you discuss important matters with?

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_important, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))), 
  caption = "Number of important people per participant",
  digits = 1
) %>% 
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of important people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 2.7 4 19
kable(t(as.matrix(summary(.n_people_imp$pct))), 
  caption = "% of important people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of important people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 45.3 80 70.7 100 100 20

2.8.3.2 Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_socialize, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))), 
  caption = "Number of people with whom to socialize per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of people with whom to socialize per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 3.5 5 19
kable(t(as.matrix(summary(.n_people_soc$pct))), 
  caption = "% of people with whom to  socialize among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of people with whom to socialize among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 75 100 84.3 100 100 20

2.8.3.3 Among these people, who do you meet often with but do not necessarily feel close to?

# extract number of important people / participant
.n_not_close <- not_close %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_not_close <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_not_close, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_not_close = n.y) %>%
  mutate(pct = 100 * n_not_close / n_people)

kable(t(as.matrix(summary(.n_people_not_close$n_not_close))), 
  caption = "Number of not so close people per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of not so close people per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0.6 1 10
kable(t(as.matrix(summary(.n_people_not_close$pct))), 
  caption = "% of not so close people among social contact per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of not so close people among social contact per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 0 0 13.7 16.1 100 20

2.8.3.4 Among these people, who knows whom?

# extract number of who knows who relationships
.n_relat <- relationship %>%
  filter(relationship_type == 1) %>%
  dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_relat <- left_join(veritas_main[c("interact_id")], .n_people, by = "interact_id") %>%
  left_join(.n_relat, by = "interact_id") %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_relat = n.y) %>%
  mutate(pct = 100 * n_relat * 2 / (n_people * (n_people - 1))) # potential number of relationships = N x (N -1) / 2

kable(t(as.matrix(summary(.n_people_relat$n_relat))), 
  caption = "Number of relationships « who knows who » per participant",
  digits = 1
) %>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Number of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 2.5 8.3 9 171
kable(t(as.matrix(summary(.n_people_relat$pct))), 
  caption = "% of relationships « who knows who » per participant",
  digits = 1
) %>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
% of relationships « who knows who » per participant
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
0 41.1 80 68.7 100 100 61

2.9 Derived metrics

2.9.1 Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

# code  en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99    Other

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "By car (driver)" = sum(location_tmode_1),
    "By car (passenger)" = sum(location_tmode_2),
    "By taxi/Uber" = sum(location_tmode_3),
    "On foot" = sum(location_tmode_4),
    "By bike" = sum(location_tmode_5),
    "By bus" = sum(location_tmode_6),
    "By train" = sum(location_tmode_7),
    "Other" = sum(location_tmode_99)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Transportation mode preferences
description N By car (driver) By car (passenger) By taxi/Uber On foot By bike By bus By train Other
2 [Other residence] 11 3 5 1 2 3 5 1 1
3 [Work] 176 71 4 1 45 53 41 13 17
4 [School/College/University] 24 5 0 0 8 2 15 3 4
5 [Supermarket] 549 231 50 0 298 87 39 3 4
6 [Public/farmer’s market] 109 34 8 0 58 35 9 0 0
7 [Bakery] 117 27 3 0 85 18 5 0 1
8 [Specialty food store] 193 46 8 0 130 37 17 2 1
9 [Convenience store/Dépanneur] 50 13 1 0 40 1 1 0 0
10 [Liquor store/SAQ] 185 80 13 0 98 27 6 0 0
11 [Bank] 159 34 3 0 124 21 9 2 2
12 [Hair salon/barbershop] 149 59 6 0 76 23 26 2 3
13 [Post office] 139 23 0 0 121 21 3 0 0
14 [Drugstore] 192 59 6 0 145 26 8 0 0
15 [Doctor/healthcare provider] 211 88 7 0 93 34 49 7 2
16 [Public transit stop] 422 3 3 0 397 5 0 0 26
17 [Leisure-time physical activity] 301 109 22 0 141 87 17 6 10
18 [Park] 348 75 31 0 224 97 18 1 6
19 [Cultural activity] 169 55 34 2 50 26 56 4 1
20 [Volunteering place] 138 62 7 0 63 21 21 3 5
21 [Religious/spiritual activity] 46 22 4 0 18 8 9 1 0
22 [Restaurant, café, bar, etc.] 440 129 39 2 260 53 61 3 10
23 [Take-out] 132 42 8 0 74 6 9 1 15
24 [Walk] 364 51 15 0 313 39 10 0 5
25 [Other place] 146 67 13 0 63 30 28 9 7
26 [Social contact residence] 158 79 26 0 50 28 26 6 1
# graph
.tm1 <- .tm %>%
  filter(location_tmode_1 == 1) %>%
  mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
  filter(location_tmode_2 == 1) %>%
  mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
  filter(location_tmode_3 == 1) %>%
  mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
  filter(location_tmode_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode_6 == 1) %>%
  mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
  filter(location_tmode_7 == 1) %>%
  mutate(tm = "[7] By train")
.tm99 <- .tm %>%
  filter(location_tmode_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
  bind_rows(.tm3) %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm7) %>%
  bind_rows(.tm99)

# histogram of answers
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

2.9.2 Visiting places alone

Based on the answers to the question Do you usually go to this place alone or with other people?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.alone <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_alone_recode = case_when(
    location_alone == 1 ~ 1,
    location_alone == 2 ~ 0
  ))

.alone_grouped <- .alone %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), "Visited alone" = sum(location_alone_recode),
    "Visited alone (%)" = round(sum(location_alone_recode) * 100.0 / n(), digits = 1)
  )

kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visiting places alone
description N Visited alone Visited alone (%)
2 [Other residence] 11 NA NA
3 [Work] 176 72 40.9
4 [School/College/University] 24 20 83.3
5 [Supermarket] 549 425 77.4
6 [Public/farmer’s market] 109 62 56.9
7 [Bakery] 117 89 76.1
8 [Specialty food store] 193 156 80.8
9 [Convenience store/Dépanneur] 50 45 90.0
10 [Liquor store/SAQ] 185 144 77.8
11 [Bank] 159 149 93.7
12 [Hair salon/barbershop] 149 140 94.0
13 [Post office] 139 132 95.0
14 [Drugstore] 192 170 88.5
15 [Doctor/healthcare provider] 211 194 91.9
16 [Public transit stop] 422 352 83.4
17 [Leisure-time physical activity] 301 150 49.8
18 [Park] 348 148 42.5
19 [Cultural activity] 169 44 26.0
20 [Volunteering place] 138 57 41.3
21 [Religious/spiritual activity] 46 15 32.6
22 [Restaurant, café, bar, etc.] 440 96 21.8
23 [Take-out] 132 65 49.2
24 [Walk] 364 189 51.9
25 [Other place] 146 82 56.2
26 [Social contact residence] 158 70 44.3
# histogram of answers
ggplot(data = .alone) +
  geom_bar(aes(x = fct_rev(description), fill = factor(location_alone)), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("N/A", "Alone", "With someone")) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited alone", x = element_blank()) +
  coord_flip()

2.9.3 Visit frequency

Based on the answers to the question How often do you go there?.

loc_labels <- data.frame(location_category = c(2:26), description = c(
  " 2 [Other residence]",
  " 3 [Work]",
  " 4 [School/College/University]",
  " 5 [Supermarket]",
  " 6 [Public/farmer’s market]",
  " 7 [Bakery]",
  " 8 [Specialty food store]",
  " 9 [Convenience store/Dépanneur]",
  "10 [Liquor store/SAQ]",
  "11 [Bank]",
  "12 [Hair salon/barbershop]",
  "13 [Post office]",
  "14 [Drugstore]",
  "15 [Doctor/healthcare provider]",
  "16 [Public transit stop]",
  "17 [Leisure-time physical activity]",
  "18 [Park]",
  "19 [Cultural activity]",
  "20 [Volunteering place]",
  "21 [Religious/spiritual activity]",
  "22 [Restaurant, café, bar, etc.]",
  "23 [Take-out]",
  "24 [Walk]",
  "25 [Other place]",
  "26 [Social contact residence]"
))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(), min = min(location_freq_visit),
    max = max(location_freq_visit),
    mean = mean(location_freq_visit),
    median = median(location_freq_visit),
    sd = sd(location_freq_visit)
  )

kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
Visit frequency (expressed in times/year)
description N min max mean median sd
2 [Other residence] 11 2 208 75.454545 52 62.110166
3 [Work] 176 1 520 177.369318 208 115.107031
4 [School/College/University] 24 4 364 154.000000 130 102.007672
5 [Supermarket] 549 2 1040 62.704918 52 73.032792
6 [Public/farmer’s market] 109 1 364 44.963303 24 62.284432
7 [Bakery] 117 3 260 37.179487 24 41.135314
8 [Specialty food store] 193 3 1040 50.668394 24 86.380842
9 [Convenience store/Dépanneur] 50 1 520 59.740000 24 83.901986
10 [Liquor store/SAQ] 185 1 208 31.843243 24 34.100431
11 [Bank] 159 1 260 30.666667 24 36.516282
12 [Hair salon/barbershop] 149 1 36 6.953020 6 5.344024
13 [Post office] 139 2 104 14.589928 8 19.129541
14 [Drugstore] 192 1 208 42.151042 24 44.589613
15 [Doctor/healthcare provider] 211 1 104 5.535545 3 8.809757
16 [Public transit stop] 422 1 364 61.715640 24 83.875231
17 [Leisure-time physical activity] 301 2 364 102.458472 104 86.560590
18 [Park] 348 1 1560 74.514368 33 119.288874
19 [Cultural activity] 169 1 208 17.698225 6 31.598637
20 [Volunteering place] 138 1 5200 116.913044 52 445.811938
21 [Religious/spiritual activity] 46 4 364 76.260870 52 102.167930
22 [Restaurant, café, bar, etc.] 440 1 364 26.950000 12 43.894544
23 [Take-out] 132 2 364 25.037879 12 39.906648
24 [Walk] 364 1 1560 99.156593 52 136.668166
25 [Other place] 146 1 520 42.164384 24 63.740749
26 [Social contact residence] 158 3 312 36.645570 24 46.663325
# graph
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
  scale_y_continuous(limits = c(0, 365)) +
  labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip()

2.9.4 Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

-- Reading Camille tbx indics from Essence table
SELECT interact_id,
  n_acti_places, n_weekly_vst, n_acti_types,
  cvx_perimeter, cvx_surface,
  min_length, max_length, median_length, 
  pct_visits_neighb, 
  n_acti_prn, pct_visits_prn, prn_area_km2
FROM essence_table.essence_perchoux_tbx
WHERE city_id = 'Vancouver' AND wave_id = 1

2.9.5 Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

-- Reading Alex tbx indics from Essence table
SELECT interact_id,
  people_degree, 
  socialize_size, socialize_meet, socialize_chat,
  important_size, group_degree, simmelian
FROM essence_table.essence_naud_social
WHERE city_id = 'Vancouver' AND wave_id = 1

2.9.5.1 Number of people in the network (people_degree)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = people_degree))

kable(t(as.matrix(summary(ess.tab.alex$people_degree))), caption = "people_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
people_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 4.257143 6 24

2.9.5.2 Simmelian Brokerage (simmelian)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = simmelian))

kable(t(as.matrix(summary(ess.tab.alex$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
simmelian
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
1 2.763889 5.416667 8.721272 12.69375 42.61905 27

2.9.5.3 Number of people with whom the participant like to socialize (socialize_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = socialize_size))

kable(t(as.matrix(summary(ess.tab.alex$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 3 3.528571 5 19

2.9.5.4 Weekly face-to-face interactions among people with whom the participant like to socialize (socialize_meet)

ggplot(filter(ess.tab.alex, socialize_meet < 100)) +
  geom_histogram(aes(x = socialize_meet)) +
  annotate(geom = "text", x = 75, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_meet))), caption = "socialize_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_meet
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 106 364 750.6 930.5 11068

2.9.5.5 Weekly ICT interactions among people with whom the participant like to socialize (socialize_chat)

ggplot(filter(ess.tab.alex, socialize_chat < 100)) +
  geom_histogram(aes(x = socialize_chat)) +
  annotate(geom = "text", x = 55, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$socialize_chat))), caption = "socialize_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
socialize_chat
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 88 364 248547.8 647 5.2e+07

2.9.5.6 Number of people with whom the participant discuss important matters (important_size)

ggplot(ess.tab.alex) +
  geom_histogram(aes(x = important_size))

kable(t(as.matrix(summary(ess.tab.alex$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
important_size
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 1 2 2.714286 4 19

2.9.5.7 Number of people in all groups (group_degree)

ggplot(filter(ess.tab.alex, group_degree < 100)) +
  geom_histogram(aes(x = group_degree)) +
  annotate(geom = "text", x = 20, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(ess.tab.alex$group_degree))), caption = "group_degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")
group_degree
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 4 7.12381 11 63