VERITAS dataset description

Unlike the Eligibility or Health questionnaires, which can mostly be encoded as a flat table, the VERITAS questionnaire implicitly records a series of entities and their relationships:

Places: list of geocoded locations visited by participants, along with the following characteristics: category, name, visit frequency, transportation mode
Social contacts: people and/or groups frequented by participants
Relationships: between social contacts (who knows who / who belongs to which group) as well as between locations and social contacts (places visited along with whom)

The diagram below illustrates the various entities collected throught the VERITAS questionnaire:

Basic descriptive statistics

Section 1: Residence and Neighbourhood

Now, let’s start with your home. What is your address?

home_location <- locations[locations$location_category == 1,]

## version with leaflet || DO NOT USE -> LEAKS COORDINATES
# leaflet(data=st_cast(home_location, 'POINT'),
#              options = leafletOptions(zoomControl = FALSE, boxZoom = FALSE, scrollWheelZoom = FALSE,
#                                       dragging = FALSE, keyboard = FALSE,
#                                       minZoom = 10.5, maxZoom = 10.5)) %>%
#   addProviderTiles(providers$CartoDB.Positron) %>%
#   addCircleMarkers(radius = 3, stroke = FALSE, fillOpacity = 0.5)

## version with tmap
# tm_shape(home_location) + tm_dots() + 
#   tm_shape(basemap) + tm_fill() + tm_borders(alpha = 0.3) + 
#   tm_shape(home_location) + tm_dots(col = "blue", size = .3, alpha = .3) + 
#   tm_layout(frame = FALSE)

## version ggmap
van_aoi <- st_bbox(home_location)
names(van_aoi) <- c('left', 'bottom', 'right', 'top')
van_aoi[['left']] <- van_aoi[['left']] - .02
van_aoi[['right']] <- van_aoi[['right']] + .01
van_aoi[['top']] <- van_aoi[['top']] + .018
van_aoi[['bottom']] <- van_aoi[['bottom']] - .018

bm <- get_stamenmap(van_aoi, zoom = 11, maptype = "toner-lite") %>% ggmap(extent = 'device')
bm + geom_sf(data = st_jitter(home_location, .008), inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3) #see https://github.com/r-spatial/sf/issues/336

NB: Home locations have been randomly shifted from their original position to protect privacy.

# Number of participants by municipalites
home_by_municipalites <- st_join(home_location, municipalities["NAME"])
home_by_mun_cnt <- home_by_municipalites %>% 
  group_by(NAME) %>% 
  dplyr::count() %>%
  arrange(desc(n), NAME)
home_by_mun_cnt$Shape <- NULL
kable(home_by_mun_cnt, caption = "Number of participants by municipalities") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of participants by municipalities
NAME	n
Vancouver	208
Burnaby	1
Greater Vancouver	1

When did you move to your current address?

#N of addresses by date of move
year_of_move <- veritas_main[c("interact_id", "home_move_date")]
year_of_move$home_move_date <- year(ymd(year_of_move$home_move_date))
ggplot(data=year_of_move) +
  geom_histogram(aes(x=home_move_date))

#recode date of move
year_of_move$home_move_date_recode <- as.character(year_of_move$home_move_date)
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2005] <- '2005 - 2001'
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 2000] <- '2000 - 1991'
year_of_move$home_move_date_recode[year_of_move$home_move_date <= 1990] <- paste('1990 -', min(year_of_move$home_move_date))

year_of_move_cnt <- year_of_move %>% 
  group_by(home_move_date_recode) %>% 
  dplyr::count() %>%
  arrange(desc(home_move_date_recode))
kable(year_of_move_cnt, caption = "Year of move to current address") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Year of move to current address
home_move_date_recode	n
2018	17
2017	8
2016	17
2015	7
2014	7
2013	8
2012	6
2011	6
2010	7
2009	7
2008	3
2007	7
2006	3
2005 - 2001	35
2000 - 1991	46
1990 - 1950	26

If you were asked to draw the boundaries of your neighbourhood, what would they be?

prn <- poly_geom[poly_geom$area_type == 'neighborhood',]

# Map
# leaflet(data=prn,
#         options = leafletOptions(zoomControl = FALSE, boxZoom = FALSE, scrollWheelZoom = FALSE,
#                                  dragging = FALSE, keyboard = FALSE,
#                                  minZoom = 10.5, maxZoom = 10.5)) %>%
#   addProviderTiles(providers$CartoDB.Positron) %>%
#   addPolygons(weight = 1, smoothFactor = 0.5,
#               opacity = 0.5, fillOpacity = 0.1)

## version with tmap
# tm_shape(home_location) + tm_dots() + 
#   tm_shape(basemap) + tm_fill() + tm_borders(alpha = 0.3) + 
#   tm_shape(prn) + tm_fill(col = "blue", alpha = .05) + tm_borders(col = "blue", alpha = 0.3) + 
#   tm_layout(frame = FALSE)

## version ggmap
bm + geom_sf(data = prn, inherit.aes = FALSE , fill = alpha('blue', 0.05), color = alpha('blue', 0.3))

# Min, max, median & mean area of PRN
prn$area_m2 <- st_area(prn$Shape)
kable(t(as.matrix(summary(prn$area_m2))), caption = "Area (in square meters) of the perceived residential neighborhood") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Area (in square meters) of the perceived residential neighborhood
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
741.3206	782788.9	1852841	3383486	4230928	26899228

NB only 179 valid neighborhoods were collected, as many participants struggled to draw polygons on the map.

How attached are you to your neighbourhood?

# extract and recode
.ngh_att <- veritas_main[veritas_main$neighbourhood_attach != 99,c('interact_id', 'neighbourhood_attach')] %>% dplyr::rename(neighbourhood_attach_code = neighbourhood_attach)
.ngh_att$neighbourhood_attach <- factor(ifelse(.ngh_att$neighbourhood_attach_code == 1, '1 [Not attached at all]',
                               ifelse(.ngh_att$neighbourhood_attach_code == 6, '6 [Very attached]',
                                      .ngh_att$neighbourhood_attach_code)))

# histogram of attachment
ggplot(data=.ngh_att) +
  geom_histogram(aes(x=neighbourhood_attach), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "neighbourhood_attach")

.ngh_att_cnt <- .ngh_att %>% 
  group_by(neighbourhood_attach) %>% 
  dplyr::count() %>%
  arrange(neighbourhood_attach)
kable(.ngh_att_cnt, caption = "Neigbourhood attachment") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Neigbourhood attachment
neighbourhood_attach	n
1 [Not attached at all]	1
2	6
3	16
4	31
5	88
6 [Very attached]	65

On average, how many hours per day do you spend outside of your home?

# histogram of n hours out
ggplot(data=veritas_main) +
  geom_histogram(aes(x=hours_out))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$hours_out))), caption = "Hours/day outside home") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Hours/day outside home
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1	4	6	6.976191	10	16

Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

# histogram of n hours out
ggplot(data=veritas_main) +
  geom_histogram(aes(x=hours_out_neighb))

# Min, max, median & mean hours/day out of neighborhood
kable(t(as.matrix(summary(veritas_main$hours_out_neighb))), caption = "Hours/day outside neighbourhood") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Hours/day outside neighbourhood
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2	3	4.547619	8	15

Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

# extract and recode
.unsafe <- veritas_main[c('interact_id', 'unsafe_area')] %>% dplyr::rename(unsafe_area_code = unsafe_area)
.unsafe$unsafe_area <- factor(ifelse(.unsafe$unsafe_area_code == 1, '1 [Yes]', 
                                    ifelse(.unsafe$unsafe_area_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.unsafe) +
  geom_histogram(aes(x=unsafe_area), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "unsafe_area")

.unsafe_cnt <- .unsafe %>% 
  group_by(unsafe_area) %>% 
  dplyr::count() %>%
  arrange(unsafe_area)
kable(.unsafe_cnt, caption = "unsafe_area") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

unsafe_area
unsafe_area	n
1 [Yes]	9
2 [No]	201

#map
unsafe <- poly_geom[poly_geom$area_type == 'unsafe area',]

# Map
# leaflet(data=unsafe,
#         options = leafletOptions(zoomControl = FALSE, boxZoom = FALSE, scrollWheelZoom = FALSE,
#                                  dragging = FALSE, keyboard = FALSE,
#                                  minZoom = 10.5, maxZoom = 10.5)) %>%
#   addProviderTiles(providers$CartoDB.Positron) %>%
#   addPolygons(weight = 1, smoothFactor = 0.5,
#               opacity = 0.5, fillOpacity = 0.1)

# tm_shape(home_location) + tm_dots() + 
#   tm_shape(basemap) + tm_fill() + tm_borders(alpha = 0.3) + 
#   tm_shape(unsafe) + tm_fill(col = "blue", alpha = .05) + tm_borders(col = "blue", alpha = 0.3) + 
#   tm_layout(frame = FALSE)

## version ggmap
bm + geom_sf(data = unsafe, inherit.aes = FALSE , fill = alpha('blue', 0.3), color = alpha('blue', 0.5))

# Min, max, median & mean area of PRN
unsafe$area_m2 <- st_area(unsafe$Shape)
kable(t(as.matrix(summary(unsafe$area_m2))), caption = "Area (in square meters) of the perceived unsafe area") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Area (in square meters) of the perceived unsafe area
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
8860.23	114094.5	204928.5	259178.2	319796.6	833831.4

Do you spend the night somewhere other than your home at least once per week?

# extract and recode
.o_res <- veritas_main[c('interact_id', 'other_resid')] %>% dplyr::rename(other_resid_code = other_resid)
.o_res$other_resid <- factor(ifelse(.o_res$other_resid_code == 1, '1 [Yes]', 
                                    ifelse(.o_res$other_resid_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.o_res) +
  geom_histogram(aes(x=other_resid), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other_resid")

.o_res_cnt <- .o_res %>% 
  group_by(other_resid) %>% 
  dplyr::count() %>%
  arrange(other_resid)
kable(.o_res_cnt, caption = "Other residence") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Other residence
other_resid	n
1 [Yes]	11
2 [No]	199

Section 2: Occupation

Are you currently working?

# extract and recode
.work <- veritas_main[c('interact_id', 'working')] %>% dplyr::rename(working_code = working)
.work$working <- factor(ifelse(.work$working_code == 1, '1 [Yes]', 
                                    ifelse(.work$working_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.work) +
  geom_histogram(aes(x=working), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "working")

.work_cnt <- .work %>% 
  group_by(working) %>% 
  dplyr::count() %>%
  arrange(working)
kable(.work_cnt, caption = "Currently working") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently working
working	n
1 [Yes]	128
2 [No]	82

Where do you work?

work_location <- locations[locations$location_category == 3,]

# leaflet(data=st_cast(work_location, 'POINT'),
#              options = leafletOptions(zoomControl = FALSE, boxZoom = FALSE, scrollWheelZoom = FALSE,
#                                       dragging = FALSE, keyboard = FALSE,
#                                       minZoom = 10.5, maxZoom = 10.5)) %>%
#   addProviderTiles(providers$CartoDB.Positron) %>%
#   addCircleMarkers(radius = 3, stroke = FALSE, fillOpacity = 0.5) %>%
#   setView(-73.65, 45.55, 10.5)

# tm_shape(home_location) + tm_dots() + 
#   tm_shape(basemap) + tm_fill() + tm_borders(alpha = 0.3) + 
#   tm_shape(work_location) + tm_dots(col = "blue", size = .3, alpha = .3) + 
#   tm_layout(frame = FALSE)

bm + geom_sf(data = work_location, inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3)

On average, how many hours per week do you work?

# histogram of n hours out
ggplot(data=veritas_main[veritas_main$working == 1,]) +
  geom_histogram(aes(x=work_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$work_hours[veritas_main$working == 1]))), caption = "Work hours/week") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Work hours/week
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
2	24.75	37	32.66016	40	84

Which of the following categories best describes the amount of physical activity required for your job?

# extract and recode
.work_pa <- veritas_main[veritas_main$working == 1,c('interact_id', 'work_pa')] %>% dplyr::rename(work_pa_code = work_pa)
.work_pa$work_pa <- factor(ifelse(.work_pa$work_pa_code == 1, '1 [Mainly sitting with slight arm movements]',
                               ifelse(.work_pa$work_pa_code == 2, '2 [Sitting and standing with some walking]',
                                      ifelse(.work_pa$work_pa_code == 3, '3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)]',
                                             ifelse(.work_pa$work_pa_code == 4, '4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)]', 'N/A')))))

# histogram of answers
ggplot(data=.work_pa) +
  geom_histogram(aes(x=work_pa), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Physical activity at work")

.work_pa_cnt <- .work_pa %>% 
  group_by(work_pa) %>% 
  dplyr::count() %>%
  arrange(work_pa)
kable(.work_pa_cnt, caption = "Physical activity at work") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Physical activity at work
work_pa	n
1 [Mainly sitting with slight arm movements]	55
2 [Sitting and standing with some walking]	57
3 [Walking, with some handling of materials generally weighing less than 25 kg (55 lbs)]	15
4 [Walking and heavy manual work often requiring handling of materials weighing over 25 kg (50 lbs)]	1

Are you currently a registered student?

# extract and recode
.study <- veritas_main[c('interact_id', 'studying')] %>% dplyr::rename(studying_code = studying)
.study$studying <- factor(ifelse(.study$studying_code == 1, '1 [Yes]', 
                                    ifelse(.study$studying_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.study) +
  geom_histogram(aes(x=studying), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Studying")

.study_cnt <- .study %>% 
  group_by(studying) %>% 
  dplyr::count() %>%
  arrange(studying)
kable(.study_cnt, caption = "Currently studying") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Currently studying
studying	n
1 [Yes]	17
2 [No]	193

Where do you study?

study_location <- locations[locations$location_category == 4,]

# leaflet(data=st_cast(study_location, 'POINT'),
#              options = leafletOptions(zoomControl = FALSE, boxZoom = FALSE, scrollWheelZoom = FALSE,
#                                       dragging = FALSE, keyboard = FALSE,
#                                       minZoom = 10.5, maxZoom = 10.5)) %>%
#   addProviderTiles(providers$CartoDB.Positron) %>%
#   addCircleMarkers(radius = 3, stroke = FALSE, fillOpacity = 0.5) %>%
#   setView(-73.65, 45.55, 10.5)

# tm_shape(home_location) + tm_dots() + 
#   tm_shape(basemap) + tm_fill() + tm_borders(alpha = 0.3) + 
#   tm_shape(study_location) + tm_dots(col = "blue", size = .3, alpha = .3) + 
#   tm_layout(frame = FALSE)

bm + geom_sf(data = study_location, inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3)

On average, how many hours per week do you study?

# histogram of n hours out
ggplot(data=veritas_main[veritas_main$studying == 1,]) +
  geom_histogram(aes(x=study_hours))

# Min, max, median & mean hours/day out
kable(t(as.matrix(summary(veritas_main$study_hours[veritas_main$studying == 1]))), caption = "study hours/week") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

study hours/week
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
2	12	20	23.58824	30	60

Section 3: Shopping activities

The following questions are used to generate the locations grouped into this section:

Do you shop for groceries at a supermarket at least once per month?
Do you shop at a public/farmer’s market at least once per month?
Do you shop at a bakery at least once per month?
Do you go to a specialty food store at least once per month? For example: a cheese shop, fruit and vegetable store, butcher’s shop, natural and health food store.
Do you go to a convenience store at least once per month?
Do you go to a liquor store at least once per month?

shop_location <- locations[locations$location_category %in% c(5, 6, 7, 8, 9, 10),] %>% dplyr::rename(location_category_code = location_category)
shop_location$location_category <- factor(ifelse(shop_location$location_category_code == 5, ' 5 [Supermarket]',
                                                 ifelse(shop_location$location_category_code == 6, ' 6 [Public/farmer’s market]',
                                                        ifelse(shop_location$location_category_code == 7, ' 7 [Bakery]',
                                                               ifelse(shop_location$location_category_code == 8, ' 8 [Specialty food store]',
                                                                      ifelse(shop_location$location_category_code == 9, ' 9 [Convenience store/Dépanneur]','10 [Liquor store/SAQ]'))))))

# map
bm + geom_sf(data = shop_location, inherit.aes = FALSE , aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size=8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data=shop_location) +
  geom_histogram(aes(x=location_category), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(shop_location[c('location_category')]) %>% 
  group_by(location_category) %>% 
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
5 [Supermarket]	549
6 [Public/farmer’s market]	109
7 [Bakery]	117
8 [Specialty food store]	193
9 [Convenience store/Dépanneur]	50
10 [Liquor store/SAQ]	185

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(shop_location[c('interact_id', 'location_category')]) %>% 
  group_by(interact_id, location_category) %>% 
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(interact_iid = character(),
                     location_category = character())
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c('location_category')])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_shop_iid <- dplyr::setdiff(.dummy,  .loc_iid_category_cnt[c('location_category','interact_id')]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_shop_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	mean	median	max
5 [Supermarket]	2.61	3	5
6 [Public/farmer’s market]	0.52	0	5
7 [Bakery]	0.56	0	4
8 [Specialty food store]	0.92	0	5
9 [Convenience store/Dépanneur]	0.24	0	4
10 [Liquor store/SAQ]	0.88	1	5

Section 4: Services

The following questions are used to generate the locations grouped into this section:

Where is the bank you go to most often located?
Where is the hair salon or barber shop you go to most often?
Where is the post office where you go to most often?
Where is the drugstore you go to most often?
If you need to visit a doctor or other healthcare provider, where do you go most often?

serv_location <- locations[locations$location_category %in% c(11, 12, 13, 14, 15),] %>% dplyr::rename(location_category_code = location_category)
serv_location$location_category <- factor(ifelse(serv_location$location_category_code == 11, '11 [Bank]',
                                                 ifelse(serv_location$location_category_code == 12, '12 [Hair salon/barbershop]',
                                                        ifelse(serv_location$location_category_code == 13, '13 [Post office]',
                                                               ifelse(serv_location$location_category_code == 14, '14 [Drugstore]', '15 Doctor/healthcare provider]')))))

# map
bm + geom_sf(data = serv_location, inherit.aes = FALSE , aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size=8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data=serv_location) +
  geom_histogram(aes(x=location_category), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Shopping locations by categories")

.location_category_cnt <- as.data.frame(serv_location[c('location_category')]) %>% 
  group_by(location_category) %>% 
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
11 [Bank]	159
12 [Hair salon/barbershop]	149
13 [Post office]	139
14 [Drugstore]	192
15 Doctor/healthcare provider]	211

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(serv_location[c('interact_id', 'location_category')]) %>% 
  group_by(interact_id, location_category) %>% 
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(interact_iid = character(),
                     location_category = character())
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c('location_category')])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_serv_iid <- dplyr::setdiff(.dummy,  .loc_iid_category_cnt[c('location_category','interact_id')]) %>%
  mutate(n = 0)
.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_serv_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of shopping locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of shopping locations by participant and category
location_category	mean	median	max
11 [Bank]	0.76	1	1
12 [Hair salon/barbershop]	0.71	1	1
13 [Post office]	0.66	1	1
14 [Drugstore]	0.91	1	1
15 Doctor/healthcare provider]	1.00	1	4

Section 5: Transportation

Do you use public transit from your home?

# extract and recode
.transp <- veritas_main[c('interact_id', 'public_transit')] %>% dplyr::rename(public_transit_code = public_transit)
.transp$public_transit <- factor(ifelse(.transp$public_transit_code == 1, '1 [Yes]', 
                                    ifelse(.transp$public_transit_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.transp) +
  geom_histogram(aes(x=public_transit), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "public_transit")

.transp_cnt <- .transp %>% 
  group_by(public_transit) %>% 
  dplyr::count() %>%
  arrange(public_transit)
kable(.transp_cnt, caption = "Use public transit") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Use public transit
public_transit	n
1 [Yes]	179
2 [No]	31

Where are the public transit stops that you access from your home?

transp_location <- locations[locations$location_category == 16,]

bm + geom_sf(data = transp_location, inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3)

Section 6: Leisure activities

The following questions are used to generate the locations grouped into this section:

Do you participate in any (individual or group) sports or leisure-time physical activities at least once per month?
Do you visit a park at least once per month?
Do you participate in or attend as a spectator a cultural or non-sport leisure activity at least once per month? For example: singing or drawing lessons, book or poker club, concert or play.
Do you volunteer at least once per month?
Do you engage in any religious or spiritual activities at least once per month?
Do you go to a restaurant, café, bar or other food and drink establishment at least once per month?
Do you get take-out food at least once per month?
Do you regularly go for walks?

leisure_location <- locations[locations$location_category %in% c(17, 18, 19, 20, 21, 22, 23, 24),] %>% dplyr::rename(location_category_code = location_category)
leisure_location$location_category <- factor(ifelse(leisure_location$location_category_code == 17, '17 [Leisure-time physical activity]',
                                                    ifelse(leisure_location$location_category_code == 18, '18 [Park]',
                                                      ifelse(leisure_location$location_category_code == 19, '19 [Cultural activity]',
                                                        ifelse(leisure_location$location_category_code == 20, '20 [Volunteering place]',
                                                               ifelse(leisure_location$location_category_code == 21, '21 [Religious or spiritual activity]',
                                                                      ifelse(leisure_location$location_category_code == 22, '22 [Restaurant, café, bar, etc. ]',
                                                                             ifelse(leisure_location$location_category_code == 23, '23 [Take-out]', '24 [Walk]'))))))))

# map
bm + geom_sf(data = leisure_location, inherit.aes = FALSE , aes(color = location_category), size = 1.5, alpha = .3) +
  scale_color_brewer(palette = "Accent") +
  theme(legend.position = "bottom", legend.text = element_text(size=8), legend.title = element_blank())

# compute number of shopping locations by category
ggplot(data=leisure_location) +
  geom_histogram(aes(x=location_category), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "Leisure locations by categories")

.location_category_cnt <- as.data.frame(leisure_location[c('location_category')]) %>% 
  group_by(location_category) %>% 
  dplyr::count() %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Shopping locations by categories") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Shopping locations by categories
location_category	n
17 [Leisure-time physical activity]	301
18 [Park]	348
19 [Cultural activity]	169
20 [Volunteering place]	138
21 [Religious or spiritual activity]	46
22 [Restaurant, café, bar, etc. ]	440
23 [Take-out]	132
24 [Walk]	364

# compute statistics on shopping locations by participants and categories
# > one needs to account for participants who did not report location for some categories
.loc_iid_category_cnt <- as.data.frame(leisure_location[c('interact_id', 'location_category')]) %>% 
  group_by(interact_id, location_category) %>% 
  dplyr::count()

# (cont'd) simulate SQL JOIN TABLE ON TRUE
.dummy <- data.frame(interact_iid = character(),
                     location_category = character())
for (iid in as.vector(veritas_main$interact_id)) {
  .dmy <- distinct(.loc_iid_category_cnt[c('location_category')])
  .dmy$interact_id <- as.character(iid)
  .dummy <- rbind(.dummy, .dmy)
}

# (cont'd) find iid/categ combination without match in veritas locations
.no_leisure_iid <- dplyr::setdiff(.dummy,  .loc_iid_category_cnt[c('location_category','interact_id')]) %>%
  mutate(n = 0)

.loc_iid_category_cnt <- bind_rows(.loc_iid_category_cnt, .no_leisure_iid)

.location_category_cnt <- .loc_iid_category_cnt %>%
  group_by(location_category) %>%
  dplyr::summarise(min = min(n), mean = round(mean(n), 2), median = median(n), max = max(n)) %>%
  arrange(location_category)
kable(.location_category_cnt, caption = "Number of leisure locations by participant and category") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of leisure locations by participant and category
location_category	mean	median	max
17 [Leisure-time physical activity]	1.43	1	5
18 [Park]	1.66	1	5
19 [Cultural activity]	0.80	0	5
20 [Volunteering place]	0.66	0	5
21 [Religious or spiritual activity]	0.22	0	3
22 [Restaurant, café, bar, etc. ]	2.10	2	5
23 [Take-out]	0.63	0	5
24 [Walk]	1.73	1	5

Section 7: Other places/activities

Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

# extract and recode
.other <- veritas_main[c('interact_id', 'other')] %>% dplyr::rename(other_code = other)
.other$other <- factor(ifelse(.other$other_code == 1, '1 [Yes]', 
                                    ifelse(.other$other_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.other) +
  geom_histogram(aes(x=other), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "other")

.other_cnt <- .other %>% 
  group_by(other) %>% 
  dplyr::count() %>%
  arrange(other)
kable(.other_cnt, caption = "Other places") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Other places
other	n
1 [Yes]	85
2 [No]	125

Can you locate this place?

other_location <- locations[locations$location_category == 25,]

bm + geom_sf(data = other_location, inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3)

Section 9: Social contact

Do you visit anyone at his or her home at least once per month?

# extract and recode
.visiting <- veritas_main[c('interact_id', 'visiting')] %>% dplyr::rename(visiting_code = visiting)
.visiting$visiting <- factor(ifelse(.visiting$visiting_code == 1, '1 [Yes]', 
                                    ifelse(.visiting$visiting_code == 2, '2 [No]', 'N/A')))

# histogram of answers
ggplot(data=.visiting) +
  geom_histogram(aes(x=visiting), stat="count") +
  scale_x_discrete(labels = function(lbl) str_wrap(lbl, width = 20)) +
  labs(x = "visiting")

.visiting_cnt <- .visiting %>% 
  group_by(visiting) %>% 
  dplyr::count() %>%
  arrange(visiting)
kable(.visiting_cnt, caption = "Social contact") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Social contact
visiting	n
1 [Yes]	115
2 [No]	95

Where does this person live?

visiting_location <- locations[locations$location_category == 26,]

bm + geom_sf(data = st_jitter(visiting_location, .008), inherit.aes = FALSE , color = 'blue', size = 1.8, alpha = .3)

NB: Visiting locations have been randomly shifted from their original position to protect privacy.

Great, we are almost done completing this questionnaire. You have documented all your activity places on a map, and specified with whom you generally do these activities. These last few questions concern the people you documented earlier.

# compute statistics on groups / participant
# > one needs to account for participants who did not report any group
.gr_iid_cnt <- as.data.frame(group[c('interact_id')]) %>% 
  group_by(interact_id) %>% 
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_gr_iid <- anti_join(veritas_main[c('interact_id')], .gr_iid_cnt, by = 'interact_id') %>%
  mutate(n = 0)
.gr_iid_cnt <- bind_rows(.gr_iid_cnt, .no_gr_iid)

kable(t(as.matrix(summary(.gr_iid_cnt$n))), caption = "Number of groups per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of groups per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.5	2	8

# compute statistics on people / participant
# > one needs to account for participants who did not report any group
.pl_iid_cnt <- as.data.frame(people[c('interact_id')]) %>% 
  group_by(interact_id) %>% 
  dplyr::count()

# (cont'd) find iid combination without match in veritas group
.no_pl_iid <- anti_join(veritas_main[c('interact_id')], .pl_iid_cnt, by = 'interact_id') %>%
  mutate(n = 0)
.pl_iid_cnt <- bind_rows(.pl_iid_cnt, .no_pl_iid)

kable(t(as.matrix(summary(.pl_iid_cnt$n))), caption = "Number of people per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	4.257143	6	24

# histogram
.sc_iid_cnt <- .pl_iid_cnt %>% mutate(soc_type = 'people')
.sc_iid_cnt <- .gr_iid_cnt %>% mutate(soc_type = 'group') %>%
  bind_rows(.sc_iid_cnt)
  
ggplot(data = .sc_iid_cnt) + 
  geom_histogram(aes(x=n, y=stat(count), fill = soc_type), position = 'dodge') +
  labs(x="Social network size by element type", fill = element_blank())

Among these people, who do you discuss important matters with?

# extract number of important people / participant
.n_important <- important %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_imp <- left_join(veritas_main[c('interact_id')], .n_people, by="interact_id") %>%
  left_join(.n_important, by="interact_id") %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_important = n.y) %>%
  mutate(pct = 100 * n_important / n_people)

kable(t(as.matrix(summary(.n_people_imp$n_important))), caption = "Number of important people per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of important people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	2	2.714286	4	19

kable(t(as.matrix(summary(.n_people_imp$pct))), caption = "% of important people among social contact per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of important people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	45.3125	80	70.65076	100	100	20

Among these people, who do you like to socialize with?

# extract number of important people / participant
.n_socialize <- socialize %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_soc <- left_join(veritas_main[c('interact_id')], .n_people, by="interact_id") %>%
  left_join(.n_socialize, by="interact_id") %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_socialize = n.y) %>%
  mutate(pct = 100 * n_socialize / n_people)

kable(t(as.matrix(summary(.n_people_soc$n_socialize))), caption = "Number of people with whom to socialize per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of people with whom to socialize per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	3.528571	5	19

kable(t(as.matrix(summary(.n_people_soc$pct))), caption = "% of people with whom to  socialize among social contact per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of people with whom to socialize among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	75	100	84.31105	100	100	20

Among these people, who do you meet often with but do not necessarily feel close to?

# extract number of important people / participant
.n_not_close <- not_close %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_not_close <- left_join(veritas_main[c('interact_id')], .n_people, by="interact_id") %>%
  left_join(.n_not_close, by="interact_id") %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_not_close = n.y) %>%
  mutate(pct = 100 * n_not_close / n_people)

kable(t(as.matrix(summary(.n_people_not_close$n_not_close))), caption = "Number of not so close people per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of not so close people per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	0	0.5714286	1	10

kable(t(as.matrix(summary(.n_people_not_close$pct))), caption = "% of not so close people among social contact per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of not so close people among social contact per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	0	13.71603	16.07143	100	20

Among these people, who knows whom?

# extract number of who knows who relationships
.n_relat <- relationship %>% filter(relationship_type == 1) %>% dplyr::count(interact_id)
.n_people <- people %>% dplyr::count(interact_id)

.n_people_relat <- left_join(veritas_main[c('interact_id')], .n_people, by="interact_id") %>%
  left_join(.n_relat, by="interact_id") %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  dplyr::rename(n_people = n.x, n_relat = n.y) %>%
  mutate(pct = 100 * n_relat * 2 / (n_people * (n_people - 1))) #potential number of relationships = N x (N -1) / 2

kable(t(as.matrix(summary(.n_people_relat$n_relat))), caption = "Number of relationships « who knows who » per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Number of relationships « who knows who » per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	2.5	8.276191	9	171

kable(t(as.matrix(summary(.n_people_relat$pct))), caption = "% of relationships « who knows who » per participant") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

% of relationships « who knows who » per participant
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	41.05263	80	68.74559	100	100	61

Derived metrics

Transportation mode preferences

Based on the answers to the question Usually, how do you go there? (Check all that apply.).

# code  en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99    Other

loc_labels = data.frame(location_category=c(2:26), description=c(" 2 [Other residence]",
    " 3 [Work]",
    " 4 [School/College/University]",
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]",
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]",
    "16 [Public transit stop]",
    "17 [Leisure-time physical activity]",
    "18 [Park]",
    "19 [Cultural activity]",
    "20 [Volunteering place]",
    "21 [Religious/spiritual activity]",
    "22 [Restaurant, café, bar, etc.]",
    "23 [Take-out]",
    "24 [Walk]",
    "25 [Other place]",
    "26 [Social contact residence]"))

# extract and summary stats
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)

.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(N=n(), "By car (driver)"=sum(location_tmode_1),
                   "By car (passenger)"=sum(location_tmode_2),
                   "By taxi/Uber"=sum(location_tmode_3),
                   "On foot"=sum(location_tmode_4),
                   "By bike"=sum(location_tmode_5),
                   "By bus"=sum(location_tmode_6),
                   "By train"=sum(location_tmode_7),
                   "Other"=sum(location_tmode_99))

kable(.tm_grouped, caption = "Transportation mode preferences") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Transportation mode preferences
description	N	By car (driver)	By car (passenger)	By taxi/Uber	On foot	By bike	By bus	By train	Other
2 [Other residence]	11	3	5	1	2	3	5	1	1
3 [Work]	176	71	4	1	45	53	41	13	17
4 [School/College/University]	24	5	0	0	8	2	15	3	4
5 [Supermarket]	549	231	50	0	298	87	39	3	4
6 [Public/farmer’s market]	109	34	8	0	58	35	9	0	0
7 [Bakery]	117	27	3	0	85	18	5	0	1
8 [Specialty food store]	193	46	8	0	130	37	17	2	1
9 [Convenience store/Dépanneur]	50	13	1	0	40	1	1	0	0
10 [Liquor store/SAQ]	185	80	13	0	98	27	6	0	0
11 [Bank]	159	34	3	0	124	21	9	2	2
12 [Hair salon/barbershop]	149	59	6	0	76	23	26	2	3
13 [Post office]	139	23	0	0	121	21	3	0	0
14 [Drugstore]	192	59	6	0	145	26	8	0	0
15 [Doctor/healthcare provider]	211	88	7	0	93	34	49	7	2
16 [Public transit stop]	422	3	3	0	397	5	0	0	26
17 [Leisure-time physical activity]	301	109	22	0	141	87	17	6	10
18 [Park]	348	75	31	0	224	97	18	1	6
19 [Cultural activity]	169	55	34	2	50	26	56	4	1
20 [Volunteering place]	138	62	7	0	63	21	21	3	5
21 [Religious/spiritual activity]	46	22	4	0	18	8	9	1	0
22 [Restaurant, café, bar, etc.]	440	129	39	2	260	53	61	3	10
23 [Take-out]	132	42	8	0	74	6	9	1	15
24 [Walk]	364	51	15	0	313	39	10	0	5
25 [Other place]	146	67	13	0	63	30	28	9	7
26 [Social contact residence]	158	79	26	0	50	28	26	6	1

#graph
.tm1 <- .tm %>%
  filter(location_tmode_1 == 1) %>%
  mutate(tm = "[1] By car (driver)")
.tm2 <- .tm %>%
  filter(location_tmode_2 == 1) %>%
  mutate(tm = "[2] By car (passenger)")
.tm3 <- .tm %>%
  filter(location_tmode_3 == 1) %>%
  mutate(tm = "[3] By taxi/Uber")
.tm4 <- .tm %>%
  filter(location_tmode_4 == 1) %>%
  mutate(tm = "[4] On foot")
.tm5 <- .tm %>%
  filter(location_tmode_5 == 1) %>%
  mutate(tm = "[5] By bike")
.tm6 <- .tm %>%
  filter(location_tmode_6 == 1) %>%
  mutate(tm = "[6] By bus")
.tm7 <- .tm %>%
  filter(location_tmode_7 == 1) %>%
  mutate(tm = "[7] By train")
.tm99 <- .tm %>%
  filter(location_tmode_99 == 1) %>%
  mutate(tm = "[99] Other")
.tm <- bind_rows(.tm1, .tm2) %>%
  bind_rows(.tm3) %>%
  bind_rows(.tm4) %>%
  bind_rows(.tm5) %>%
  bind_rows(.tm6) %>%
  bind_rows(.tm7) %>%
  bind_rows(.tm99)
  
# histogram of answers
ggplot(data=.tm) +
  geom_bar(aes(x=fct_rev(description), fill=tm), position="fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") + 
  scale_y_continuous(labels = percent) + 
  labs(y = "Proportion of transportation mode by location category", x=element_blank()) + 
  coord_flip() +
  theme(legend.position = "bottom", legend.justification=c(0,0), legend.text = element_text(size=8)) + 
  guides(fill=guide_legend(nrow = 3))

Visiting places alone

Based on the answers to the question Do you usually go to this place alone or with other people?.

loc_labels = data.frame(location_category=c(2:26), description=c(" 2 [Other residence]",
    " 3 [Work]",
    " 4 [School/College/University]",
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]",
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]",
    "16 [Public transit stop]",
    "17 [Leisure-time physical activity]",
    "18 [Park]",
    "19 [Cultural activity]",
    "20 [Volunteering place]",
    "21 [Religious/spiritual activity]",
    "22 [Restaurant, café, bar, etc.]",
    "23 [Take-out]",
    "24 [Walk]",
    "25 [Other place]",
    "26 [Social contact residence]"))

# extract and summary stats
.alone <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels) %>%
  mutate(location_alone_recode=case_when(location_alone == 1 ~ 1,
                   location_alone == 2 ~ 0))
  
.alone_grouped <- .alone %>%
  group_by(description) %>%
  dplyr::summarise(N=n(), "Visited alone"=sum(location_alone_recode),
                   "Visited alone (%)"=round(sum(location_alone_recode)*100.0/n(), digits=1))

kable(.alone_grouped, caption = "Visiting places alone") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visiting places alone
description	N	Visited alone	Visited alone (%)
2 [Other residence]	11	NA	NA
3 [Work]	176	72	40.9
4 [School/College/University]	24	20	83.3
5 [Supermarket]	549	425	77.4
6 [Public/farmer’s market]	109	62	56.9
7 [Bakery]	117	89	76.1
8 [Specialty food store]	193	156	80.8
9 [Convenience store/Dépanneur]	50	45	90.0
10 [Liquor store/SAQ]	185	144	77.8
11 [Bank]	159	149	93.7
12 [Hair salon/barbershop]	149	140	94.0
13 [Post office]	139	132	95.0
14 [Drugstore]	192	170	88.5
15 [Doctor/healthcare provider]	211	194	91.9
16 [Public transit stop]	422	352	83.4
17 [Leisure-time physical activity]	301	150	49.8
18 [Park]	348	148	42.5
19 [Cultural activity]	169	44	26.0
20 [Volunteering place]	138	57	41.3
21 [Religious/spiritual activity]	46	15	32.6
22 [Restaurant, café, bar, etc.]	440	96	21.8
23 [Take-out]	132	65	49.2
24 [Walk]	364	189	51.9
25 [Other place]	146	82	56.2
26 [Social contact residence]	158	70	44.3

# histogram of answers
ggplot(data=.alone) +
  geom_bar(aes(x=fct_rev(description), fill=factor(location_alone)), position="fill") +
  scale_fill_brewer(palette = "Set3", name = "Visiting places", labels = c("N/A", 'Alone', "With someone")) + 
  scale_y_continuous(labels = percent) + 
  labs(y = "Proportion of places visited alone", x=element_blank()) + 
  coord_flip()

Visit frequency

Based on the answers to the question How often do you go there?.

loc_labels = data.frame(location_category=c(2:26), description=c(" 2 [Other residence]",
    " 3 [Work]",
    " 4 [School/College/University]",
    " 5 [Supermarket]",
    " 6 [Public/farmer’s market]",
    " 7 [Bakery]",
    " 8 [Specialty food store]",
    " 9 [Convenience store/Dépanneur]",
    "10 [Liquor store/SAQ]",
    "11 [Bank]",
    "12 [Hair salon/barbershop]",
    "13 [Post office]",
    "14 [Drugstore]",
    "15 [Doctor/healthcare provider]",
    "16 [Public transit stop]",
    "17 [Leisure-time physical activity]",
    "18 [Park]",
    "19 [Cultural activity]",
    "20 [Volunteering place]",
    "21 [Religious/spiritual activity]",
    "22 [Restaurant, café, bar, etc.]",
    "23 [Take-out]",
    "24 [Walk]",
    "25 [Other place]",
    "26 [Social contact residence]"))

# extract and summary stats
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels)
  
.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(N=n(), min=min(location_freq_visit),
                   max=max(location_freq_visit),
                   mean=mean(location_freq_visit),
                   median=median(location_freq_visit),
                   sd=sd(location_freq_visit))

kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Visit frequency (expressed in times/year)
description	N	min	max	mean	median	sd
2 [Other residence]	11	2	208	75.454545	52	62.110166
3 [Work]	176	1	520	177.369318	208	115.107031
4 [School/College/University]	24	4	364	154.000000	130	102.007672
5 [Supermarket]	549	2	1040	62.704918	52	73.032792
6 [Public/farmer’s market]	109	1	364	44.963303	24	62.284432
7 [Bakery]	117	3	260	37.179487	24	41.135314
8 [Specialty food store]	193	3	1040	50.668394	24	86.380842
9 [Convenience store/Dépanneur]	50	1	520	59.740000	24	83.901986
10 [Liquor store/SAQ]	185	1	208	31.843243	24	34.100431
11 [Bank]	159	1	260	30.666667	24	36.516282
12 [Hair salon/barbershop]	149	1	36	6.953020	6	5.344024
13 [Post office]	139	2	104	14.589928	8	19.129541
14 [Drugstore]	192	1	208	42.151042	24	44.589613
15 [Doctor/healthcare provider]	211	1	104	5.535545	3	8.809757
16 [Public transit stop]	422	1	364	61.715640	24	83.875231
17 [Leisure-time physical activity]	301	2	364	102.458472	104	86.560590
18 [Park]	348	1	1560	74.514368	33	119.288874
19 [Cultural activity]	169	1	208	17.698225	6	31.598637
20 [Volunteering place]	138	1	5200	116.913044	52	445.811938
21 [Religious/spiritual activity]	46	4	364	76.260870	52	102.167930
22 [Restaurant, café, bar, etc.]	440	1	364	26.950000	12	43.894544
23 [Take-out]	132	2	364	25.037879	12	39.906648
24 [Walk]	364	1	1560	99.156593	52	136.668166
25 [Other place]	146	1	520	42.164384	24	63.740749
26 [Social contact residence]	158	3	312	36.645570	24	46.663325

#graph
ggplot(data=.freq) +
  geom_boxplot(aes(x=fct_rev(description), y=location_freq_visit)) +
  scale_y_continuous(limits = c(0, 365)) +
  labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x=element_blank()) + 
  coord_flip()

Spatial indicators: Camille Perchoux’s toolbox

Below is a list of indicators proposed by Camille Perchoux in her paper Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study).

py_config()

## python:         C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/python.exe
## libpython:      C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/python36.dll
## pythonhome:     C:\PROGRA~1\ArcGIS\Pro\bin\Python\envs\ARCGIS~1
## version:        3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]
## Architecture:   64bit
## numpy:          C:\PROGRA~1\ArcGIS\Pro\bin\Python\envs\ARCGIS~1\lib\site-packages\numpy
## numpy_version:  1.16.2
## 
## NOTE: Python version was forced by use_python function

import arcpy
import pandas
from time import perf_counter

arcpy.env.workspace = r"E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb"
src_loc = "veritas_1van_location" #'test_location' #
src_poly = "veritas_1van_poly_geom" #'test_poly_geom' #
src_loc_proj = src_loc + "_proj"
src_poly_proj = src_poly + "_proj"
src_prn = src_loc.replace('location', 'prn')
dst_ll = src_loc.replace('location', 'Indicator_Lifestyle') #"camille_LifestyleIndicator" #
dst_as = src_loc.replace('location', 'Indicator_ActivitySpace') #"camille_ActivitySpaceIndicator" #
dst_rn = src_loc.replace('location', 'Indicator_ResidentialNghd') #"camille_ResidentialNeighborhoodIndicator" #

#check that we already have the results
done = False
done = arcpy.Exists(dst_ll) and arcpy.Exists(dst_as) and arcpy.Exists(dst_rn)

if not done:
  c0 = perf_counter()

  # --- Project data set
  arcpy.Project_management(in_dataset=src_loc, out_dataset=src_loc_proj, out_coor_system="PROJCS['NAD_1983_CSRS_Statistics_Canada_Lambert',GEOGCS['GCS_North_American_1983_CSRS',DATUM['D_North_American_1983_CSRS',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Lambert_Conformal_Conic'],PARAMETER['False_Easting',6200000.0],PARAMETER['False_Northing',3000000.0],PARAMETER['Central_Meridian',-91.86666666666666],PARAMETER['Standard_Parallel_1',49.0],PARAMETER['Standard_Parallel_2',77.0],PARAMETER['Latitude_Of_Origin',63.390675],UNIT['Meter',1.0]]", transform_method="NAD_1983_CSRS_To_WGS_1984_2", in_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", preserve_shape="NO_PRESERVE_SHAPE", max_deviation="", vertical="NO_VERTICAL")
  
  arcpy.Project_management(in_dataset=src_poly, out_dataset=src_poly_proj, out_coor_system="PROJCS['NAD_1983_CSRS_Statistics_Canada_Lambert',GEOGCS['GCS_North_American_1983_CSRS',DATUM['D_North_American_1983_CSRS',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Lambert_Conformal_Conic'],PARAMETER['False_Easting',6200000.0],PARAMETER['False_Northing',3000000.0],PARAMETER['Central_Meridian',-91.86666666666666],PARAMETER['Standard_Parallel_1',49.0],PARAMETER['Standard_Parallel_2',77.0],PARAMETER['Latitude_Of_Origin',63.390675],UNIT['Meter',1.0]]", transform_method="NAD_1983_CSRS_To_WGS_1984_2", in_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", preserve_shape="NO_PRESERVE_SHAPE", max_deviation="", vertical="NO_VERTICAL")
  
  # --- Reselect PRN only from poly_geom
  arcpy.FeatureClassToFeatureClass_conversion(in_features=src_poly_proj, out_path=arcpy.env.workspace, out_name=src_prn, where_clause="area_type = 'neighborhood'")
  
  # --- Add required fields for Camille's tbx computation
  arcpy.management.AddFields(src_loc_proj, [["freq_week", "DOUBLE"],["recode_categ", "LONG"]])
  
  arcpy.management.CalculateField(src_loc_proj, "freq_week", "get_freq_week(!location_freq_visit!)", "PYTHON3", "def get_freq_week(annual_freq):\n    if annual_freq >= 0:\n        return annual_fr" +
      "eq / 52")
  arcpy.management.CalculateField(src_loc_proj, "recode_categ", "recode_categ(!location_category!)", "PYTHON3", """def recode_categ(categ):
      if categ == 1:
          return 1
      if categ in [3, 4]:
          return 2 #Occupation
      if categ in [5, 6, 7, 8, 9, 10]:
          return 3 #Shopping activities
      if categ in [11, 12, 13, 14, 15]:
          return 4 #Services
      if categ == 16:
          return 5 #Transportation
      if categ in [17, 18, 19, 20, 21, 22, 23, 24]:
          return 6 #Leisure activities
  """)
          
  # Call Camille Tbx
  arcpy.ImportToolbox(r"E:\Benoit\PROJETS\_DIVERS\2016_CamilleTbx\script\CamilleTbx.pyt", "CamilleTbx")
  arcpy.CamilleTbx.LifestyleIndicatorTool(src_loc_proj, dst_ll, "interact_id", "freq_week", "recode_categ", 1, 4, 6, 3) #1st=social [Services] / 2nd=recreaction [Leisure activities] / 3rd=food [Shopping activities]
  
  arcpy.CamilleTbx.ActivitySpaceIndicatorTool(src_loc_proj, dst_as, "interact_id", "recode_categ", 1, r"E:\Megaphone\DMTI_BaseLayers_2017\CanMapContentSuite\CanMapContentSuite.gdb\Transportation\NetworkDataSet", "Meters")
  
  arcpy.CamilleTbx.ResidentialNeighborhoodIndicatorTool(src_loc_proj, dst_rn, "interact_id", "freq_week", "recode_categ", 1, src_prn, r"E:\Megaphone\DMTI_BaseLayers_2017\CanMapContentSuite\CanMapContentSuite.gdb\Transportation\NetworkDataSet", "Meters")
  
  print(f"Done in {time.perf_counter() - c0:.1f}s")
  
else:
  print('Loading precomputed indictors:')
  print('\tLL -> {}'.format(arcpy.Describe(dst_ll).catalogPath))
  print('\tAS -> {}'.format(arcpy.Describe(dst_as).catalogPath))
  print('\tRN -> {}'.format(arcpy.Describe(dst_rn).catalogPath))

# load results into R

## Loading precomputed indictors:
##  LL -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_Lifestyle
##  AS -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_ActivitySpace
##  RN -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_ResidentialNghd

LifestyleIndicator = pandas.DataFrame(arcpy.da.TableToNumPyArray(dst_ll, ('interact_id', 'N_acti_places', 'N_weekly_vst', 'N_acti_types', 'Food_store_Q', 'Recreational_Q', 'Social_Q')))
ActivitySpaceIndicator = pandas.DataFrame(arcpy.da.TableToNumPyArray(dst_as, ('interact_id', 'cvx_Perimeter', 'cvx_Surface', 'axis_ratio', 'cvx_gravelius', 'eccentricity', 'dsty_ellipse', 'Min_Length', 'Max_Length', 'Mean_Length')))
ResidentialNeighborhoodIndicator = pandas.DataFrame(arcpy.da.TableToNumPyArray(dst_rn, ('interact_id', 'pct_visits_neighb', 'N_acti_PRN', 'pct_visits_PRN', 'PRN_area_km2', 'ratio_PRN_area', 'ratio_PRN_AS', 'PRN_gravelius', 'PRN_eccentricity')))

Indicators related to the lifestyle

Indicators	Measurement approach
Number of activity places (`N_acti_places`)	Count of activity places
Number of visits to places per week (`N_weekly_vst`)	Number of activity places per individual multiplied by the frequency of visit per week to each location, excluding the residence
Number of activity types (`N_acti_types`)	6 types of activities considered: 1-Residential; 2-Occupation; 3-Shopping activities; 4-Services; 5-Transportation; 6-Leisure activities (NB original categories were as follow: 1-Residential; 2-work; 3-food and other services; 4-transport station/stop; 5-recreational activity; 6-social activity)
Individual quotient of Shopping activities (`Shopping_Q`)	Comparison of the proportion of Shopping activities for each participant to the proportion of other activities
Individual quotient of Services (`Services_Q`)	Comparison of the proportion of Services for each participant to the proportion of other activities
Individual quotient of Leisure activities (`Leisures_Q`)	Comparison of the proportion of Leisure activities for each participant to the proportion of other activities

LLindic <- py$LifestyleIndicator
names(LLindic) <- c("interact_id", "N_acti_places", "N_weekly_vst", "N_acti_types",
                    "Shopping_Q", "Services_Q", "Leisures_Q")
.llmtx <- as.matrix(summary(LLindic[c("N_acti_places", "N_weekly_vst", "N_acti_types",
                    "Shopping_Q", "Services_Q", "Leisures_Q")]))
.llmtx <- apply(.llmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.llmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
kable(.llmtx, caption = "Lifestyle statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Lifestyle statistics
	N_acti_places	N_weekly_vst	N_acti_types	Shopping_Q	Services_Q	Leisures_Q
Min.	6.00	6.019	3.000	0.4731	0.4567	0.7054
1st Qu.	18.00	15.404	5.000	0.9303	0.8905	0.9160
Median	23.00	21.635	5.000	1.0054	1.0177	0.9948
Mean	24.47	24.831	5.419	1.0049	1.0404	0.9892
3rd Qu.	31.00	30.572	6.000	1.0983	1.1333	1.0683
Max.	49.00	134.404	6.000	1.3102	1.6190	1.2932

Indicators related to the geometry of the activity space

Indicators	Measurement approach
Perimeter of the convex hull (`cvx_Perimeter`)	Perimeter of the smallest polygon containing all the activity locations of the participant (unit: km)
Surface of the convex hull (`cvx_Surface`)	Surface of the smallest polygon containing all the activity locations of the participant (unit: km2)
Major to minor axis ratio (`axis_ratio`)	Ratio of the axes of a standard deviational ellipse weighted by the annual frequency of visits to places
Gravelius compactness coefficient (`cvx_gravelius`)	Activity space represented by a convex hull. \(K = \frac{P}{2\sqrt{\pi A}}\) (where P = perimeter and A = surface)
Index of eccentricity (`eccentricity`)	Ratio of the distance between the residence and the centroid of the standard deviational ellipse to the length of major axis
Density of activity locations in the standard deviational ellipse (`dsty_ellipse`)	Ratio of the number of activity places to the surface of the standard deviational ellipse
Minimal road network distance from the residence to an activity place (`Min_Length`)	Minimal distance from the residence to an activity place using the road network (in meters)
Maximal road network distance from the residence to an activity place (`Max_Length`)	Maximal distance from the residence to an activity place using the road network (in meters)
Average road network distance from the residence to all activity places (`Mean_Length`)	Average distance from home to all activity places using the road network (in meters)
Median road network distance from the residence to all activity places (`Median_Length`)	Median distance from home to all activity places using the road network (in meters)

ASindic <- py$ActivitySpaceIndicator
.asmtx <- as.matrix(summary(ASindic[c("cvx_Perimeter", "cvx_Surface", "axis_ratio", "cvx_gravelius", "eccentricity", 
                                  "dsty_ellipse", "Min_Length", "Max_Length", "Mean_Length")]))
.asmtx <- apply(.asmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.asmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ", "NA's   ")
.asmtx[is.na(.asmtx)] <- 0
kable(.asmtx, caption = "Activity space statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Activity space statistics
	cvx_Perimeter	cvx_Surface	axis_ratio	cvx_gravelius	eccentricity	dsty_ellipse	Min_Length	Max_Length	Mean_Length
Min.	4.689	0.29	1.006	1.081	0.03708	0.00117	0.00	1670	596.2
1st Qu.	18.064	15.58	1.539	1.208	0.18928	0.64787	47.41	6269	1691.3
Median	25.658	28.96	2.025	1.299	0.28729	1.37580	165.97	8518	2477.5
Mean	170.313	474.22	14.676	1.619	0.31086	2.30188	213.62	15323	2969.2
3rd Qu.	39.298	66.53	2.731	1.442	0.40709	2.49034	283.87	12051	3148.7
Max.	25766.664	54769.60	2345.553	31.059	1.30176	34.40465	3743.28	407527	43309.9
NA’s	0	0	0	0	0	0	1	1	1

Indicators related to the importance of the residential neighborhood

Indicators	Measurement approach
Percentage of visits to places in the residential neighborhood (`pct_visits_neighb`)	Count of visits to places within the 500 m road network buffer centered on the residence divided by the total number of visits to places
Number of activity locations in the PRN (`N_acti_PRN`)	Count of activity locations in the PRN
Percentage of visits in the PRN (`pct_visits_PRN`)	Count of visits to places in the PRN divided by the total number of visits to places
Surface of the PRN (`PRN_area_km2`)	Unit: km2
Proportion of the overall activity/perceived space covered by the PRN (`ratio_PRN_area`)	Ratio of the activity/perceived space (resulting from the merge of the PRN with the activity space convex hull) covered by the PRN
Proportion of the activity space covered by the PRN (`ratio_PRN_AS`)	Ratio of the activity space convex hull covered by the PRN
Gravelius compactness coefficient for the PRN (`PRN_gravelius`)	Gravelius compactness coefficient calculated for the PRN
Index of eccentricity for the PRN (`PRN_eccentricity`)	Shortest distance from the residence to the PRN boundary divided by the radius of a circle of the same area than the PRN

RNindic <- py$ResidentialNeighborhoodIndicator
RNindic <- RNindic[RNindic$interact_id %in% prn$interact_id, ]
.rnmtx <- as.matrix(summary(RNindic[c("pct_visits_neighb", "N_acti_PRN", "pct_visits_PRN", "PRN_area_km2", "ratio_PRN_area",
                                  "ratio_PRN_AS", "PRN_gravelius", "PRN_eccentricity")]))
.rnmtx <- apply(.rnmtx, 1:2, function(x) strsplit(as.character(x), ":")[[1]][2])
rownames(.rnmtx) <- c("Min.   ", "1st Qu.", "Median ", "Mean   ", "3rd Qu.", "Max.   ")
.rnmtx[is.na(.rnmtx)] <- 0
kable(.rnmtx, caption = "Residential neighbourhood statistics") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

Residential neighbourhood statistics
	pct_visits_neighb	N_acti_PRN	pct_visits_PRN	PRN_area_km2	ratio_PRN_area	ratio_PRN_AS	PRN_gravelius	PRN_eccentricity
Min.	0.000	0.00	0.00	0.000741	0.00000	0.00000	1.068	0.00009
1st Qu.	8.519	5.00	22.76	0.782790	0.01696	0.01696	1.147	0.21224
Median	23.132	9.00	45.73	1.853176	0.04669	0.04706	1.214	0.37003
Mean	27.176	10.42	46.45	3.383490	0.08228	0.09062	1.351	0.74100
3rd Qu.	45.608	15.00	70.61	4.230920	0.12518	0.13083	1.413	0.53804
Max.	84.797	33.00	98.58	26.899326	0.46654	0.52708	3.226	54.60768

Only participants with valid PRN considered in Residential Neighbourhood indicators (= 179 participants).

Social indicators: Alexandre Naud’s toolbox

See Alex’s document for a more comprehensive presentation of the social indicators.

site <- "van" # Can be mtl (Montreal), skt (Saskatoon), van (Vancouver) or vic (Victoria)
source('Alex/main.R')

## Reading layer `veritas_1van_location' from data source `I:\Chercheurs\Kestens_Yan\Spherelab\Prj2017_INTERACT\DATA\_Treksoft_2019_10_10_2851c89\veritas_1van_2851c89.gdb' using driver `OpenFileGDB'
## Simple feature collection with 5138 features and 21 fields
## geometry type:  MULTIPOINT
## dimension:      XY
## bbox:           xmin: -123.5266 ymin: -33.88918 xmax: 151.183 ymax: 53.8922
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## Reading layer `veritas_1van_poly_geom' from data source `I:\Chercheurs\Kestens_Yan\Spherelab\Prj2017_INTERACT\DATA\_Treksoft_2019_10_10_2851c89\veritas_1van_2851c89.gdb' using driver `OpenFileGDB'
## Simple feature collection with 187 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -123.2624 ymin: 49.18969 xmax: -123.0033 ymax: 49.28413
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

Number of people in the network (`degree`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=degree))

kable(t(as.matrix(summary(sn_stat1$degree))), caption = "degree") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

degree
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	4.257143	6	24

Number of edges divides by the maximum possible number of edges in the network (`density`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=density))

kable(t(as.matrix(summary(sn_stat1$density))), caption = "density") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

density
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0.2	0.33	0.400113	0.52	1	33

Simmelian Brokerage (`simmelian`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=simmelian))

kable(t(as.matrix(summary(sn_stat1$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
1	1	1.33	2.332886	3	9.43	61

Standard deviation for network member ages (`age_sd`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=age_sd))

kable(t(as.matrix(summary(sn_stat1$age_sd))), caption = "age_sd") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

age_sd
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	6	11.31	12.67141	19.09	31.27	61

Does the participant have a spouse (`spouse`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=spouse), stat="count") +
  labs(x="has spouse")

Proportion of kin in the network (`prop_kin`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=prop_kin))

kable(t(as.matrix(summary(sn_stat1$prop_kin))), caption = "prop_kin") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

prop_kin
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	0.25	0.3205696	0.5	1	52

Diversity of relation types (`diversity`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=diversity))

kable(t(as.matrix(summary(sn_stat1$diversity))), caption = "diversity") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

diversity
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	0	0.5372381	1	2.32

Number of individuals that are not connected with the spouse (`independant_ties`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=independant_ties))

kable(t(as.matrix(summary(sn_stat1$independant_ties))), caption = "independant_ties") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

independant_ties
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	0	0	0.5419847	0	16	79

Weekly face-to-face interactions (`meet_by_week`)

ggplot(filter(sn_stat1, meet_by_week < 100)) +
  geom_histogram(aes(x=meet_by_week)) +
  annotate(geom="text", x=75, y=100, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$meet_by_week))), caption = "meet_by_week") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

meet_by_week
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	3.17	9.19	20.705	19.015	1000

Weekly interactions through information and communication technologies (`chat_by_week`)

ggplot(filter(sn_stat1, chat_by_week < 100)) +
  geom_histogram(aes(x=chat_by_week)) +
  annotate(geom="text", x=40, y=30, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$chat_by_week))), caption = "chat_by_week") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

chat_by_week
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	4.8875	8.335	4782.305	15	1e+06

Number of people with whom the participant like to socialize (`socialize_size`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=socialize_size))

kable(t(as.matrix(summary(sn_stat1$socialize_size))), caption = "socialize_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	3	3.528571	5	19

Weekly face-to-face interactions among people with whom the participant like to socialize (`socialize_meet`)

ggplot(filter(sn_stat1, socialize_meet < 100)) +
  geom_histogram(aes(x=socialize_meet)) +
  annotate(geom="text", x=40, y=30, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$socialize_meet))), caption = "socialize_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_meet
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2.0375	7	14.43443	17.895	212.85

Weekly ICT interactions among people with whom the participant like to socialize (`socialize_chat`)

ggplot(filter(sn_stat1, socialize_chat < 100)) +
  geom_histogram(aes(x=socialize_chat)) +
  annotate(geom="text", x=40, y=30, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$socialize_chat))), caption = "socialize_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

socialize_chat
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1.69	7	4779.766	12.44	1e+06

Number of people with whom the participant discuss important matters (`important_size`)

ggplot(sn_stat1) +
  geom_histogram(aes(x=important_size))

kable(t(as.matrix(summary(sn_stat1$important_size))), caption = "important_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	1	2	2.714286	4	19

Weekly face-to-face interactions among people with whom the participant discuss important matters (`important_meet`)

ggplot(filter(sn_stat1, important_meet < 100)) +
  geom_histogram(aes(x=important_meet)) +
  annotate(geom="text", x=40, y=30, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$important_meet))), caption = "important_meet") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_meet
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2.0575	7	17.99005	16.9625	1000

Number of ICT interactions, by week, among people with whom the participant discuss important matters (`important_chat`)

ggplot(filter(sn_stat1, important_chat < 100)) +
  geom_histogram(aes(x=important_chat)) +
  annotate(geom="text", x=30, y=30, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat1$important_chat))), caption = "important_chat") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

important_chat
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	2.02	7	4779.284	10.015	1e+06

Number of groups in the network (`nb_groups`)

ggplot(sn_stat2) +
  geom_histogram(aes(x=nb_groups))

kable(t(as.matrix(summary(sn_stat2$nb_groups))), caption = "nb_groups") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

nb_groups
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0	0	1	1.5	2	8

Number of people in all groups (`group_size`)

ggplot(filter(sn_stat2, group_size < 100)) +
  geom_histogram(aes(x=group_size)) +
  annotate(geom="text", x=55, y=15, label="X-axis: values over 100 not displayed", alpha=.5)

kable(t(as.matrix(summary(sn_stat2$group_size))), caption = "group_size") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

group_size
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
2	10	26	83.78195	71	1294	77

Simmelian Brokerage calculated on the full network (`simmelian`)

ggplot(sn_stat2) +
  geom_histogram(aes(x=simmelian))

kable(t(as.matrix(summary(sn_stat2$simmelian))), caption = "simmelian") %>% kable_styling(bootstrap_options = "striped", full_width = T, position = "left")

simmelian
Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
1	2.67	5	8.115628	11.76	42.62	27

INTERACT Vancouver Participant VERITAS Summary - W1

Benoit THIERRY

22 octobre, 2019

VERITAS dataset description

Basic descriptive statistics

Section 1: Residence and Neighbourhood

Now, let’s start with your home. What is your address?

When did you move to your current address?

If you were asked to draw the boundaries of your neighbourhood, what would they be?

How attached are you to your neighbourhood?

On average, how many hours per day do you spend outside of your home?

Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

Do you spend the night somewhere other than your home at least once per week?

Section 2: Occupation

Are you currently working?

Where do you work?

On average, how many hours per week do you work?

Which of the following categories best describes the amount of physical activity required for your job?

Are you currently a registered student?

Where do you study?

On average, how many hours per week do you study?

Section 3: Shopping activities

Section 4: Services

Section 5: Transportation

Do you use public transit from your home?

Where are the public transit stops that you access from your home?

Section 6: Leisure activities

Section 7: Other places/activities

Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

Can you locate this place?

Derived metrics

Transportation mode preferences

Visiting places alone

Visit frequency

Spatial indicators: Camille Perchoux’s toolbox

INTERACT Vancouver Participant VERITAS Summary - W1

Benoit THIERRY

22 octobre, 2019

VERITAS dataset description

Basic descriptive statistics

Section 1: Residence and Neighbourhood

Now, let’s start with your home. What is your address?

When did you move to your current address?

If you were asked to draw the boundaries of your neighbourhood, what would they be?

How attached are you to your neighbourhood?

On average, how many hours per day do you spend outside of your home?

Of this time spent outside your home, on average how many hours do you spend outside your neighbourhood?

Are there one or more areas close to where you live that you tend to avoid because you do not feel safe there (for any reason)?

Do you spend the night somewhere other than your home at least once per week?

Section 2: Occupation

Are you currently working?

Where do you work?

On average, how many hours per week do you work?

Which of the following categories best describes the amount of physical activity required for your job?

Are you currently a registered student?

Where do you study?

On average, how many hours per week do you study?

Section 3: Shopping activities

Section 4: Services

Section 5: Transportation

Do you use public transit from your home?

Where are the public transit stops that you access from your home?

Section 6: Leisure activities

Section 7: Other places/activities

Are there other places that you go to at least once per month that we have not mentioned? For example: a mall, a daycare, a hardware store, or a community center.

Can you locate this place?

Section 9: Social contact

Do you visit anyone at his or her home at least once per month?

Where does this person live?

Great, we are almost done completing this questionnaire. You have documented all your activity places on a map, and specified with whom you generally do these activities. These last few questions concern the people you documented earlier.

Among these people, who do you discuss important matters with?

Among these people, who do you like to socialize with?

Among these people, who do you meet often with but do not necessarily feel close to?

Among these people, who knows whom?

Derived metrics

Transportation mode preferences

Visiting places alone

Visit frequency

Spatial indicators: Camille Perchoux’s toolbox

Indicators related to the lifestyle

Indicators related to the geometry of the activity space

Indicators related to the importance of the residential neighborhood

Social indicators: Alexandre Naud’s toolbox

Number of people in the network (degree)

Number of edges divides by the maximum possible number of edges in the network (density)

Simmelian Brokerage (simmelian)

Standard deviation for network member ages (age_sd)

Does the participant have a spouse (spouse)

Proportion of kin in the network (prop_kin)

Diversity of relation types (diversity)

Number of individuals that are not connected with the spouse (independant_ties)

Weekly face-to-face interactions (meet_by_week)

Weekly interactions through information and communication technologies (chat_by_week)

Number of people with whom the participant like to socialize (socialize_size)

Weekly face-to-face interactions among people with whom the participant like to socialize (socialize_meet)

Weekly ICT interactions among people with whom the participant like to socialize (socialize_chat)

Number of people with whom the participant discuss important matters (important_size)

Weekly face-to-face interactions among people with whom the participant discuss important matters (important_meet)

Number of ICT interactions, by week, among people with whom the participant discuss important matters (important_chat)

Number of groups in the network (nb_groups)

Number of people in all groups (group_size)

Simmelian Brokerage calculated on the full network (simmelian)

Number of people in the network (`degree`)

Number of edges divides by the maximum possible number of edges in the network (`density`)

Simmelian Brokerage (`simmelian`)

Standard deviation for network member ages (`age_sd`)

Does the participant have a spouse (`spouse`)

Proportion of kin in the network (`prop_kin`)

Diversity of relation types (`diversity`)

Number of individuals that are not connected with the spouse (`independant_ties`)

Weekly face-to-face interactions (`meet_by_week`)

Weekly interactions through information and communication technologies (`chat_by_week`)

Number of people with whom the participant like to socialize (`socialize_size`)

Weekly face-to-face interactions among people with whom the participant like to socialize (`socialize_meet`)

Weekly ICT interactions among people with whom the participant like to socialize (`socialize_chat`)

Number of people with whom the participant discuss important matters (`important_size`)

Weekly face-to-face interactions among people with whom the participant discuss important matters (`important_meet`)

Number of ICT interactions, by week, among people with whom the participant discuss important matters (`important_chat`)

Number of groups in the network (`nb_groups`)

Number of people in all groups (`group_size`)

Simmelian Brokerage calculated on the full network (`simmelian`)