Transportation mode preferences
Based on the answers to the question "Usually, how do you go there? (Check all that apply.)".
# code en
# 1 By car and you drive
# 2 By car and someone else drives
# 3 By taxi/Uber
# 4 On foot
# 5 By bike
# 6 By bus
# 7 By subway
# 8 By train
# 99 Other
# Lookup table: location category code (2 to 26) -> "code [label]" string.
# The code is right-aligned on two characters so that the labels sort in
# numeric order when used as table rows or plot axis entries.
loc_labels <- local({
  category_codes <- 2:26
  category_names <- c(
    "Other residence",
    "Work",
    "School/College/University",
    "Supermarket",
    "Public/farmer’s market",
    "Bakery",
    "Specialty food store",
    "Convenience store/Dépanneur",
    "Liquor store/SAQ",
    "Bank",
    "Hair salon/barbershop",
    "Post office",
    "Drugstore",
    "Doctor/healthcare provider",
    "Public transit stop",
    "Leisure-time physical activity",
    "Park",
    "Cultural activity",
    "Volunteering place",
    "Religious/spiritual activity",
    "Restaurant, café, bar, etc.",
    "Take-out",
    "Walk",
    "Other place",
    "Social contact residence"
  )
  data.frame(
    location_category = category_codes,
    description = paste0(sprintf("%2d", category_codes), " [", category_names, "]")
  )
})
# extract and summary stats
# Keep every location except category 1 (which has no entry in loc_labels),
# drop the geometry column and attach the readable category label.
# The join key is spelled out so the join cannot silently pick up any other
# column that `locations` and `loc_labels` might share.
.tm <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels, by = "location_category")

# Per location category: number of locations (N) and, for each transportation
# mode column, the sum of its values (presumably 0/1 indicators -- the graph
# code below filters on `== 1`). Missing answers are ignored instead of
# turning the whole cell into NA.
# NOTE(review): the codebook above lists 7 = subway and 8 = train, yet
# location_tmode_7 is labelled "By train" here and location_tmode_8 is not
# summarised -- TODO confirm against the questionnaire used for this site.
.tm_grouped <- .tm %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(),
    "By car (driver)" = sum(location_tmode_1, na.rm = TRUE),
    "By car (passenger)" = sum(location_tmode_2, na.rm = TRUE),
    "By taxi/Uber" = sum(location_tmode_3, na.rm = TRUE),
    "On foot" = sum(location_tmode_4, na.rm = TRUE),
    "By bike" = sum(location_tmode_5, na.rm = TRUE),
    "By bus" = sum(location_tmode_6, na.rm = TRUE),
    "By train" = sum(location_tmode_7, na.rm = TRUE),
    "Other" = sum(location_tmode_99, na.rm = TRUE)
  )

kable(.tm_grouped, caption = "Transportation mode preferences") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
Transportation mode preferences
|
description
|
N
|
By car (driver)
|
By car (passenger)
|
By taxi/Uber
|
On foot
|
By bike
|
By bus
|
By train
|
Other
|
|
2 [Other residence]
|
11
|
3
|
5
|
1
|
2
|
3
|
5
|
1
|
1
|
|
3 [Work]
|
176
|
71
|
4
|
1
|
45
|
53
|
41
|
13
|
17
|
|
4 [School/College/University]
|
24
|
5
|
0
|
0
|
8
|
2
|
15
|
3
|
4
|
|
5 [Supermarket]
|
549
|
231
|
50
|
0
|
298
|
87
|
39
|
3
|
4
|
|
6 [Public/farmer’s market]
|
109
|
34
|
8
|
0
|
58
|
35
|
9
|
0
|
0
|
|
7 [Bakery]
|
117
|
27
|
3
|
0
|
85
|
18
|
5
|
0
|
1
|
|
8 [Specialty food store]
|
193
|
46
|
8
|
0
|
130
|
37
|
17
|
2
|
1
|
|
9 [Convenience store/Dépanneur]
|
50
|
13
|
1
|
0
|
40
|
1
|
1
|
0
|
0
|
|
10 [Liquor store/SAQ]
|
185
|
80
|
13
|
0
|
98
|
27
|
6
|
0
|
0
|
|
11 [Bank]
|
159
|
34
|
3
|
0
|
124
|
21
|
9
|
2
|
2
|
|
12 [Hair salon/barbershop]
|
149
|
59
|
6
|
0
|
76
|
23
|
26
|
2
|
3
|
|
13 [Post office]
|
139
|
23
|
0
|
0
|
121
|
21
|
3
|
0
|
0
|
|
14 [Drugstore]
|
192
|
59
|
6
|
0
|
145
|
26
|
8
|
0
|
0
|
|
15 [Doctor/healthcare provider]
|
211
|
88
|
7
|
0
|
93
|
34
|
49
|
7
|
2
|
|
16 [Public transit stop]
|
422
|
3
|
3
|
0
|
397
|
5
|
0
|
0
|
26
|
|
17 [Leisure-time physical activity]
|
301
|
109
|
22
|
0
|
141
|
87
|
17
|
6
|
10
|
|
18 [Park]
|
348
|
75
|
31
|
0
|
224
|
97
|
18
|
1
|
6
|
|
19 [Cultural activity]
|
169
|
55
|
34
|
2
|
50
|
26
|
56
|
4
|
1
|
|
20 [Volunteering place]
|
138
|
62
|
7
|
0
|
63
|
21
|
21
|
3
|
5
|
|
21 [Religious/spiritual activity]
|
46
|
22
|
4
|
0
|
18
|
8
|
9
|
1
|
0
|
|
22 [Restaurant, café, bar, etc.]
|
440
|
129
|
39
|
2
|
260
|
53
|
61
|
3
|
10
|
|
23 [Take-out]
|
132
|
42
|
8
|
0
|
74
|
6
|
9
|
1
|
15
|
|
24 [Walk]
|
364
|
51
|
15
|
0
|
313
|
39
|
10
|
0
|
5
|
|
25 [Other place]
|
146
|
67
|
13
|
0
|
63
|
30
|
28
|
9
|
7
|
|
26 [Social contact residence]
|
158
|
79
|
26
|
0
|
50
|
28
|
26
|
6
|
1
|
#graph
# Reshape `.tm` to one row per (location, transportation mode used): for each
# mode indicator column, keep the locations where it equals 1 and tag them
# with the mode label. A single label map replaces eight copy-pasted
# filter/mutate stanzas, so a column and its label cannot drift apart.
# NOTE(review): location_tmode_7 is labelled "By train" although the codebook
# at the top of this section lists 7 = subway / 8 = train -- TODO confirm.
.tm_mode_labels <- c(
  location_tmode_1 = "[1] By car (driver)",
  location_tmode_2 = "[2] By car (passenger)",
  location_tmode_3 = "[3] By taxi/Uber",
  location_tmode_4 = "[4] On foot",
  location_tmode_5 = "[5] By bike",
  location_tmode_6 = "[6] By bus",
  location_tmode_7 = "[7] By train",
  location_tmode_99 = "[99] Other"
)
# Row order matches the original stacking: all mode 1 rows, then mode 2, ...
.tm <- bind_rows(lapply(names(.tm_mode_labels), function(mode_col) {
  .tm %>%
    filter(.data[[mode_col]] == 1) %>%
    mutate(tm = .tm_mode_labels[[mode_col]])
}))

# histogram of answers
# Bars are normalised to 100% per location category (position = "fill") and
# flipped so the category labels read horizontally.
ggplot(data = .tm) +
  geom_bar(aes(x = fct_rev(description), fill = tm), position = "fill") +
  scale_fill_brewer(palette = "Set3", name = "Transport modes") +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of transportation mode by location category", x = element_blank()) +
  coord_flip() +
  theme(legend.position = "bottom", legend.justification = c(0, 0), legend.text = element_text(size = 8)) +
  guides(fill = guide_legend(nrow = 3))

Visiting places alone
Based on the answers to the question "Do you usually go to this place alone or with other people?".
# Lookup table: location category code (2 to 26) -> "code [label]" string.
# The code is right-aligned on two characters so that the labels sort in
# numeric order when used as table rows or plot axis entries.
loc_labels <- local({
  category_codes <- 2:26
  category_names <- c(
    "Other residence",
    "Work",
    "School/College/University",
    "Supermarket",
    "Public/farmer’s market",
    "Bakery",
    "Specialty food store",
    "Convenience store/Dépanneur",
    "Liquor store/SAQ",
    "Bank",
    "Hair salon/barbershop",
    "Post office",
    "Drugstore",
    "Doctor/healthcare provider",
    "Public transit stop",
    "Leisure-time physical activity",
    "Park",
    "Cultural activity",
    "Volunteering place",
    "Religious/spiritual activity",
    "Restaurant, café, bar, etc.",
    "Take-out",
    "Walk",
    "Other place",
    "Social contact residence"
  )
  data.frame(
    location_category = category_codes,
    description = paste0(sprintf("%2d", category_codes), " [", category_names, "]")
  )
})
# extract and summary stats
# Keep every location except category 1, drop the geometry column, attach the
# readable category label and recode location_alone into a 0/1 flag:
#   1 -> 1, 2 -> 0, any other value -> NA
# (the plot labels below suggest 1 = alone, 2 = with someone -- TODO confirm).
.alone <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels, by = "location_category") %>%
  mutate(location_alone_recode = case_when(
    location_alone == 1 ~ 1,
    location_alone == 2 ~ 0
  ))

# Per location category: number of locations (N), how many are flagged as
# visited alone, and that count as a percentage of N.
# na.rm = TRUE is required: a single answer that is neither 1 nor 2 makes the
# recode NA, and without it the whole row is reported as NA.
.alone_grouped <- .alone %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(),
    "Visited alone" = sum(location_alone_recode, na.rm = TRUE),
    "Visited alone (%)" = round(sum(location_alone_recode, na.rm = TRUE) * 100.0 / n(), digits = 1)
  )

kable(.alone_grouped, caption = "Visiting places alone") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
Visiting places alone
|
description
|
N
|
Visited alone
|
Visited alone (%)
|
|
2 [Other residence]
|
11
|
NA
|
NA
|
|
3 [Work]
|
176
|
72
|
40.9
|
|
4 [School/College/University]
|
24
|
20
|
83.3
|
|
5 [Supermarket]
|
549
|
425
|
77.4
|
|
6 [Public/farmer’s market]
|
109
|
62
|
56.9
|
|
7 [Bakery]
|
117
|
89
|
76.1
|
|
8 [Specialty food store]
|
193
|
156
|
80.8
|
|
9 [Convenience store/Dépanneur]
|
50
|
45
|
90.0
|
|
10 [Liquor store/SAQ]
|
185
|
144
|
77.8
|
|
11 [Bank]
|
159
|
149
|
93.7
|
|
12 [Hair salon/barbershop]
|
149
|
140
|
94.0
|
|
13 [Post office]
|
139
|
132
|
95.0
|
|
14 [Drugstore]
|
192
|
170
|
88.5
|
|
15 [Doctor/healthcare provider]
|
211
|
194
|
91.9
|
|
16 [Public transit stop]
|
422
|
352
|
83.4
|
|
17 [Leisure-time physical activity]
|
301
|
150
|
49.8
|
|
18 [Park]
|
348
|
148
|
42.5
|
|
19 [Cultural activity]
|
169
|
44
|
26.0
|
|
20 [Volunteering place]
|
138
|
57
|
41.3
|
|
21 [Religious/spiritual activity]
|
46
|
15
|
32.6
|
|
22 [Restaurant, café, bar, etc.]
|
440
|
96
|
21.8
|
|
23 [Take-out]
|
132
|
65
|
49.2
|
|
24 [Walk]
|
364
|
189
|
51.9
|
|
25 [Other place]
|
146
|
82
|
56.2
|
|
26 [Social contact residence]
|
158
|
70
|
44.3
|
# Stacked bar chart of the raw location_alone answers per location category.
# Bars are normalised to 100% (position = "fill") and flipped so the category
# labels read horizontally. The legend labels are matched by position to the
# sorted levels of factor(location_alone).
ggplot(.alone, aes(x = fct_rev(description), fill = factor(location_alone))) +
  geom_bar(position = "fill") +
  scale_fill_brewer(
    palette = "Set3",
    name = "Visiting places",
    labels = c("N/A", "Alone", "With someone")
  ) +
  scale_y_continuous(labels = percent) +
  labs(y = "Proportion of places visited alone", x = element_blank()) +
  coord_flip()

Visit frequency
Based on the answers to the question "How often do you go there?".
# Lookup table: location category code (2 to 26) -> "code [label]" string.
# The code is right-aligned on two characters so that the labels sort in
# numeric order when used as table rows or plot axis entries.
loc_labels <- local({
  category_codes <- 2:26
  category_names <- c(
    "Other residence",
    "Work",
    "School/College/University",
    "Supermarket",
    "Public/farmer’s market",
    "Bakery",
    "Specialty food store",
    "Convenience store/Dépanneur",
    "Liquor store/SAQ",
    "Bank",
    "Hair salon/barbershop",
    "Post office",
    "Drugstore",
    "Doctor/healthcare provider",
    "Public transit stop",
    "Leisure-time physical activity",
    "Park",
    "Cultural activity",
    "Volunteering place",
    "Religious/spiritual activity",
    "Restaurant, café, bar, etc.",
    "Take-out",
    "Walk",
    "Other place",
    "Social contact residence"
  )
  data.frame(
    location_category = category_codes,
    description = paste0(sprintf("%2d", category_codes), " [", category_names, "]")
  )
})
# extract and summary stats
# Keep every location except category 1, drop the geometry column and attach
# the readable category label. The join key is spelled out so the join cannot
# silently pick up any other column shared by the two tables.
.freq <- locations %>%
  st_set_geometry(NULL) %>%
  filter(location_category != 1) %>%
  left_join(loc_labels, by = "location_category")

# Per location category: number of locations and descriptive statistics of
# location_freq_visit (times/year according to the table caption).
.freq_grouped <- .freq %>%
  group_by(description) %>%
  dplyr::summarise(
    N = n(),
    min = min(location_freq_visit),
    max = max(location_freq_visit),
    mean = mean(location_freq_visit),
    median = median(location_freq_visit),
    sd = sd(location_freq_visit)
  )

kable(.freq_grouped, caption = "Visit frequency (expressed in times/year)") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
Visit frequency (expressed in times/year)
|
description
|
N
|
min
|
max
|
mean
|
median
|
sd
|
|
2 [Other residence]
|
11
|
2
|
208
|
75.454545
|
52
|
62.110166
|
|
3 [Work]
|
176
|
1
|
520
|
177.369318
|
208
|
115.107031
|
|
4 [School/College/University]
|
24
|
4
|
364
|
154.000000
|
130
|
102.007672
|
|
5 [Supermarket]
|
549
|
2
|
1040
|
62.704918
|
52
|
73.032792
|
|
6 [Public/farmer’s market]
|
109
|
1
|
364
|
44.963303
|
24
|
62.284432
|
|
7 [Bakery]
|
117
|
3
|
260
|
37.179487
|
24
|
41.135314
|
|
8 [Specialty food store]
|
193
|
3
|
1040
|
50.668394
|
24
|
86.380842
|
|
9 [Convenience store/Dépanneur]
|
50
|
1
|
520
|
59.740000
|
24
|
83.901986
|
|
10 [Liquor store/SAQ]
|
185
|
1
|
208
|
31.843243
|
24
|
34.100431
|
|
11 [Bank]
|
159
|
1
|
260
|
30.666667
|
24
|
36.516282
|
|
12 [Hair salon/barbershop]
|
149
|
1
|
36
|
6.953020
|
6
|
5.344024
|
|
13 [Post office]
|
139
|
2
|
104
|
14.589928
|
8
|
19.129541
|
|
14 [Drugstore]
|
192
|
1
|
208
|
42.151042
|
24
|
44.589613
|
|
15 [Doctor/healthcare provider]
|
211
|
1
|
104
|
5.535545
|
3
|
8.809757
|
|
16 [Public transit stop]
|
422
|
1
|
364
|
61.715640
|
24
|
83.875231
|
|
17 [Leisure-time physical activity]
|
301
|
2
|
364
|
102.458472
|
104
|
86.560590
|
|
18 [Park]
|
348
|
1
|
1560
|
74.514368
|
33
|
119.288874
|
|
19 [Cultural activity]
|
169
|
1
|
208
|
17.698225
|
6
|
31.598637
|
|
20 [Volunteering place]
|
138
|
1
|
5200
|
116.913044
|
52
|
445.811938
|
|
21 [Religious/spiritual activity]
|
46
|
4
|
364
|
76.260870
|
52
|
102.167930
|
|
22 [Restaurant, café, bar, etc.]
|
440
|
1
|
364
|
26.950000
|
12
|
43.894544
|
|
23 [Take-out]
|
132
|
2
|
364
|
25.037879
|
12
|
39.906648
|
|
24 [Walk]
|
364
|
1
|
1560
|
99.156593
|
52
|
136.668166
|
|
25 [Other place]
|
146
|
1
|
520
|
42.164384
|
24
|
63.740749
|
|
26 [Social contact residence]
|
158
|
3
|
312
|
36.645570
|
24
|
46.663325
|
#graph
# Boxplot of the yearly visit frequency per location category, flipped so the
# category labels read horizontally.
# The 0-365 window is applied as a coordinate zoom (coord_flip(ylim = ...))
# rather than as scale limits: scale limits discard the out-of-range values
# before the boxplot statistics are computed, which shifts the quartiles and
# whiskers away from the figures reported in the table above.
ggplot(data = .freq) +
  geom_boxplot(aes(x = fct_rev(description), y = location_freq_visit)) +
  labs(y = "Visits/year (Frequency over 1 visit/day not shown)", x = element_blank()) +
  coord_flip(ylim = c(0, 365))

Spatial indicators: Camille Perchoux’s toolbox
Below is a list of indicators proposed by Camille Perchoux in her paper "Assessing patterns of spatial behavior in health studies: Their socio-demographic determinants and associations with transportation modes (the RECORD Cohort Study)".
py_config()
## python: C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/python.exe
## libpython: C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/python36.dll
## pythonhome: C:\PROGRA~1\ArcGIS\Pro\bin\Python\envs\ARCGIS~1
## version: 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:\PROGRA~1\ArcGIS\Pro\bin\Python\envs\ARCGIS~1\lib\site-packages\numpy
## numpy_version: 1.16.2
##
## NOTE: Python version was forced by use_python function
import arcpy
import pandas
from time import perf_counter
# Workspace geodatabase holding the source layers and the indicator outputs
arcpy.env.workspace = r"E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb"
src_loc = "veritas_1van_location" #'test_location' #
src_poly = "veritas_1van_poly_geom" #'test_poly_geom' #
src_loc_proj = src_loc + "_proj"
src_poly_proj = src_poly + "_proj"
src_prn = src_loc.replace('location', 'prn')
dst_ll = src_loc.replace('location', 'Indicator_Lifestyle') #"camille_LifestyleIndicator" #
dst_as = src_loc.replace('location', 'Indicator_ActivitySpace') #"camille_ActivitySpaceIndicator" #
dst_rn = src_loc.replace('location', 'Indicator_ResidentialNghd') #"camille_ResidentialNeighborhoodIndicator" #

# Coordinate systems shared by both projection calls (WGS84 -> Statistics Canada Lambert)
lambert_crs = "PROJCS['NAD_1983_CSRS_Statistics_Canada_Lambert',GEOGCS['GCS_North_American_1983_CSRS',DATUM['D_North_American_1983_CSRS',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Lambert_Conformal_Conic'],PARAMETER['False_Easting',6200000.0],PARAMETER['False_Northing',3000000.0],PARAMETER['Central_Meridian',-91.86666666666666],PARAMETER['Standard_Parallel_1',49.0],PARAMETER['Standard_Parallel_2',77.0],PARAMETER['Latitude_Of_Origin',63.390675],UNIT['Meter',1.0]]"
wgs84_crs = "GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]"

# Network dataset passed to the activity-space and residential-neighbourhood tools
network_dataset = r"E:\Megaphone\DMTI_BaseLayers_2017\CanMapContentSuite\CanMapContentSuite.gdb\Transportation\NetworkDataSet"

# Field-calculator helper: yearly visit frequency -> weekly frequency.
# Null or negative frequencies yield a null (None) result; the explicit None
# test avoids the TypeError that `None >= 0` raises in Python 3.
freq_week_code = """def get_freq_week(annual_freq):
    if annual_freq is not None and annual_freq >= 0:
        return annual_freq / 52
"""

# Field-calculator helper: group location categories into broader classes.
# Categories not listed (e.g. 2, 25, 26) fall through and yield a null result.
recode_categ_code = """def recode_categ(categ):
    if categ == 1:
        return 1
    if categ in [3, 4]:
        return 2 #Occupation
    if categ in [5, 6, 7, 8, 9, 10]:
        return 3 #Shopping activities
    if categ in [11, 12, 13, 14, 15]:
        return 4 #Services
    if categ == 16:
        return 5 #Transportation
    if categ in [17, 18, 19, 20, 21, 22, 23, 24]:
        return 6 #Leisure activities
"""

#check that we already have the results
done = arcpy.Exists(dst_ll) and arcpy.Exists(dst_as) and arcpy.Exists(dst_rn)
if not done:
    c0 = perf_counter()
    # --- Project data set
    arcpy.Project_management(in_dataset=src_loc, out_dataset=src_loc_proj, out_coor_system=lambert_crs, transform_method="NAD_1983_CSRS_To_WGS_1984_2", in_coor_system=wgs84_crs, preserve_shape="NO_PRESERVE_SHAPE", max_deviation="", vertical="NO_VERTICAL")
    arcpy.Project_management(in_dataset=src_poly, out_dataset=src_poly_proj, out_coor_system=lambert_crs, transform_method="NAD_1983_CSRS_To_WGS_1984_2", in_coor_system=wgs84_crs, preserve_shape="NO_PRESERVE_SHAPE", max_deviation="", vertical="NO_VERTICAL")
    # --- Reselect PRN only from poly_geom
    arcpy.FeatureClassToFeatureClass_conversion(in_features=src_poly_proj, out_path=arcpy.env.workspace, out_name=src_prn, where_clause="area_type = 'neighborhood'")
    # --- Add required fields for Camille's tbx computation
    arcpy.management.AddFields(src_loc_proj, [["freq_week", "DOUBLE"],["recode_categ", "LONG"]])
    arcpy.management.CalculateField(src_loc_proj, "freq_week", "get_freq_week(!location_freq_visit!)", "PYTHON3", freq_week_code)
    arcpy.management.CalculateField(src_loc_proj, "recode_categ", "recode_categ(!location_category!)", "PYTHON3", recode_categ_code)
    # Call Camille Tbx
    arcpy.ImportToolbox(r"E:\Benoit\PROJETS\_DIVERS\2016_CamilleTbx\script\CamilleTbx.pyt", "CamilleTbx")
    arcpy.CamilleTbx.LifestyleIndicatorTool(src_loc_proj, dst_ll, "interact_id", "freq_week", "recode_categ", 1, 4, 6, 3) #1st=social [Services] / 2nd=recreaction [Leisure activities] / 3rd=food [Shopping activities]
    arcpy.CamilleTbx.ActivitySpaceIndicatorTool(src_loc_proj, dst_as, "interact_id", "recode_categ", 1, network_dataset, "Meters")
    arcpy.CamilleTbx.ResidentialNeighborhoodIndicatorTool(src_loc_proj, dst_rn, "interact_id", "freq_week", "recode_categ", 1, src_prn, network_dataset, "Meters")
    # Only `perf_counter` is imported from `time`, so it is called directly
    print(f"Done in {perf_counter() - c0:.1f}s")
else:
    print('Loading precomputed indictors:')
    print('\tLL -> {}'.format(arcpy.Describe(dst_ll).catalogPath))
    print('\tAS -> {}'.format(arcpy.Describe(dst_as).catalogPath))
    print('\tRN -> {}'.format(arcpy.Describe(dst_rn).catalogPath))
# load results into R
## Loading precomputed indictors:
## LL -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_Lifestyle
## AS -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_ActivitySpace
## RN -> E:\Benoit\PROJETS\2017_INTERACT\_repos\VERITAS_preanalysis\temp\veritas_1van_2851c89.gdb\veritas_1van_Indicator_ResidentialNghd
def _indicator_frame(table, fields):
    """Read the listed fields of a geodatabase table into a pandas DataFrame."""
    return pandas.DataFrame(arcpy.da.TableToNumPyArray(table, fields))


# One DataFrame per indicator table, keyed by interact_id
LifestyleIndicator = _indicator_frame(
    dst_ll,
    ('interact_id', 'N_acti_places', 'N_weekly_vst', 'N_acti_types',
     'Food_store_Q', 'Recreational_Q', 'Social_Q'),
)
ActivitySpaceIndicator = _indicator_frame(
    dst_as,
    ('interact_id', 'cvx_Perimeter', 'cvx_Surface', 'axis_ratio', 'cvx_gravelius',
     'eccentricity', 'dsty_ellipse', 'Min_Length', 'Max_Length', 'Mean_Length'),
)
ResidentialNeighborhoodIndicator = _indicator_frame(
    dst_rn,
    ('interact_id', 'pct_visits_neighb', 'N_acti_PRN', 'pct_visits_PRN', 'PRN_area_km2',
     'ratio_PRN_area', 'ratio_PRN_AS', 'PRN_gravelius', 'PRN_eccentricity'),
)
Social indicators: Alexandre Naud’s toolbox
See Alex’s document for a more comprehensive presentation of the social indicators.
# Study site code read by the sourced script.
# Accepted values: "mtl" (Montreal), "skt" (Saskatoon), "van" (Vancouver),
# "vic" (Victoria).
site <- "van"
# Run the social-network indicator script; the chunks below read the
# sn_stat1 and sn_stat2 objects, presumably created by it -- TODO confirm.
source("Alex/main.R")
## Reading layer `veritas_1van_location' from data source `I:\Chercheurs\Kestens_Yan\Spherelab\Prj2017_INTERACT\DATA\_Treksoft_2019_10_10_2851c89\veritas_1van_2851c89.gdb' using driver `OpenFileGDB'
## Simple feature collection with 5138 features and 21 fields
## geometry type: MULTIPOINT
## dimension: XY
## bbox: xmin: -123.5266 ymin: -33.88918 xmax: 151.183 ymax: 53.8922
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## Reading layer `veritas_1van_poly_geom' from data source `I:\Chercheurs\Kestens_Yan\Spherelab\Prj2017_INTERACT\DATA\_Treksoft_2019_10_10_2851c89\veritas_1van_2851c89.gdb' using driver `OpenFileGDB'
## Simple feature collection with 187 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -123.2624 ymin: 49.18969 xmax: -123.0033 ymax: 49.28413
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
Number of people in the network (degree)
# Distribution of sn_stat1$degree, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = degree)) +
  geom_histogram()

sn_stat1$degree %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "degree") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
degree
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
1
|
3
|
4.257143
|
6
|
24
|
Number of edges divided by the maximum possible number of edges in the network (density)
# Distribution of sn_stat1$density, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = density)) +
  geom_histogram()

sn_stat1$density %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "density") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
density
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
0
|
0.2
|
0.33
|
0.400113
|
0.52
|
1
|
33
|
Simmelian Brokerage (simmelian)
# Distribution of sn_stat1$simmelian, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = simmelian)) +
  geom_histogram()

sn_stat1$simmelian %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "simmelian") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
simmelian
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
1
|
1
|
1.33
|
2.332886
|
3
|
9.43
|
61
|
Standard deviation for network member ages (age_sd)
# Distribution of sn_stat1$age_sd, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = age_sd)) +
  geom_histogram()

sn_stat1$age_sd %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "age_sd") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
age_sd
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
0
|
6
|
11.31
|
12.67141
|
19.09
|
31.27
|
61
|
Does the participant have a spouse (spouse)
# Count of participants per value of sn_stat1$spouse.
# geom_bar() is the geom that counts rows per discrete x value;
# geom_histogram(stat = "count") draws the same bars but warns about the
# binning parameters it then has to ignore.
ggplot(sn_stat1) +
  geom_bar(aes(x = spouse)) +
  labs(x = "has spouse")

Proportion of kin in the network (prop_kin)
# Distribution of sn_stat1$prop_kin, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = prop_kin)) +
  geom_histogram()

sn_stat1$prop_kin %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "prop_kin") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
prop_kin
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
0
|
0
|
0.25
|
0.3205696
|
0.5
|
1
|
52
|
Diversity of relation types (diversity)
# Distribution of sn_stat1$diversity, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat1, aes(x = diversity)) +
  geom_histogram()

sn_stat1$diversity %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "diversity") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
diversity
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
0
|
0
|
0.5372381
|
1
|
2.32
|
Number of individuals that are not connected with the spouse (independant_ties)
# Distribution of sn_stat1$independant_ties, followed by its summary()
# statistics rendered as a one-row table.
ggplot(sn_stat1, aes(x = independant_ties)) +
  geom_histogram()

sn_stat1$independant_ties %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "independant_ties") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
independant_ties
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
0
|
0
|
0
|
0.5419847
|
0
|
16
|
79
|
Weekly face-to-face interactions (meet_by_week)
# Histogram of sn_stat1$meet_by_week restricted to values up to 100, so the
# few extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden. The summary table below still uses every value.
ggplot(filter(sn_stat1, meet_by_week <= 100)) +
  geom_histogram(aes(x = meet_by_week)) +
  annotate(geom = "text", x = 75, y = 100, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat1$meet_by_week))), caption = "meet_by_week") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
meet_by_week
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
3.17
|
9.19
|
20.705
|
19.015
|
1000
|
Number of people with whom the participant likes to socialize (socialize_size)
# Distribution of sn_stat1$socialize_size, followed by its summary()
# statistics rendered as a one-row table.
ggplot(sn_stat1, aes(x = socialize_size)) +
  geom_histogram()

sn_stat1$socialize_size %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "socialize_size") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
socialize_size
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
1
|
3
|
3.528571
|
5
|
19
|
Weekly face-to-face interactions among people with whom the participant likes to socialize (socialize_meet)
# Histogram of sn_stat1$socialize_meet restricted to values up to 100, so the
# few extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden. The summary table below still uses every value.
ggplot(filter(sn_stat1, socialize_meet <= 100)) +
  geom_histogram(aes(x = socialize_meet)) +
  annotate(geom = "text", x = 40, y = 30, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat1$socialize_meet))), caption = "socialize_meet") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
socialize_meet
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
2.0375
|
7
|
14.43443
|
17.895
|
212.85
|
Weekly ICT interactions among people with whom the participant likes to socialize (socialize_chat)
# Histogram of sn_stat1$socialize_chat restricted to values up to 100, so the
# few extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden. The summary table below still uses every value.
ggplot(filter(sn_stat1, socialize_chat <= 100)) +
  geom_histogram(aes(x = socialize_chat)) +
  annotate(geom = "text", x = 40, y = 30, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat1$socialize_chat))), caption = "socialize_chat") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
socialize_chat
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
1.69
|
7
|
4779.766
|
12.44
|
1e+06
|
Number of people with whom the participant discusses important matters (important_size)
# Distribution of sn_stat1$important_size, followed by its summary()
# statistics rendered as a one-row table.
ggplot(sn_stat1, aes(x = important_size)) +
  geom_histogram()

sn_stat1$important_size %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "important_size") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
important_size
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
1
|
2
|
2.714286
|
4
|
19
|
Weekly face-to-face interactions among people with whom the participant discusses important matters (important_meet)
# Histogram of sn_stat1$important_meet restricted to values up to 100, so the
# few extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden. The summary table below still uses every value.
ggplot(filter(sn_stat1, important_meet <= 100)) +
  geom_histogram(aes(x = important_meet)) +
  annotate(geom = "text", x = 40, y = 30, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat1$important_meet))), caption = "important_meet") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
important_meet
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
2.0575
|
7
|
17.99005
|
16.9625
|
1000
|
Number of ICT interactions, by week, among people with whom the participant discusses important matters (important_chat)
# Histogram of sn_stat1$important_chat restricted to values up to 100, so the
# few extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden. The summary table below still uses every value.
ggplot(filter(sn_stat1, important_chat <= 100)) +
  geom_histogram(aes(x = important_chat)) +
  annotate(geom = "text", x = 30, y = 30, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat1$important_chat))), caption = "important_chat") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
important_chat
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
2.02
|
7
|
4779.284
|
10.015
|
1e+06
|
Number of groups in the network (nb_groups)
# Distribution of sn_stat2$nb_groups, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat2, aes(x = nb_groups)) +
  geom_histogram()

sn_stat2$nb_groups %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "nb_groups") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
nb_groups
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
|
0
|
0
|
1
|
1.5
|
2
|
8
|
Number of people in all groups (group_size)
# Histogram of sn_stat2$group_size restricted to values up to 100, so the few
# extreme values do not flatten the distribution. The filter is inclusive
# (<= 100) to match the on-plot note, which says only values *over* 100 are
# hidden; filter() also drops the rows where group_size is NA. The summary
# table below still uses every value.
ggplot(filter(sn_stat2, group_size <= 100)) +
  geom_histogram(aes(x = group_size)) +
  annotate(geom = "text", x = 55, y = 15, label = "X-axis: values over 100 not displayed", alpha = .5)

kable(t(as.matrix(summary(sn_stat2$group_size))), caption = "group_size") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
group_size
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
2
|
10
|
26
|
83.78195
|
71
|
1294
|
77
|
Simmelian Brokerage calculated on the full network (simmelian)
# Distribution of sn_stat2$simmelian, followed by its summary() statistics
# rendered as a one-row table.
ggplot(sn_stat2, aes(x = simmelian)) +
  geom_histogram()

sn_stat2$simmelian %>%
  summary() %>%
  as.matrix() %>%
  t() %>%
  kable(caption = "simmelian") %>%
  kable_styling(bootstrap_options = "striped", full_width = TRUE, position = "left")
simmelian
|
Min.
|
1st Qu.
|
Median
|
Mean
|
3rd Qu.
|
Max.
|
NA’s
|
|
1
|
2.67
|
5
|
8.115628
|
11.76
|
42.62
|
27
|
Social indicators: Alexandre Naud’s toolbox
See Alex’s document for a more comprehensive presentation of the social indicators.
- Number of people in the network (degree)
- Number of edges divided by the maximum possible number of edges in the network (density)
- Simmelian Brokerage (simmelian)
- Standard deviation for network member ages (age_sd)
- Does the participant have a spouse (spouse)
- Proportion of kin in the network (prop_kin)
- Diversity of relation types (diversity)
- Number of individuals that are not connected with the spouse (independant_ties)
- Weekly face-to-face interactions (meet_by_week)
- Weekly interactions through information and communication technologies (chat_by_week)
- Number of people with whom the participant likes to socialize (socialize_size)
- Weekly face-to-face interactions among people with whom the participant likes to socialize (socialize_meet)
- Weekly ICT interactions among people with whom the participant likes to socialize (socialize_chat)
- Number of people with whom the participant discusses important matters (important_size)
- Weekly face-to-face interactions among people with whom the participant discusses important matters (important_meet)
- Number of ICT interactions, by week, among people with whom the participant discusses important matters (important_chat)
- Number of groups in the network (nb_groups)
- Number of people in all groups (group_size)
- Simmelian Brokerage calculated on the full network (simmelian)