[1]:
import os
import transportation_tutorials as tt
import pandas as pd
import geopandas as gpd
import larch
Census data giving a tabulation of household income, by county in Florida. This data comes from the 2017 5-year ACS, Table B19001.
[2]:
os.path.basename(tt.data('ACS_17_5YR_B19001'))
[2]:
'ACS_17_5YR_B19001.csv.gz'
[3]:
# The file has a two-row header (header=[0,1]), so the columns become a
# two-level MultiIndex; the first column is used as the row index.
hhinc = pd.read_csv(tt.data('ACS_17_5YR_B19001'), header=[0,1], index_col=0)
[4]:
hhinc.info()
<class 'pandas.core.frame.DataFrame'>
Index: 67 entries, 0500000US12001 to 0500000US12133
Data columns (total 36 columns):
(GEO.id2, Id2) 67 non-null int64
(GEO.display-label, Geography) 67 non-null object
(HD01_VD01, Estimate; Total:) 67 non-null int64
(HD02_VD01, Margin of Error; Total:) 67 non-null int64
(HD01_VD02, Estimate; Total: - Less than $10,000) 67 non-null int64
(HD02_VD02, Margin of Error; Total: - Less than $10,000) 67 non-null int64
(HD01_VD03, Estimate; Total: - $10,000 to $14,999) 67 non-null int64
(HD02_VD03, Margin of Error; Total: - $10,000 to $14,999) 67 non-null int64
(HD01_VD04, Estimate; Total: - $15,000 to $19,999) 67 non-null int64
(HD02_VD04, Margin of Error; Total: - $15,000 to $19,999) 67 non-null int64
(HD01_VD05, Estimate; Total: - $20,000 to $24,999) 67 non-null int64
(HD02_VD05, Margin of Error; Total: - $20,000 to $24,999) 67 non-null int64
(HD01_VD06, Estimate; Total: - $25,000 to $29,999) 67 non-null int64
(HD02_VD06, Margin of Error; Total: - $25,000 to $29,999) 67 non-null int64
(HD01_VD07, Estimate; Total: - $30,000 to $34,999) 67 non-null int64
(HD02_VD07, Margin of Error; Total: - $30,000 to $34,999) 67 non-null int64
(HD01_VD08, Estimate; Total: - $35,000 to $39,999) 67 non-null int64
(HD02_VD08, Margin of Error; Total: - $35,000 to $39,999) 67 non-null int64
(HD01_VD09, Estimate; Total: - $40,000 to $44,999) 67 non-null int64
(HD02_VD09, Margin of Error; Total: - $40,000 to $44,999) 67 non-null int64
(HD01_VD10, Estimate; Total: - $45,000 to $49,999) 67 non-null int64
(HD02_VD10, Margin of Error; Total: - $45,000 to $49,999) 67 non-null int64
(HD01_VD11, Estimate; Total: - $50,000 to $59,999) 67 non-null int64
(HD02_VD11, Margin of Error; Total: - $50,000 to $59,999) 67 non-null int64
(HD01_VD12, Estimate; Total: - $60,000 to $74,999) 67 non-null int64
(HD02_VD12, Margin of Error; Total: - $60,000 to $74,999) 67 non-null int64
(HD01_VD13, Estimate; Total: - $75,000 to $99,999) 67 non-null int64
(HD02_VD13, Margin of Error; Total: - $75,000 to $99,999) 67 non-null int64
(HD01_VD14, Estimate; Total: - $100,000 to $124,999) 67 non-null int64
(HD02_VD14, Margin of Error; Total: - $100,000 to $124,999) 67 non-null int64
(HD01_VD15, Estimate; Total: - $125,000 to $149,999) 67 non-null int64
(HD02_VD15, Margin of Error; Total: - $125,000 to $149,999) 67 non-null int64
(HD01_VD16, Estimate; Total: - $150,000 to $199,999) 67 non-null int64
(HD02_VD16, Margin of Error; Total: - $150,000 to $199,999) 67 non-null int64
(HD01_VD17, Estimate; Total: - $200,000 or more) 67 non-null int64
(HD02_VD17, Margin of Error; Total: - $200,000 or more) 67 non-null int64
dtypes: int64(35), object(1)
memory usage: 19.4+ KB
This shapefile contains the boundaries of counties in Florida.
[5]:
os.path.basename(tt.data('FL-COUNTY-SHAPE', '*.shp'))
[5]:
'Florida_County_Lines.shp'
[6]:
fl_county = gpd.read_file(tt.data('FL-COUNTY-SHAPE'))
[7]:
fl_county.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 616 entries, 0 to 615
Data columns (total 5 columns):
OBJECTID 616 non-null int64
COASTBND 616 non-null object
DATESTAMP 616 non-null object
SHAPELEN 616 non-null float64
geometry 616 non-null object
dtypes: float64(1), int64(1), object(3)
memory usage: 24.1+ KB
[8]:
ax = fl_county.plot()
The following files include data for the entire SERPM8 region.
This shapefile contains geographic data for travel analysis zones (TAZ) in the SERPM region.
[9]:
os.path.basename(tt.data('SERPM8-TAZSHAPE', '*.shp'))
[9]:
'SERPM8TAZ_NAD83_170502.shp'
[10]:
taz = gpd.read_file(tt.data('SERPM8-TAZSHAPE'))
[11]:
taz.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 4236 entries, 0 to 4235
Data columns (total 15 columns):
OBJECTID 4236 non-null int64
TAZ_REG 4236 non-null float64
TAZ_OLD05 4236 non-null int64
TAZ_MPO 4236 non-null int64
COUNTY 4236 non-null float64
CENSUSTAZ 1506 non-null object
TAZ_BF 4236 non-null int64
FIX 4236 non-null int64
AREA 4236 non-null float64
F_NETAREA 4236 non-null float64
CBD 4236 non-null int64
HM_ROOMS 4236 non-null int64
Shape_Leng 4236 non-null float64
Shape_Area 4236 non-null float64
geometry 4236 non-null object
dtypes: float64(6), int64(7), object(2)
memory usage: 496.5+ KB
[12]:
ax = taz.plot()
[13]:
os.path.basename(tt.data('SERPM8-MAZSHAPE', '*.shp'))
[13]:
'SERPM8MAZ_NAD83_170502.shp'
[14]:
# Read the MAZ shapefile (the '*.shp' pattern resolves to the .shp member of
# the 'SERPM8-MAZSHAPE' dataset) and print a column summary.
maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE', '*.shp'))
maz.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 12022 entries, 0 to 12021
Data columns (total 8 columns):
OBJECTID 12022 non-null int64
MAZ 12022 non-null int64
SHAPE_LENG 12022 non-null float64
SHAPE_AREA 12022 non-null float64
ACRES 12022 non-null int64
POINT_X 12022 non-null int64
POINT_Y 12022 non-null int64
geometry 12022 non-null object
dtypes: float64(2), int64(5), object(1)
memory usage: 751.5+ KB
[15]:
ax = maz.plot()
[16]:
os.path.basename(tt.data('SERPM8-MAZDATA', '*.csv'))
[16]:
'MAZ_DATA.csv'
[17]:
mazd = pd.read_csv(tt.data('SERPM8-MAZDATA', '*.csv'))
[18]:
mazd.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12022 entries, 0 to 12021
Data columns (total 76 columns):
mgra 12022 non-null int64
TAZ 12022 non-null int64
HH 12022 non-null int64
POP 12022 non-null int64
emp_self 12022 non-null int64
emp_ag 12022 non-null int64
emp_const_non_bldg_prod 12022 non-null int64
emp_const_non_bldg_office 12022 non-null int64
emp_utilities_prod 12022 non-null int64
emp_utilities_office 12022 non-null int64
emp_const_bldg_prod 12022 non-null int64
emp_const_bldg_office 12022 non-null int64
emp_mfg_prod 12022 non-null int64
emp_mfg_office 12022 non-null int64
emp_whsle_whs 12022 non-null int64
emp_trans 12022 non-null int64
emp_retail 12022 non-null int64
emp_prof_bus_svcs 12022 non-null int64
emp_prof_bus_svcs_bldg_maint 12022 non-null int64
emp_pvt_ed_k12 12022 non-null int64
emp_pvt_ed_post_k12_oth 12022 non-null int64
emp_health 12022 non-null int64
emp_personal_svcs_office 12022 non-null int64
emp_amusement 12022 non-null int64
emp_hotel 12022 non-null int64
emp_restaurant_bar 12022 non-null int64
emp_personal_svcs_retail 12022 non-null int64
emp_religious 12022 non-null int64
emp_pvt_hh 12022 non-null int64
emp_state_local_gov_ent 12022 non-null int64
emp_scrap_other 12022 non-null int64
emp_fed_non_mil 12022 non-null int64
emp_fed_mil 12022 non-null int64
emp_state_local_gov_blue 12022 non-null int64
emp_state_local_gov_white 12022 non-null int64
emp_public_ed 12022 non-null int64
emp_own_occ_dwell_mgmt 12022 non-null int64
emp_fed_gov_accts 12022 non-null int64
emp_st_lcl_gov_accts 12022 non-null int64
emp_cap_accts 12022 non-null int64
emp_total 12022 non-null int64
collegeEnroll 12022 non-null int64
otherCollegeEnroll 12022 non-null int64
AdultSchEnrl 12022 non-null int64
EnrollGradeKto8 12022 non-null int64
EnrollGrade9to12 12022 non-null int64
PrivateEnrollGradeKto8 12022 non-null int64
ech_dist 12022 non-null int64
hch_dist 12022 non-null int64
parkarea 12022 non-null int64
hstallsoth 12022 non-null int64
hstallssam 12022 non-null int64
hparkcost 12022 non-null int64
numfreehrs 12022 non-null int64
dstallsoth 12022 non-null int64
dstallssam 12022 non-null int64
dparkcost 12022 non-null int64
mstallsoth 12022 non-null int64
mstallssam 12022 non-null int64
mparkcost 12022 non-null float64
TotInt 12022 non-null int64
DUDen 12022 non-null float64
EmpDen 12022 non-null float64
PopDen 12022 non-null float64
RetEmpDen 12022 non-null float64
IntDenBin 12022 non-null int64
EmpDenBin 12022 non-null int64
DuDenBin 12022 non-null int64
POINT_X 12022 non-null int64
POINT_Y 12022 non-null int64
ACRES 12022 non-null int64
HotelRoomTotal 12022 non-null int64
mall_flag 12022 non-null int64
beachAcres 12022 non-null int64
geoSRate 12022 non-null int64
geoSRateNm 12022 non-null int64
dtypes: float64(5), int64(71)
memory usage: 7.0 MB
[19]:
os.path.basename(tt.data('SERPM8-BASE2015-AM_HLOAD'))
[19]:
'SERPM8-BASE2015-AM_HLOAD.csv.gz'
[20]:
hwy = pd.read_csv(tt.data('SERPM8-BASE2015-AM_HLOAD'))
[21]:
hwy.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50865 entries, 0 to 50864
Columns: 228 entries, Unnamed: 0 to LW_RCTOLLPK
dtypes: float64(94), int64(134)
memory usage: 88.5 MB
The following files include synthetic data that has been generated for the Jupiter Example Study Area. All data has been generated using the 2015 base year scenario. Note that while this data is representative of the data from real people, this is purely synthetic data, and does not show the actual behavior of any real person.
Data on synthetic households residing in the Jupiter study area.
[22]:
os.path.basename(tt.data('SERPM8-BASE2015-HOUSEHOLDS'))
[22]:
'SERPM8-BASE2015-HOUSEHOLDS.csv.gz'
[23]:
# Load the synthetic household table and print a column summary.
hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS'))
hh.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18178 entries, 0 to 18177
Data columns (total 10 columns):
Unnamed: 0 18178 non-null int64
hh_id 18178 non-null int64
home_mgra 18178 non-null int64
income 18178 non-null int64
autos 18178 non-null int64
transponder 18178 non-null int64
cdap_pattern 18178 non-null object
jtf_choice 18178 non-null int64
autotech 18178 non-null int64
tncmemb 18178 non-null int64
dtypes: int64(9), object(1)
memory usage: 1.4+ MB
Data on synthetic persons residing in the Jupiter study area.
[24]:
os.path.basename(tt.data('SERPM8-BASE2015-PERSONS'))
[24]:
'SERPM8-BASE2015-PERSONS.csv.gz'
[25]:
# Load the synthetic person table and print a column summary.
pers = pd.read_csv(tt.data('SERPM8-BASE2015-PERSONS'))
pers.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40683 entries, 0 to 40682
Data columns (total 13 columns):
hh_id 40683 non-null int64
person_id 40683 non-null int64
person_num 40683 non-null int64
age 40683 non-null int64
gender 40683 non-null object
type 40683 non-null object
value_of_time 40683 non-null float64
activity_pattern 40683 non-null object
imf_choice 40683 non-null int64
inmf_choice 40683 non-null int64
fp_choice 40683 non-null int64
reimb_pct 40683 non-null float64
wrkr_type 40683 non-null int64
dtypes: float64(2), int64(8), object(3)
memory usage: 4.0+ MB
All individual tours made by synthetic persons residing in the Jupiter study area. Note this includes home-based tours that have destinations outside the study area, as well as work-based tours that may be entirely outside the study area.
[26]:
os.path.basename(tt.data('SERPM8-BASE2015-TOURS'))
[26]:
'SERPM8-BASE2015-TOURS.csv.gz'
[27]:
# Load the synthetic tour records into their own `tours` frame, so the
# household table bound to `hh` in an earlier cell is not overwritten,
# then print a column summary.
tours = pd.read_csv(tt.data('SERPM8-BASE2015-TOURS'))
tours.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47485 entries, 0 to 47484
Data columns (total 61 columns):
hh_id 47485 non-null int64
person_id 47485 non-null int64
person_num 47485 non-null int64
person_type 47485 non-null int64
tour_id 47485 non-null int64
tour_category 47485 non-null object
tour_purpose 47485 non-null object
orig_mgra 47485 non-null int64
dest_mgra 47485 non-null int64
start_period 47485 non-null int64
end_period 47485 non-null int64
tour_mode 47485 non-null int64
tour_distance 47485 non-null float64
tour_time 47485 non-null float64
atWork_freq 47485 non-null int64
num_ob_stops 47485 non-null int64
num_ib_stops 47485 non-null int64
out_btap 47485 non-null int64
out_atap 47485 non-null int64
in_btap 47485 non-null int64
in_atap 47485 non-null int64
util_1 47485 non-null float64
util_2 47485 non-null float64
util_3 47485 non-null float64
util_4 47485 non-null float64
util_5 47485 non-null float64
util_6 47485 non-null float64
util_7 47485 non-null float64
util_8 47485 non-null float64
util_9 47485 non-null float64
util_10 47485 non-null float64
util_11 47485 non-null float64
util_12 47485 non-null float64
util_13 47485 non-null float64
util_14 47485 non-null float64
util_15 47485 non-null float64
util_16 47485 non-null float64
util_17 47485 non-null float64
util_18 47485 non-null float64
util_19 47485 non-null float64
util_20 47485 non-null float64
prob_1 47485 non-null float64
prob_2 47485 non-null float64
prob_3 47485 non-null float64
prob_4 47485 non-null float64
prob_5 47485 non-null float64
prob_6 47485 non-null float64
prob_7 47485 non-null float64
prob_8 47485 non-null float64
prob_9 47485 non-null float64
prob_10 47485 non-null float64
prob_11 47485 non-null float64
prob_12 47485 non-null float64
prob_13 47485 non-null float64
prob_14 47485 non-null float64
prob_15 47485 non-null float64
prob_16 47485 non-null float64
prob_17 47485 non-null float64
prob_18 47485 non-null float64
prob_19 47485 non-null float64
prob_20 47485 non-null float64
dtypes: float64(42), int64(17), object(2)
memory usage: 22.1+ MB
All trips on individual tours made by synthetic persons residing in the Jupiter study area. Note this includes trips that have origins or destinations outside the study area.
[28]:
os.path.basename(tt.data('SERPM8-BASE2015-TRIPS'))
[28]:
'SERPM8-BASE2015-TRIPS.csv.gz'
[29]:
# Load the synthetic trip table and print a column summary.
trips = pd.read_csv(tt.data('SERPM8-BASE2015-TRIPS'))
trips.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123874 entries, 0 to 123873
Data columns (total 20 columns):
hh_id 123874 non-null int64
person_id 123874 non-null int64
person_num 123874 non-null int64
tour_id 123874 non-null int64
stop_id 123874 non-null int64
inbound 123874 non-null int64
tour_purpose 123874 non-null object
orig_purpose 123874 non-null object
dest_purpose 123874 non-null object
orig_mgra 123874 non-null int64
dest_mgra 123874 non-null int64
parking_mgra 123874 non-null int64
stop_period 123874 non-null int64
trip_mode 123874 non-null int64
trip_board_tap 123874 non-null int64
trip_alight_tap 123874 non-null int64
tour_mode 123874 non-null int64
smplRate_geo 123874 non-null float64
autotech 123874 non-null int64
tncmemb 123874 non-null int64
dtypes: float64(1), int64(16), object(3)
memory usage: 18.9+ MB
The tutorial data contains a set of highway skims for the Jupiter example area, in openmatrix (omx) format.
[30]:
os.path.basename(tt.data('SERPM8-JUPITER-AMHSKIMS'))
[30]:
'SERPM8-JUPITER-AMHSKIMS.omx'
[31]:
# Open the Jupiter highway skims with Larch's OMX reader; the bare name on the
# last line makes the notebook display the file's summary repr.
skims = larch.OMX(tt.data('SERPM8-JUPITER-AMHSKIMS'))
skims
[31]:
<larch.OMX> ⋯/SERPM8-JUPITER-AMHSKIMS.omx
| shape:(220, 220)
| data:
| AM_DAT_DIST (float64)
| AM_DAT_FFTIME (float64)
| AM_DAT_TIME (float64)
| AM_DAT_TOLLCOST (float64)
| AM_DAT_TOLLDIST (float64)
| AM_GP_DIST (float64)
| AM_GP_FFTIME (float64)
| AM_GP_TIME (float64)
| AM_S2NH_DIST (float64)
| AM_S2NH_FFTIME (float64)
| AM_S2NH_HOVDIST (float64)
| AM_S2NH_TIME (float64)
| AM_S2TH_DIST (float64)
| AM_S2TH_FFTIME (float64)
| AM_S2TH_HOVDIST (float64)
| AM_S2TH_TIME (float64)
| AM_S2TH_TOLLCOST (float64)
| AM_S2TH_TOLLDIST (float64)
| lookup:
| TAZ_ID (220 int64)
Exampville is an entirely fictional town built for the express purpose of demonstrating the use of discrete choice modeling tools for transportation planning. The Exampville data files are packaged with Larch, an open source package for discrete choice models.
[32]:
import larch.exampville
The shapefile that includes a map of the travel analysis zones in Exampville is stored in a zip file.
[33]:
os.path.basename(larch.exampville.files.shapefile)
[33]:
'exampville_taz.zip'
Geopandas can open and read this data directly, without unzipping it on disk first, by including the “zip://” protocol in front of the filename.
[34]:
# Prefixing the path with "zip://" lets geopandas read the shapefile straight
# from the zip archive.
taz_shape = gpd.read_file("zip://"+larch.exampville.files.shapefile)
[35]:
taz_shape.plot(edgecolor='k');
[36]:
os.path.basename(larch.exampville.files.employment)
[36]:
'exampville_employment.csv.gz'
[37]:
# Load the Exampville employment table, using the 'TAZ' column as the row index.
emp = pd.read_csv(larch.exampville.files.employment, index_col='TAZ')
[38]:
emp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 40 entries, 1 to 40
Data columns (total 3 columns):
NONRETAIL_EMP 40 non-null int64
RETAIL_EMP 40 non-null int64
TOTAL_EMP 40 non-null int64
dtypes: int64(3)
memory usage: 1.2 KB
The network skims for Exampville are stored in open matrix (OMX) format. Larch includes an OMX reader, which also embeds a number of handy tools for processing OMX data into formats useful for discrete choice analysis.
[39]:
os.path.basename(larch.exampville.files.skims)
[39]:
'exampville_skims.omx'
[40]:
# Open the Exampville skims with mode='r' (presumably read-only — confirm
# against the Larch OMX docs). Note this rebinds `skims`, replacing the
# Jupiter skims opened earlier. The bare name displays the summary repr.
skims = larch.OMX( larch.exampville.files.skims, mode='r' )
skims
[40]:
<larch.OMX> ⋯/exampville_skims.omx
| shape:(40, 40)
| data:
| AUTO_COST (float64)
| AUTO_DIST (float64)
| AUTO_TIME (float64)
| BIKE_TIME (float64)
| TRANSIT_FARE (float64)
| TRANSIT_IVTT (float64)
| TRANSIT_OVTT (float64)
| WALK_DIST (float64)
| WALK_TIME (float64)
| lookup:
| TAZ_ID (40 int64)
[41]:
os.path.basename(larch.exampville.files.hh)
[41]:
'exampville_households.csv.gz'
[42]:
hh = pd.read_csv( larch.exampville.files.hh )
[43]:
hh.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
X 5000 non-null float64
Y 5000 non-null float64
INCOME 5000 non-null int64
geometry 5000 non-null object
HOMETAZ 5000 non-null int64
HHSIZE 5000 non-null int64
HHID 5000 non-null int64
dtypes: float64(2), int64(4), object(1)
memory usage: 273.5+ KB
[44]:
os.path.basename(larch.exampville.files.person)
[44]:
'exampville_persons.csv.gz'
[45]:
pp = pd.read_csv( larch.exampville.files.person )
[46]:
pp.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9146 entries, 0 to 9145
Data columns (total 8 columns):
PERSONID 9146 non-null int64
HHID 9146 non-null int64
HHIDX 9146 non-null int64
AGE 9146 non-null int64
WORKS 9146 non-null int64
N_WORK_TOURS 9146 non-null int64
N_OTHER_TOURS 9146 non-null int64
N_TOURS 9146 non-null int64
dtypes: int64(8)
memory usage: 571.7 KB
[47]:
os.path.basename(larch.exampville.files.tour)
[47]:
'exampville_tours.csv.gz'
[48]:
tour = pd.read_csv( larch.exampville.files.tour )
[49]:
tour.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15934 entries, 0 to 15933
Data columns (total 6 columns):
TOURID 15934 non-null int64
HHID 15934 non-null int64
PERSONID 15934 non-null int64
DTAZ 15934 non-null int64
TOURMODE 15934 non-null int64
TOURPURP 15934 non-null int64
dtypes: int64(6)
memory usage: 747.0 KB
[50]:
os.path.basename(tt.data('THIS-FILE-IS-CORRUPT'))
[50]:
'THIS-FILE-IS-CORRUPT.csv.gz'
[51]:
import gzip
# Dump the raw decompressed bytes so the point where the readable CSV text
# turns into garbage is visible in the output.
with gzip.open(tt.data('THIS-FILE-IS-CORRUPT'), 'rb') as f:
    print(f.read())
b'"Name","2019 Population","Growth Since 2010"\n"Miami-Dade County",2751796,9.754517738314988\n"Broward County",1935878,10.438146461848802\n"Palm Beach County",1471150,11.134781186520442\n"Hillsborough County",1408566,14.187346024545278\n"Orange County",1348975,17.43533537158656\n"Pinellas County",970637,5.908084103481755\n"Duval County",937934,8.349130546785744\n"Lee County",739224,19.13993814336621\n"Polk County",686483,13.819128387508599\n"Brevard County",589162,8.306616468373605\n"Volusia County",538692,8.949041751947647\n"Pasco County",525643,12.922511761799399\n"Seminole County",462659,9.358845567465991\n"Sarasota County",419119,10.310730002316134\n"Manatee County",385571,19.209804630857747\n"Collier County",372880,15.585506554536408\n"Marion County",354353,6.94414264329518\n"Osceola County",352180,30.50857507077953\n"Lake County",346017,16.228535726762146\n"Escambia County",313512,5.19266128924022\n"St. Lucie Count\x97\xef\x88\xf5;\xd6\r\xa7\xf3\x9d\xd9\xf4\xbd\xaeZt\xb2\x88\xfb\xc8(\xb5\xb4\r\xf1%\xc05\xb4BM\xc1\xe3\xb4\xad\xd45\xb1:M\x00s|\x0f\xd9\x0eA\xb5\xa4\xcb>\xc28\xeb\x89\x9b\x1aR\xb8v\xc7\xc8\x84\x804"^\xf2\xd0\xad\x8ar\x0c\xf59\xdfU\xad\xafo\xae\xe2\x0c\xc4\xce94\xba\xdco\xdf<\x840\r\x97#\xaf\xe0\xcb\xcbf\xed\x02\xbe\xf1\x18\xb8Fm\x80\xb0:3\x1d\x02'