Seattle Location History

Kivan Polimis, Mon 21 August 2017, How-to

The goal of this post is to visualize time spent in various Seattle neighborhoods using Google location data and Python.

Overview

  1. Setup
    • download data
    • install modules
  2. Data wrangling
    • data extraction
    • data exploration
  3. Working with Shapefiles in Python
  4. Prep data and pare down locations
  5. Compute your measurement metric
  6. Choropleth Plot
  7. Hexbin Map

Setup

  1. Use Google Takeout to download your Google location history
  2. If you've previously enabled Google location reporting on your smartphone, your GPS data is periodically uploaded to Google's servers. Google Takeout lets you download that location history.
    • When and how this data gets uploaded is opaque to the end user, but as you'll see below, Android appears to upload a GPS location roughly every 60 seconds. That's plenty of data to work with.
  3. After downloading your data, install the required modules

Google Takeout

Google Takeout is a Google service that lets users export their personal Google data. We'll use Takeout to download our raw location history as a one-time snapshot; since Google Latitude was retired, there's no API for accessing location history in real time.

Download location data:

  • Go to takeout. Uncheck all services except "Location History"
  • The data will be in JSON format, which works great for us. Download it in your favorite compression type.
  • When Google has finished creating your archive, you'll get an email notification and a link to download.
  • Download and unzip the file, and you should be looking at a LocationHistory.json file (a quick peek at its structure is sketched after this list).

Working with location data in Pandas: Pandas is an incredibly powerful tool that simplifies working with complex data types and performing statistical analysis in the style of R. Chris Albon has great primers on using Pandas here under the "Data Wrangling" section.
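
If you're curious about the raw structure before loading anything into Pandas, a quick peek like the sketch below shows the fields the rest of the post relies on (adjust the path to wherever you unzipped the archive):

import json

# Peek at the raw Takeout export
with open('LocationHistory.json', 'r') as f:
    raw = json.load(f)

print(len(raw['locations']))                # one record per GPS fix
print(sorted(raw['locations'][0].keys()))   # expect accuracy, latitudeE7, longitudeE7, timestampMs, ...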

Install modules

  • If you use Anaconda to manage your Python packages, I recommend creating a conda virtual environment for the dependencies. Run the lines under each step below in a terminal to create the environment, build a requirements.txt, and install the packages.
    • conda create -n test-env python=3.5 anaconda
    • source activate test-env
  • make a requirements.txt file for dependencies
    • (echo descartes; echo IPython; echo shapely; echo fiona; echo Basemap) >> requirements.txt
  • install requirements.txt
    • conda install --yes --file requirements.txt
  • Windows users:

After completing the setup, we'll read in the LocationHistory.json file from Google Takeout and create a DataFrame.

In [1]:
from __future__ import division
# utils bundles the shared imports used in the cells below
# (json, pandas as pd, numpy as np, datetime, fiona, Basemap, and the shapely helpers)
from utils import *

Data Wrangling

  • data extraction
In [2]:
with open('data/LocationHistory/2018/LocationHistory.json', 'r') as location_file:
    raw = json.loads(location_file.read())

# load the raw location records into a DataFrame
location_data = pd.DataFrame(raw['locations'])
del raw #free up some memory

# convert E7-scaled integer coordinates to decimal degrees
location_data['latitudeE7'] = location_data['latitudeE7']/float(1e7) 
location_data['longitudeE7'] = location_data['longitudeE7']/float(1e7)

# convert timestampMs to seconds
location_data['timestampMs'] = location_data['timestampMs'].map(lambda x: float(x)/1000) 
location_data['datetime'] = location_data.timestampMs.map(datetime.datetime.fromtimestamp)

# Rename fields based on the conversions
location_data.rename(columns={'latitudeE7':'latitude',
                              'longitudeE7':'longitude',
                              'timestampMs':'timestamp'}, inplace=True)

# Ignore locations with accuracy estimates over 1000m
location_data = location_data[location_data.accuracy < 1000]
location_data.reset_index(drop=True, inplace=True)

Explore Data

  • view data and datatypes
In [3]:
print(location_data.dtypes)
location_data.describe()
accuracy                     int64
activity                    object
altitude                   float64
heading                    float64
latitude                   float64
longitude                  float64
timestamp                  float64
velocity                   float64
verticalAccuracy           float64
datetime            datetime64[ns]
dtype: object
Out[3]:
accuracy altitude heading latitude longitude timestamp velocity verticalAccuracy
count 745660.000000 101260.000000 44100.000000 745660.000000 745660.000000 7.456600e+05 58874.000000 4921.000000
mean 58.997173 67.057525 186.597551 37.748367 -102.506537 1.417774e+09 7.769678 23.099776
std 125.358984 242.209547 101.643968 9.004123 23.609836 3.356510e+07 11.790783 45.139324
min 1.000000 -715.000000 0.000000 13.689757 -123.260751 1.376790e+09 0.000000 2.000000
25% 22.000000 -18.000000 98.000000 29.817569 -122.306596 1.391259e+09 0.000000 2.000000
50% 31.000000 2.000000 181.000000 29.986634 -95.246060 1.413249e+09 1.000000 2.000000
75% 50.000000 60.000000 270.000000 47.664284 -94.995603 1.428049e+09 13.000000 30.000000
max 999.000000 6738.000000 359.000000 50.105984 23.782015 1.519330e+09 208.000000 473.000000
  • accuracy code "999" may represent missingness (a quick count is sketched after this list)
  • find earliest and latest observations in the data
    • save for later
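
A quick count shows how common that 999 sentinel actually is (just a sketch; whether to drop those rows is a judgment call I leave alone here):

# Count fixes reporting the suspicious accuracy value of 999 (a possible "unknown accuracy" flag)
n_999 = (location_data['accuracy'] == 999).sum()
print("fixes with accuracy == 999: {} of {}".format(n_999, len(location_data)))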
In [4]:
print("earliest observed date: {}".format(min(location_data["datetime"]).strftime('%m-%d-%Y')))
print("latest observed date: {}".format(max(location_data["datetime"]).strftime('%m-%d-%Y')))

earliest_obs = min(location_data["datetime"]).strftime('%m-%d-%Y')
latest_obs = max(location_data["datetime"]).strftime('%m-%d-%Y')
earliest observed date: 08-17-2013
latest observed date: 02-22-2018
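
In the setup notes I mentioned that Android appears to upload a GPS location roughly every 60 seconds. You can check that claim against your own history by looking at the gaps between consecutive timestamps (a rough sketch; the sampling rate varies with battery, signal, and movement):

# Median gap, in seconds, between consecutive GPS fixes
sorted_ts = location_data['timestamp'].sort_values()
gaps = sorted_ts.diff().dropna()
print("median gap between fixes: {:.0f} seconds".format(gaps.median()))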
  • location_data is a Pandas DataFrame containing all your location history and related info.
  • Columns include latitude, longitude, and a timestamp; additional columns are accuracy, activity, altitude, heading, and velocity.
  • All we'll need are latitude, longitude, and time.
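
If you prefer to work with just those fields, a slimmed-down copy is a one-liner (optional; the rest of the post keeps the full DataFrame):

# Optional: keep only the columns the analysis below actually uses
slim = location_data[['latitude', 'longitude', 'datetime']].copy()
slim.head()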

Working with Shapefiles in Python

Shapefile is a widely-used data format for describing points, lines, and polygons. To work with shapefiles, Python gives us shapely. To read and write shapefiles, we'll use fiona.

To learn Shapely and write this blog post, I leaned heavily on this article from sensitivecities.com.

First up, you'll need to download shapefile data for the part of the world you're interested in plotting. I wanted to focus on my current home of Seattle, which like many cities provides city shapefile map data for free. It's even broken into city neighborhoods! The US Census Bureau provides a ton of national shapefiles here. Your city likely provides this kind of data too. Tom MacWright has a GIS with Python, Shapely, and Fiona overview with more detail on Python mapping with these tools.
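
Before wiring the shapefile into Basemap, it can help to peek at what it contains. The sketch below (using the same local path as the next cell; attribute names can vary by download) checks that the neighborhood-name field used later, S_HOOD, is present and that the layer really is in WGS84:

import fiona

# Inspect one record of the neighborhoods shapefile
with fiona.open('data/Seattle_Neighborhoods/WGS84/Neighborhoods.shp') as shp:
    first = next(iter(shp))
    print(list(first['properties']))   # expect a neighborhood-name field such as S_HOOD
    print(shp.crs)                     # coordinate reference system; should report WGS84 (EPSG:4326)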

Next, we'll need to import the shapefile data we downloaded from the data.seattle.gov link above.

In [5]:
shapefilename = 'data/Seattle_Neighborhoods/WGS84/Neighborhoods'
shp = fiona.open(shapefilename+'.shp')
coords = shp.bounds  # bounding box of the shapefile: (lon_min, lat_min, lon_max, lat_max)
shp.close()

width, height = coords[2] - coords[0], coords[3] - coords[1]
extra = 0.01  # pad the map extent by 1% on each side
  • Use Basemap to set up the map projection and read in the shapefile (a quick outline plot is sketched after this cell)
In [6]:
m = Basemap(
    projection='tmerc', ellps='WGS84',
    lon_0=np.mean([coords[0], coords[2]]),
    lat_0=np.mean([coords[1], coords[3]]),
    llcrnrlon=coords[0] - extra * width,
    llcrnrlat=coords[1] - (extra * height), 
    urcrnrlon=coords[2] + extra * width,
    urcrnrlat=coords[3] + (extra * height),
    resolution='i',  suppress_ticks=True)

_out = m.readshapefile(shapefilename, name='seattle', drawbounds=False, color='none', zorder=2)
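
If you want to confirm the shapefile loaded correctly before going further, you can sketch the raw neighborhood outlines with descartes (one of the packages in requirements.txt). This is just an optional sanity check, not part of the analysis itself; the choropleth and hexbin maps come later in the post.

import matplotlib.pyplot as plt
from descartes import PolygonPatch
from shapely.geometry import Polygon

fig, ax = plt.subplots(figsize=(8, 8))

# draw each neighborhood outline from the vertices Basemap just read in
for hood_points in m.seattle:
    ax.add_patch(PolygonPatch(Polygon(hood_points), fc='none', ec='#555555', lw=0.5, zorder=2))

# frame the axes with the projected shapefile bounds (calling m() converts lon/lat to map coordinates)
xmin, ymin = m(coords[0], coords[1])
xmax, ymax = m(coords[2], coords[3])
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_aspect('equal')
ax.axis('off')
plt.show()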

Prep data and pare down locations

The first step is to pare down your location history to only contain points within the map's borders.

In [7]:
# set up a map dataframe
df_map = pd.DataFrame({
    'poly': [Polygon(hood_points) for hood_points in m.seattle],
    'name': [hood['S_HOOD'] for hood in m.seattle_info]
})

# Convert our latitude and longitude into Basemap cartesian map coordinates
mapped_points = [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(location_data['longitude'], 
            location_data['latitude'])]
all_points = MultiPoint(mapped_points)

# Use prep to optimize the polygons for faster point-in-polygon tests
hood_polygons = MultiPolygon(list(df_map['poly'].values))
prepared_polygons = prep(hood_polygons)

# Filter out the points that do not fall within the map we're making
city_points_filter = filter(prepared_polygons.contains, all_points)
city_points_list = list(city_points_filter)
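
As an optional sanity check, you can see how much of your history actually falls inside the Seattle polygons:

# Share of GPS fixes that land inside the neighborhood polygons
print("{} of {} points fall within the map".format(len(city_points_list), len(mapped_points)))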
In [8]:
hood_polygons
Out[8]: (notebook output: an inline rendering of the Seattle neighborhood MultiPolygon)