In [15]:
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
import pandas as pd
import branca.colormap as cm
from shapely.geometry import Point
from shapely.geometry import mapping
from shapely.geometry import shape
from shapely import wkt
import numpy as np
import json
import statsmodels.api as sm

In [16]:
# Location of the census feature table. Despite the `_df` suffix this name
# holds the CSV path string; the loaded frame is `df_census`.
census_groups_df = './final/census_features.csv'
df_census = pd.read_csv(census_groups_df)
# Preview the first rows to check which columns were loaded.
df_census.head()

Unnamed: 0.1,Unnamed: 0,center,area_m2,avg_stars,choice,no_truck_length,distance_pool,avg_housing_price,foreclosure_count,foreclosure_over_area
0,0,"39.96876598164982,-75.1997268790072",161887,,True,0.0,0.001575,3808.0,0,0.0
1,1,"39.96655556342398,-75.2004472970602",103778,,True,0.0,0.0,6752.0,4,3.9e-05
2,2,"39.96430097635668,-75.18964523537966",43724,,True,0.0,0.008896,7824.0,0,0.0
3,3,"39.9753665759928,-75.2113493640991",108966,,False,0.0,0.001422,5920.0,2,1.8e-05
4,4,"39.972428220268384,-75.20517069878659",142244,,False,2146.077336,0.005254,4064.0,3,2.1e-05


In [22]:
# Keep the file path under its own name so that `iso_df` is only ever bound
# to the loaded DataFrame (previously it was first a str, then a DataFrame).
iso_path = './final/iso_features.csv'
iso_df = pd.read_csv(iso_path)
# Preview the first rows to check which columns were loaded.
iso_df.head()

Unnamed: 0.1,Unnamed: 0,isochrone,center,area_m2,avg_stars,choice,no_truck_length,distance_pool,avg_housing_price,foreclosure_count,foreclosure_over_area
0,0,421010108001-600-foot,"39.96876598164982,-75.1997268790072",1545242.0,3.708333,True,1458.387879,0.0,5719.0,13,8e-06
1,1,421010108001-1200-foot,"39.96876598164982,-75.1997268790072",7552386.0,3.519802,True,4245.550548,0.0,5152.0,53,7e-06
2,2,421010108001-1800-foot,"39.96876598164982,-75.1997268790072",17370100.0,3.473648,True,8506.44576,0.0,5584.0,80,5e-06
3,3,421010108001-600-car,"39.96876598164982,-75.1997268790072",198873300.0,3.681063,True,209322.298891,0.0,8960.0,1176,6e-06
4,4,421010108001-1200-car,"39.96876598164982,-75.1997268790072",1108233000.0,3.642439,True,835668.097031,0.0,11168.0,2706,2e-06


In [23]:
# Regressor columns passed to the OLS fits, one per line for easy editing.
features = [
    'area_m2',
    'avg_stars',
    'choice',
    'no_truck_length',
    'distance_pool',
    'foreclosure_count',
    'foreclosure_over_area',
]
# Dependent variable of the regression.
target = 'avg_housing_price'

In [24]:
def run_regression(df, features, target):
    """Fit an OLS model of ``target`` on ``features`` (plus an intercept).

    Rows with a missing value in any feature or in the target are dropped
    before fitting.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature and target columns.
    features : sequence of str
        Names of the regressor columns.
    target : str
        Name of the dependent-variable column.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(...).fit()``.

    Raises
    ------
    ValueError
        If no rows remain once missing values are dropped, or if a feature
        column cannot be converted to float.
    """
    features = list(features)
    df_clean = df.dropna(subset=features + [target])
    if df_clean.empty:
        raise ValueError(
            "No complete rows left after dropping missing values in "
            f"{features + [target]}"
        )
    # Cast every regressor to float rather than special-casing column names:
    # True/False become 1.0/0.0 whether the column is stored as ``bool`` or as
    # ``object`` (which is what pandas yields when the raw column held NaN),
    # and numeric columns keep their values unchanged.
    X = df_clean[features].astype(float)
    X = sm.add_constant(X)
    y = df_clean[target]
    model = sm.OLS(y, X).fit()
    return model


In [27]:
# Fit the OLS model on the census table with the notebook-level feature list
# and target, then print the full statsmodels summary table.
census_model = run_regression(df_census, features, target)
print(census_model.summary())


                            OLS Regression Results                            
Dep. Variable:      avg_housing_price   R-squared:                       0.064
Model:                            OLS   Adj. R-squared:                  0.063
Method:                 Least Squares   F-statistic:                     115.5
Date:                Mon, 12 May 2025   Prob (F-statistic):          1.12e-164
Time:                        16:32:50   Log-Likelihood:            -1.3699e+05
No. Observations:               11917   AIC:                         2.740e+05
Df Residuals:                   11909   BIC:                         2.740e+05
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                  1.075e+

In [32]:
# An isochrone label is split on its first two dashes into three parts,
# stored as the columns `id`, `distance` and `mode`.
iso_df[['id', 'distance', 'mode']] = (
    iso_df['isochrone'].str.split('-', n=2, expand=True)
)
# Distinct (distance, mode) pairs, in order of first appearance.
unique_combos = iso_df[['distance', 'mode']].drop_duplicates()

# One separate regression per (distance, mode) pair.
for dist, mode in unique_combos.itertuples(index=False, name=None):
    subset = iso_df.loc[iso_df['distance'].eq(dist) & iso_df['mode'].eq(mode)]
    model = run_regression(subset, features, target)
    print(f"Isochrone Regression: {dist}-{mode}")
    print(model.summary())


Isochrone Regression: 600-foot
                            OLS Regression Results                            
Dep. Variable:      avg_housing_price   R-squared:                       0.036
Model:                            OLS   Adj. R-squared:                  0.031
Method:                 Least Squares   F-statistic:                     7.008
Date:                Mon, 12 May 2025   Prob (F-statistic):           3.19e-08
Time:                        17:21:29   Log-Likelihood:                -16293.
No. Observations:                1303   AIC:                         3.260e+04
Df Residuals:                    1295   BIC:                         3.264e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------

In [68]:
# Census price per centre, used as the regression target for the isochrones.
# The lookup is collapsed to exactly one row per `center` (mean price when a
# centre occurs more than once): a left merge against a lookup with repeated
# keys silently duplicates the matching isochrone rows and inflates the
# regression sample. With unique centres this is identical to a plain select.
# NOTE(review): averaging is an assumption about how repeated centres should
# be reconciled -- confirm against how census_features.csv was built.
price_lookup = (
    df_census[['center', 'avg_housing_price']]
    .rename(columns={'avg_housing_price': 'census_price'})
    .groupby('center', as_index=False)['census_price']
    .mean()
)

# `validate` makes pandas raise if the right-hand keys are ever not unique,
# so the row count of `iso_df` is guaranteed to be preserved.
iso_merged = iso_df.merge(
    price_lookup, on='center', how='left', validate='many_to_one'
)

# Drop the isochrone-level price so only `census_price` remains as a price column.
iso_merged = iso_merged.drop(columns=['avg_housing_price'])

# Split the isochrone label on its first two dashes into id / distance / mode
# (the mode part is stored in column `m`).
iso_merged[['id', 'distance', 'm']] = (
    iso_merged['isochrone']
      .str.split('-', n=2, expand=True)
)

# Same regressors as before; only the target changes to the census price.
features = [
    'area_m2', 'avg_stars', 'choice',
    'no_truck_length', 'distance_pool',
    'foreclosure_count', 'foreclosure_over_area'
]
target = 'census_price'

def run_reg(df):
    """Fit OLS of the notebook-level ``target`` on ``features`` with an intercept.

    Reads the module-level ``features`` and ``target`` at call time. Rows with
    a missing value in any of those columns are dropped before fitting.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature and target columns.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(...).fit()``.
    """
    df2 = df.dropna(subset=features + [target])
    # Cast regressors to float, not int: `no_truck_length` is a continuous
    # measurement and an integer cast would truncate its fractional part.
    # Boolean `choice` values still become 1.0/0.0.
    X = df2[features].astype(float)
    X = sm.add_constant(X)
    y = df2[target]
    return sm.OLS(y, X).fit()

# Pooled fit over every row of the merged isochrone table.
full_model = run_reg(iso_merged)
print("=== Full Isochrone Regression (census prices) ===")
print(full_model.summary())

# Distinct (distance, mode) pairs, in order of first appearance.
combos = iso_merged[['distance', 'm']].drop_duplicates()

# One separate fit per (distance, mode) pair.
for dist, md in combos.itertuples(index=False, name=None):
    sub = iso_merged.loc[
        iso_merged['distance'].eq(dist) & iso_merged['m'].eq(md)
    ]
    m = run_reg(sub)
    print(f"\n=== Isochrone {dist}-{md} (census prices) ===")
    print(m.summary())


=== Full Isochrone Regression (census prices) ===
                            OLS Regression Results                            
Dep. Variable:           census_price   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     358.7
Date:                Mon, 12 May 2025   Prob (F-statistic):               0.00
Time:                        20:39:04   Log-Likelihood:            -1.2340e+06
No. Observations:              107118   AIC:                         2.468e+06
Df Residuals:                  107110   BIC:                         2.468e+06
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------