import pandas as pd
from datetime import *
import dateutil.parser
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import numpy as np


def inRegion(region_NE_corner_lat,
             region_NE_corner_long,
             region_SW_corner_lat,
             region_SW_corner_long,
             region_SE_corner_lat,
             region_SE_corner_long,
             region_NW_corner_lat,
             region_NW_corner_long, lat_pickup_points, long_pickup_points,
             lat_drop_points, long_drop_points):
    """Flag trips whose pickup AND dropoff both lie strictly inside a region.

    The region is the quadrilateral spanned by the four given corners.
    Corners and points are both handed to shapely as (lat, long) pairs, so
    the axis order is consistent between the polygon and the tested points.

    Parameters
    ----------
    region_*_corner_lat, region_*_corner_long : float
        Latitude / longitude of the four corners of the region.
    lat_pickup_points, long_pickup_points : sequence of float
        Pickup coordinates, one entry per trip.
    lat_drop_points, long_drop_points : sequence of float
        Dropoff coordinates, paired with the pickups by position.

    Returns
    -------
    pandas.DataFrame
        A single boolean column ``'a'`` with one row per trip; True when
        both endpoints are contained in the region.  When
        ``lat_pickup_points`` is a pandas Series its index is reused, so the
        mask stays aligned with the frame the coordinates came from.

    Raises
    ------
    ValueError
        If any of the other coordinate sequences is shorter than
        ``lat_pickup_points``.
    """
    corners = [
        (region_NE_corner_lat, region_NE_corner_long),
        (region_NW_corner_lat, region_NW_corner_long),
        (region_SE_corner_lat, region_SE_corner_long),
        (region_SW_corner_lat, region_SW_corner_long),
    ]

    # Connecting the corners in the order NE, NW, SE, SW walks along both
    # diagonals of an ordinary box and produces a self-intersecting ring,
    # for which `contains` is not meaningful.  Ordering the corners by their
    # angle around the centroid always yields a simple ring for a box, no
    # matter how the corners are labelled.
    center_lat = sum(lat for lat, _ in corners) / len(corners)
    center_long = sum(lon for _, lon in corners) / len(corners)
    ring = sorted(
        corners,
        key=lambda corner: np.arctan2(corner[1] - center_long,
                                      corner[0] - center_lat),
    )
    polygon = Polygon(ring)

    # Iterate by position rather than with `series[i]`, which is a label
    # lookup on a pandas Series and fails for a non-default index.
    trips = zip(lat_pickup_points, long_pickup_points,
                lat_drop_points, long_drop_points)
    inside = np.fromiter(
        (
            polygon.contains(Point(pickup_lat, pickup_long))
            and polygon.contains(Point(drop_lat, drop_long))
            for pickup_lat, pickup_long, drop_lat, drop_long in trips
        ),
        dtype=bool,
        count=len(lat_pickup_points),
    )

    # `list` also has an `.index` attribute (a method), so check the type
    # explicitly instead of using getattr.
    index = (lat_pickup_points.index
             if isinstance(lat_pickup_points, pd.Series) else None)
    return pd.DataFrame({'a': inside}, index=index)



# Day that appears in the output file name.  The per-day row filter below is
# commented out, so this value does not currently restrict the data.
# NOTE: this rebinds `date`, shadowing the `date` class pulled in by
# `from datetime import *`.
date = datetime(2016, 1, 6).date()
# Load the raw trip records; the path is relative to the working directory.
taxiData = pd.read_csv('yellow_tripdata_2016-01.csv')

# taxiData = taxiData.head(100)
# print(taxiData) 

# specificDay = taxiData["tpep_pickup_datetime"].map(lambda x: dateutil.parser.parse(x).date())  == date
# taxiData = taxiData[specificDay]

# noTransporters = (taxiData["tpep_pickup_datetime"] < taxiData["tpep_dropoff_datetime"]) & ((taxiData["dropoff_latitude"] != taxiData["pickup_latitude"]) | (taxiData["dropoff_longitude"] != taxiData["pickup_longitude"]))
# print(noTransporters)
# taxiData = taxiData[noTransporters]


# # Midtown
# region = "Midtown"
# region_NE_corner_lat = 40.773175
# region_NE_corner_long = -73.993813
# region_SW_corner_lat = 40.757288
# region_SW_corner_long = -74.005567
# region_SE_corner_lat = 40.743200
# region_SE_corner_long = -73.972172
# region_NW_corner_lat = 40.759118
# region_NW_corner_long = -73.958314

# # UWSide
# region = "UWside"
# region_NE_corner_lat = 40.800671
# region_NE_corner_long =  -73.958329
# region_SW_corner_lat = 40.772352
# region_SW_corner_long = -73.994333
# region_SE_corner_lat = 40.767426
# region_SE_corner_long = -73.982377
# region_NW_corner_lat = 40.805997
# region_NW_corner_long = -73.971221

# Active region: UESide.  Each corner is given as a (lat, long) pair.
region = "UEside"
region_NE_corner_lat, region_NE_corner_long = 40.791259, -73.934347
region_SW_corner_lat, region_SW_corner_long = 40.764372, -73.972962
region_SE_corner_lat, region_SE_corner_long = 40.759042, -73.957691
region_NW_corner_lat, region_NW_corner_long = 40.796858, -73.949045


# Coordinate columns of every trip: pickups first, then dropoffs.
lat_pickup_points, long_pickup_points = (
    taxiData["pickup_latitude"],
    taxiData["pickup_longitude"],
)
lat_drop_points, long_drop_points = (
    taxiData["dropoff_latitude"],
    taxiData["dropoff_longitude"],
)

# Boolean mask: True for trips that start and end inside the region.
inregion = inRegion(
    region_NE_corner_lat=region_NE_corner_lat,
    region_NE_corner_long=region_NE_corner_long,
    region_SW_corner_lat=region_SW_corner_lat,
    region_SW_corner_long=region_SW_corner_long,
    region_SE_corner_lat=region_SE_corner_lat,
    region_SE_corner_long=region_SE_corner_long,
    region_NW_corner_lat=region_NW_corner_lat,
    region_NW_corner_long=region_NW_corner_long,
    lat_pickup_points=lat_pickup_points,
    long_pickup_points=long_pickup_points,
    lat_drop_points=lat_drop_points,
    long_drop_points=long_drop_points,
)['a']

# Keep only the trips flagged by the mask.
taxiData = taxiData[inregion]

# Compute the trip duration, trim outliers and write the cleaned data.

# Work on an explicit copy: `taxiData` is a boolean-filtered selection at
# this point, and adding a column to such a selection triggers pandas'
# SettingWithCopyWarning.
taxiData = taxiData.copy()

# Trip duration in minutes.  The timestamps are parsed column-wise instead of
# row by row.
pickup_time = pd.to_datetime(taxiData["tpep_pickup_datetime"])
dropoff_time = pd.to_datetime(taxiData["tpep_dropoff_datetime"])
taxiData["total_time"] = (dropoff_time - pickup_time) / np.timedelta64(60, 's')

# Outlier trimming, first on total_time and then on total_amount: keep only
# rows strictly below the 99th percentile, then only rows strictly above the
# 1st percentile.  Every cutoff is recomputed on the rows that survived the
# previous step.
for column in ("total_time", "total_amount"):
    taxiData = taxiData[taxiData[column] < taxiData[column].quantile(0.99)]
    taxiData = taxiData[taxiData[column] > taxiData[column].quantile(0.01)]

# NOTE(review): the file name carries `date`, but the per-day filter above is
# commented out, so the file holds every day in the input - confirm intent.
taxiData.to_csv("Data/CleanedData_"+str(region)+"_"+str(date)+".csv")
# taxiData.to_csv("yellow_tripdata_2016-01.csv")