This experiment was carried out by simulating an UberEats/DoorDash driver myself, across three different trips and restaurants. On each trip, five activities (WALKING, DRIVING, SITTING, STANDING, LAYING) were performed while carrying a smartphone (iPhone X). Its embedded sensors (GPS, IMU, etc.) captured data for the entire duration of each trip at a constant rate of 1 Hz. For each record, the following is provided:
The datasets were preprocessed and manually labeled to create the training and test datasets, which contain the data points below:
Below are the frameworks used
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the preprocessed recordings of all trips into a single DataFrame.
df = pd.read_csv(
    filepath_or_buffer='processedDataset/onDemandFoodDelivery_AllTrips.csv',
)

# Quick inspection: leading rows, column summary, and how many rows each
# label has.
df.head()
df.info()
df.label.value_counts()
from math import radians, cos, sin, asin, sqrt
def haversine(lat1, lon1, lat2, lon2, radius=6371.0):
    """Return the great-circle distance between two points on a sphere.

    Coordinates are given in decimal degrees. Each argument may be a scalar
    or an array-like (NumPy array, pandas Series); array-likes must have
    equal length (or be broadcastable) and the computation is element-wise.

    Args:
        lat1: Latitude(s) of the first point, in decimal degrees.
        lon1: Longitude(s) of the first point, in decimal degrees.
        lat2: Latitude(s) of the second point, in decimal degrees.
        lon2: Longitude(s) of the second point, in decimal degrees.
        radius: Sphere radius. The result is expressed in the same unit.
            Defaults to 6371 (mean Earth radius in kilometres); pass 3956
            for miles.

    Returns:
        The distance(s) in the unit of ``radius`` -- kilometres by default
        (not metres).
    """
    lat1, lon1, lat2, lon2 = (
        np.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula: `a` is the squared half-chord length between the
    # two points on the unit sphere.
    a = (
        np.sin(dlat / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    )
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make arcsin return NaN; clamp it to the valid domain.
    a = np.minimum(a, 1.0)

    central_angle = 2 * np.arcsin(np.sqrt(a))
    return radius * central_angle
# Set the default figure size BEFORE the first plotting call. pyplot creates
# the figure implicitly on the first plt.plot(), and rcParams only affect
# figures created afterwards -- setting it just before plt.show() (as this
# code used to) left this figure at the previous default size.
plt.rcParams["figure.figsize"] = [16 * 2, 9 * 2]

# Raw signals taken straight from the dataset.
X = df.locationSpeed
Y = df.activity

# Coordinates of the restaurant, the driver and the delivery destination.
rlat = df['restLatitude']
rlong = df['restLongitude']
driverlat = df['locationLatitude']
driverlong = df['locationLongitude']
dlat = df['destLatitude']
dlong = df['destLongitude']

# Great-circle distance from the driver to the restaurant (Z) and to the
# destination (W), computed row by row.
Z = haversine(rlat, rlong, driverlat, driverlong)
W = haversine(dlat, dlong, driverlat, driverlong)

# All four series share one axis and are plotted against the row index.
plt.plot(X, label="Speed")
plt.plot(Y, label="Activity")
plt.plot(Z, label="Restaurant Distance")
plt.plot(W, label="Dest Distance")

# Add legend
plt.legend(loc='upper left', fontsize=18)

# Add title and x, y labels
plt.title("Line Plot Sensor Data", fontsize=18, fontweight='bold')
plt.xlabel("Time", fontsize=18, fontweight='bold')
plt.ylabel("Sensor Values", fontsize=18, fontweight='bold')
plt.show()
# Distinct label values present in the dataset.
df['label'].unique()

import seaborn as sns

# Pair plot of the four signals, with points coloured by their label.
grid = sns.pairplot(
    data=df,
    vars=[
        'locationSpeed',
        'activity',
        'restDistance',
        'destDistance',
    ],
    hue='label',
)
# Trailing semicolon kept on purpose: it suppresses the echoed return value
# when this runs as a notebook cell.
grid.fig.suptitle('OnDemand Food Delivery Time Series Data');
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Every column except the target is used as a feature; get_dummies expands
# categorical columns into indicator columns.
features = df.drop(columns=['label'])
X = pd.get_dummies(features)
X.head()

# Row count per label before encoding.
df['label'].value_counts()

# Encode the labels as integers; `le.classes_` holds the original label for
# each integer code.
le = LabelEncoder().fit(df['label'])
y = le.transform(df['label'])
le.classes_

# Hold out 30% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

# Logistic regression with default settings, fitted on the training split.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Mean accuracy on the training split, then on the held-out split.
model.score(X_train, y_train)
model.score(X_test, y_test)

# Confusion matrix of the held-out predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred)
from sklearn.tree import DecisionTreeClassifier

# Shallow tree (depth 2), seeded so the fit is reproducible.
model = DecisionTreeClassifier(max_depth=2, random_state=33)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
model.score(X_train, y_train)
model.score(X_test, y_test)

# Fraction of rows per label, for comparison with the accuracies above.
df['label'].value_counts()/len(df['label'])

# Confusion matrix of the depth-2 tree on the held-out split.
confusion_matrix(y_test, y_pred)

# Deeper tree (depth 5).
model = DecisionTreeClassifier(max_depth=5, random_state=42)
model.fit(X_train, y_train)
model.score(X_test, y_test)

# Depth 5 again, additionally requiring at least 5 samples per leaf.
# random_state is set (it was missing here) so the score is reproducible and
# comparable with the seeded depth-5 tree above: scikit-learn permutes the
# features at every split, so an unseeded tree can differ between runs.
model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=5, random_state=42)
model.fit(X_train, y_train)
model.score(X_test, y_test)
# Sweep the maximum tree depth from 1 to 19 and record the train/test
# accuracy at each depth.
depths = range(1, 20)
scores_test = []
scores_train = []
for d in depths:
    # Fixed seed so that the curve depends only on the depth, not on the
    # random tie-breaking between equally good splits.
    model = DecisionTreeClassifier(max_depth=d, random_state=42)
    model.fit(X_train, y_train)
    s_train = model.score(X_train, y_train)
    s_test = model.score(X_test, y_test)
    scores_train.append(s_train)
    scores_test.append(s_test)

# Plot against the actual depth values. Without an explicit x, matplotlib
# uses the list index (0..18), which shifted every point one step to the left
# of the depth it belongs to on the "Max Depth" axis.
plt.plot(depths, scores_train, label="TrainScore")
plt.plot(depths, scores_test, label="TestScore")
plt.legend(loc='upper left', fontsize=18)
plt.ylabel('Scores', fontsize=18, fontweight='bold')
plt.xlabel('Max Depth', fontsize=18, fontweight='bold')