Model Persistence¶
Imports¶
In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
Data¶
In [8]:
df = pd.read_csv('../DATA/Advertising.csv')
In [172]:
df
Out[172]:
In [173]:
df.describe()
Out[173]:
Data Preparation¶
In [68]:
X = df.drop('sales',axis=1)
y = df['sales']
In [69]:
from sklearn.model_selection import train_test_split
In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=101)
In [71]:
# HOLD OUT SET
In [72]:
# Further split 30% of test into validation and hold-out (15% and 15% each)
X_validation, X_holdout_test, y_validation, y_holdout_test = train_test_split(X_test, y_test, test_size=0.5, random_state=101)
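As a quick optional sanity check (not in the original cells), the split sizes should come out at roughly 70% train, 15% validation, and 15% hold-out:

# Confirm the 70/15/15 split sizes
print(len(X_train), len(X_validation), len(X_holdout_test))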
Model Training¶
In [158]:
model = RandomForestRegressor(n_estimators=10,random_state=101)
In [159]:
model.fit(X_train,y_train)
Out[159]:
Model Evaluation¶
In [160]:
validation_predictions = model.predict(X_validation)
In [161]:
from sklearn.metrics import mean_absolute_error,mean_squared_error
In [162]:
mean_absolute_error(y_validation,validation_predictions)
Out[162]:
In [163]:
mean_squared_error(y_validation,validation_predictions)**0.5 #RMSE
Out[163]:
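Taking the square root by hand works; depending on your scikit-learn version, RMSE can also be requested directly. A sketch of the equivalent calls (squared=False is available from 0.22, root_mean_squared_error from 1.4):

# Equivalent RMSE computations, version-dependent
mean_squared_error(y_validation, validation_predictions, squared=False)
# or, on scikit-learn >= 1.4:
# from sklearn.metrics import root_mean_squared_error
# root_mean_squared_error(y_validation, validation_predictions)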
Hyperparameter Tuning¶
In [164]:
model = RandomForestRegressor(n_estimators=35,random_state=101)
model.fit(X_train,y_train)
Out[164]:
In [165]:
validation_predictions = model.predict(X_validation)
In [166]:
mean_absolute_error(y_validation,validation_predictions)
Out[166]:
In [167]:
mean_squared_error(y_validation,validation_predictions)**0.5 #RMSE
Out[167]:
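The n_estimators value above is tuned by hand against the validation set. As a sketch only (not part of the original notebook), the same search could be automated with GridSearchCV over a hypothetical parameter grid:

from sklearn.model_selection import GridSearchCV

# Hypothetical grid; values chosen purely for illustration
param_grid = {'n_estimators': [10, 35, 50, 100]}
grid = GridSearchCV(RandomForestRegressor(random_state=101),
                    param_grid,
                    scoring='neg_root_mean_squared_error',
                    cv=5)
grid.fit(X_train, y_train)
grid.best_params_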
Final Hold Out Test Performance for Reporting¶
In [168]:
model = RandomForestRegressor(n_estimators=35,random_state=101)
model.fit(X_train,y_train)
Out[168]:
In [169]:
test_predictions = model.predict(X_holdout_test)
In [170]:
mean_absolute_error(y_holdout_test,test_predictions)
Out[170]:
In [171]:
mean_squared_error(y_holdout_test,test_predictions)**0.5
Out[171]:
Full Training¶
In [28]:
final_model = RandomForestRegressor(n_estimators=35,random_state=101)
In [30]:
final_model.fit(X,y)
Out[30]:
Saving Model (and anything else) as a Pickle File¶
In [34]:
import joblib
In [35]:
joblib.dump(final_model,'final_model.pkl')
Out[35]:
In [36]:
X.columns
Out[36]:
In [37]:
list(X.columns)
Out[37]:
In [38]:
joblib.dump(list(X.columns),'column_names.pkl')
Out[38]:
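A variation worth noting (an assumption, not shown in the original): the model and its column names can be bundled into a single file so the two pickles never go out of sync.

# Hypothetical single-file bundle of model + column names
joblib.dump({'model': final_model, 'columns': list(X.columns)}, 'model_bundle.pkl')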
Loading Model (Model Persistence)¶
In [40]:
col_names = joblib.load('column_names.pkl')
In [41]:
col_names
Out[41]:
In [42]:
loaded_model = joblib.load('final_model.pkl')
In [43]:
loaded_model.predict([[230.1,37.8,69.2]])  # TV, radio, newspaper spend for one new observation
Out[43]:
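Because the model was fit on a DataFrame, recent scikit-learn versions warn when predict receives a plain list without feature names. A small sketch of predicting with the persisted column names instead:

# Wrap the new observation in a DataFrame using the loaded column names
new_ad_spend = pd.DataFrame([[230.1, 37.8, 69.2]], columns=col_names)
loaded_model.predict(new_ad_spend)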