formatted decimal number
ronylpatil committed Mar 18, 2024
1 parent 22b6134 commit 875fc87
Showing 2 changed files with 17 additions and 16 deletions.
8 changes: 4 additions & 4 deletions src/models/train_model.py
@@ -27,10 +27,10 @@ def train_model(training_feat: np.ndarray, y_true: pd.Series, n_estimators: int,
     infologger.info(f'trained {type(model).__name__} model')
     y_pred = model.predict(training_feat)
     y_pred_prob = model.predict_proba(training_feat)
-    accuracy = metrics.balanced_accuracy_score(y_true, y_pred)
-    precision = metrics.precision_score(y_true, y_pred, zero_division = 1, average = 'macro')
-    recall = metrics.recall_score(y_true, y_pred, average = 'macro')
-    roc_score = metrics.roc_auc_score(y_true, y_pred_prob, average = 'macro', multi_class = 'ovr')
+    accuracy = round(metrics.balanced_accuracy_score(y_true, y_pred), 5)
+    precision = round(metrics.precision_score(y_true, y_pred, zero_division = 1, average = 'macro'), 5)
+    recall = round(metrics.recall_score(y_true, y_pred, average = 'macro'), 5)
+    roc_score = round(metrics.roc_auc_score(y_true, y_pred_prob, average = 'macro', multi_class = 'ovr'), 5)
 
     return {'model': model, 'y_pred': y_pred, 'params': {"n_estimator": n_estimators, "criterion": criterion,
                                                          "max_depth": max_depth, "seed": random_state},
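The change above wraps each metric in round(..., 5) before the values are returned, matching the commit title: metrics are formatted to five decimal places instead of full float precision. Since the same pattern repeats four times per file, a small helper could centralize it. A minimal sketch, using only the standard library; round_metrics and the sample values are illustrative and not part of this repository:

# Hypothetical helper (not in the repo): round every metric value in one pass,
# so the display precision is controlled by a single constant.
def round_metrics(raw_metrics: dict, ndigits: int = 5) -> dict:
    # Builds a new dict; each value is rounded to `ndigits` decimal places.
    return {name: round(value, ndigits) for name, value in raw_metrics.items()}

# Example:
# round_metrics({'accuracy': 0.8765432109, 'recall': 0.7123456789})
# -> {'accuracy': 0.87654, 'recall': 0.71235}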
25 changes: 13 additions & 12 deletions src/models/tune_model.py
@@ -2,6 +2,7 @@
 import yaml
 import pathlib
 import pandas as pd
+import numpy as np
 from sklearn import metrics
 from sklearn.ensemble import RandomForestClassifier
 from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
@@ -33,10 +34,10 @@ def objective(params: dict, yaml_obj: dict, x_train: pd.DataFrame, y_train: pd.S
     y_pred = model.predict(x_test)
     y_pred_prob = model.predict_proba(x_test)
 
-    accuracy = metrics.balanced_accuracy_score(y_test, y_pred)
-    precision = metrics.precision_score(y_test, y_pred, zero_division = 1, average = 'macro')
-    recall = metrics.recall_score(y_test, y_pred, average = 'macro')
-    roc_score = metrics.roc_auc_score(y_test, y_pred_prob, average = 'macro', multi_class = 'ovr')
+    accuracy = round(metrics.balanced_accuracy_score(y_test, y_pred), 5)
+    precision = round(metrics.precision_score(y_test, y_pred, zero_division = 1, average = 'macro'), 5)
+    recall = round(metrics.recall_score(y_test, y_pred, average = 'macro'), 5)
+    roc_score = round(metrics.roc_auc_score(y_test, y_pred_prob, average = 'macro', multi_class = 'ovr'), 5)
 
     with mlflow.start_run(description = 'tunning RFC also using hyperopt optimization technique') :
         mlflow.set_tags({'project_name': 'wine-quality', 'author' : 'ronil', 'project_quarter': 'Q1-2024'})
@@ -80,11 +81,11 @@ def main() -> None :
     partial_obj = partial(objective, **additional_params)
 
     # we can take the range as input via params.yaml
-    search_space = {'n_estimators': hp.randint('n_estimators', 200 - 15) + 15,
+    search_space = {'n_estimators': hp.choice('n_estimators', np.arange(25, 400, dtype = int)),
                     'criterion': hp.choice('criterion', ['gini', 'entropy']),
-                    'max_depth': hp.randint('max_depth', 100 - 5) + 5,
-                    'min_samples_split': hp.randint('min_samples_split', 100 - 5) + 5,
-                    'min_samples_leaf': hp.randint('min_samples_leaf', 100 - 10) + 10 }
+                    'max_depth': hp.choice('max_depth', np.arange(4, 12, dtype = int)),
+                    'min_samples_split': hp.choice('min_samples_split', np.arange(15, 50, dtype = int)),
+                    'min_samples_leaf': hp.choice('min_samples_leaf', np.arange(15, 100, dtype = int)) }
     try :
         best_result = fmin(fn = partial_obj,
                            space = search_space,
@@ -106,10 +107,10 @@ def main() -> None :
     y_pred = best_model.predict(x_test)
     y_pred_prob = best_model.predict_proba(x_test)
 
-    accuracy = metrics.balanced_accuracy_score(y_test, y_pred)
-    precision = metrics.precision_score(y_test, y_pred, zero_division = 1, average = 'macro')
-    recall = metrics.recall_score(y_test, y_pred, average = 'macro')
-    roc_score = metrics.roc_auc_score(y_test, y_pred_prob, average = 'macro', multi_class = 'ovr')
+    accuracy = round(metrics.balanced_accuracy_score(y_test, y_pred), 5)
+    precision = round(metrics.precision_score(y_test, y_pred, zero_division = 1, average = 'macro'), 5)
+    recall = round(metrics.recall_score(y_test, y_pred, average = 'macro'), 5)
+    roc_score = round(metrics.roc_auc_score(y_test, y_pred_prob, average = 'macro', multi_class = 'ovr'), 5)
 
     with mlflow.start_run(description = 'best tunned model') :
         mlflow.set_tags({'project_name': 'wine-quality', 'model_status' : 'best_tunned', 'project_quarter': 'Q1-2024'})
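The comment in main() ("we can take the range as input via params.yaml") points at a natural follow-up: the search-space bounds could be read from the params file instead of being hard-coded. A minimal sketch of that idea, assuming a hypothetical tune_model block in params.yaml; the key names and file layout are illustrative, not the project's actual config. One hyperopt detail worth noting for this diff: with hp.choice, fmin returns the chosen index for each parameter, and space_eval (already imported in tune_model.py) maps those indices back to the concrete values.

import yaml
import numpy as np
from hyperopt import hp, space_eval

# Hypothetical params.yaml layout (illustrative only, not the repo's actual file):
# tune_model:
#   n_estimators_range: [25, 400]
#   max_depth_range: [4, 12]
#   min_samples_split_range: [15, 50]
#   min_samples_leaf_range: [15, 100]

with open('params.yaml') as f:
    tune_params = yaml.safe_load(f)['tune_model']

search_space = {
    'n_estimators': hp.choice('n_estimators', np.arange(*tune_params['n_estimators_range'], dtype = int)),
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    'max_depth': hp.choice('max_depth', np.arange(*tune_params['max_depth_range'], dtype = int)),
    'min_samples_split': hp.choice('min_samples_split', np.arange(*tune_params['min_samples_split_range'], dtype = int)),
    'min_samples_leaf': hp.choice('min_samples_leaf', np.arange(*tune_params['min_samples_leaf_range'], dtype = int)),
}

# fmin returns indices for hp.choice parameters, e.g. {'criterion': 1, ...};
# space_eval converts them back to the actual values before refitting:
# best_params = space_eval(search_space, best_result)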
