Supporting a New Model

Learn how to add support for a new regression model to your ML pipeline by integrating the AutoMPG dataset with scikit-learn's LinearRegression estimator. This lesson walks through the necessary code changes, such as updating the dataset and model factories, adjusting the config files, and running training with debug options, so that you can extend the pipeline's capabilities effectively.

We'll cover the following...

Supporting a new model
Putting it all together

Python 3.8

from typing import TYPE_CHECKING, Dict, Optional, Tuple

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from joblib import load, dump

if TYPE_CHECKING:
    import logging

    import numpy as np
    import pandas as pd
    from omegaconf import DictConfig

from ml_pipeline.mixins.reporting_mixin import ReportingMixin
from ml_pipeline.mixins.training_mixin import TrainingMixin
from ml_pipeline.model import Model
class AutoMPGRegressor(TrainingMixin, Model, ReportingMixin):
    """Linear-regression model wrapper for the AutoMPG dataset.

    Wraps a scikit-learn ``LinearRegression`` estimator and implements the
    encode / metrics / report / save / load / predict hooks defined below.
    The training loop itself is not defined in this class; it is presumably
    supplied by ``TrainingMixin`` (confirm against the mixin).
    """

    def __init__(
        self,
        model_params: "DictConfig",
        training_params: "DictConfig",
        artifact_dir: str,
        logger: Optional["logging.Logger"] = None,
    ) -> None:
        """Build the underlying estimator and store the run configuration.

        Args:
            model_params: Keyword arguments forwarded verbatim to
                ``LinearRegression``.
            training_params: Training configuration; only stored here.
            artifact_dir: Directory into which ``save`` writes the model.
            logger: Optional logger. When ``None``, ``save`` skips its
                debug message.
        """
        self.model = LinearRegression(**model_params)
        self.training_params = training_params
        self.artifact_dir = artifact_dir
        self.logger = logger

    def load(self, model_path: str) -> None:
        """Replace the current estimator with the one stored at *model_path*.

        NOTE(review): joblib deserialisation is pickle-based and can execute
        arbitrary code; only load artifacts from trusted locations.
        """
        self.model = load(model_path)

    def _encode_train_data(
        self, X: "pd.DataFrame" = None, y: "pd.Series" = None
    ) -> Tuple["pd.DataFrame", "pd.Series"]:
        """Return the training features and target unchanged."""
        # in this example, we don't do any encoding
        return X, y

    def _encode_test_data(
        self, X: "pd.DataFrame" = None, y: "pd.Series" = None
    ) -> Tuple["pd.DataFrame", "pd.Series"]:
        """Return the test features and target unchanged."""
        # in this example, we don't do any encoding
        return X, y

    def _compute_metrics(
        self, y_true: "pd.Series", y_pred: "pd.Series"
    ) -> Dict:
        """Compute regression metrics, store them on ``self.metrics``.

        Args:
            y_true: Ground-truth target values.
            y_pred: Predicted target values.

        Returns:
            A dict with the keys ``"mean_squared_error"`` and ``"r2_score"``;
            the same object is assigned to ``self.metrics``.
        """
        self.metrics = {
            "mean_squared_error": mean_squared_error(y_true, y_pred),
            "r2_score": r2_score(y_true, y_pred),
        }
        # The signature promises a Dict; previously nothing was returned.
        return self.metrics

    def create_report(self) -> None:
        """Persist the computed metrics via ``save_metrics``.

        ``save_metrics`` is not defined in this class; it is presumably
        inherited from ``ReportingMixin`` (confirm against the mixin).
        """
        self.save_metrics()

    def save(self) -> None:
        """Serialise the estimator to ``<artifact_dir>/model.joblib``."""
        filename = f"{self.artifact_dir}/model.joblib"
        dump(self.model, filename)
        # The logger is optional (defaults to None), so a missing logger
        # must not turn a successful save into an AttributeError.
        if self.logger is not None:
            self.logger.debug(f"Saved {filename}.")

    def predict(self, X: "pd.DataFrame") -> "np.ndarray":
        """Return the estimator's predictions for the rows of *X*."""
        return self.model.predict(X)

1. Introduction

2. Getting Started

3. Structuring the ML Pipeline

4. Directed Acyclic Graphs (DAGs)

5. The ML Library

Project

6. The Pipeline Core

7. Extending the Pipeline

Project

8. Testing

9. Deployment

10. Other Considerations

11. Wrapping Up

12. Appendix

Assessment

Supporting a New Model

Supporting a new model