gramex.ml_api

search_modelclass(mclass)

Search for an ML algorithm or estimator by its name within supported modules.

Each estimator also comes with a wrapper class through which MLHandler can use it. Wrapper classes are:

1. SklearnModel (for subclasses of sklearn.base.{ClassifierMixin, RegressorMixin})
2. SklearnTransformer (for subclasses of sklearn.base.TransformerMixin)
3. StatsModel (for statsmodels)

Parameters

mclass : str
    Name of a model / estimator / algorithm

Returns

tuple of class, wrapper

Raises

ImportError
    If the required class is not found anywhere in the supported modules.

Example

>>> klass, wrapper = search_modelclass('LogisticRegression')
>>> print(klass)
<class 'sklearn.linear_model._logistic.LogisticRegression'>
>>> print(wrapper)
<class 'gramex.ml_api.SklearnModel'>

Source code in gramex\ml_api.py
def search_modelclass(mclass: str) -> Any:
    """Search for an ML algorithm or estimator by its name within supported modules.

    Each estimator also comes with a wrapper class through which MLHandler can use it.
    Wrapper classes are:
    1. SklearnModel (for subclasses of sklearn.base.{ClassifierMixin, RegressorMixin})
    2. SklearnTransformer (for subclasses of sklearn.base.TransformerMixin)
    3. StatsModel (for statsmodels)

    Parameters
    ----------
    mclass : str
        Name of a model / estimator / algorithm

    Returns
    -------
    tuple of class, wrapper

    Raises
    ------
    ImportError
        If the required class is not found anywhere in the supported modules.

    Example
    -------
    >>> klass, wrapper = search_modelclass('LogisticRegression')
    >>> print(klass)
    <class 'sklearn.linear_model._logistic.LogisticRegression'>
    >>> print(wrapper)
    <class 'gramex.ml_api.SklearnModel'>
    """
    for wrapper, modules in SEARCH_MODULES.items():
        klass = locate(mclass, modules)
        if klass:
            return klass, wrapper
    raise ImportError(f"{mclass} not found.")
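
Beyond the docstring example, a short sketch of the lookup for a transformer and of the failure mode (the exact classes returned depend on the installed sklearn version):

from gramex.ml_api import search_modelclass

# Transformers resolve to the SklearnTransformer wrapper listed above.
klass, wrapper = search_modelclass('StandardScaler')
# klass   -> sklearn.preprocessing.StandardScaler
# wrapper -> gramex.ml_api.SklearnTransformer

# Unknown names raise ImportError, per the last line of the function.
try:
    search_modelclass('NoSuchEstimator')
except ImportError as exc:
    print(exc)  # NoSuchEstimator not found.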

coerce_model_params(mclass, params)

Coerce a dictionary of parameters into the types expected by a given class constructor.

This is typically used when hyperparameters are set through HTTP request bodies.

Parameters

mclass : str
    Name of a model / estimator / algorithm
params : dict
    A dictionary containing named parameters and their values used to instantiate mclass.

Returns

dict
    A copy of params, with values cast to the types expected by mclass.

Example

>>> params = {"C": "1.0", "max_iter": "100"}  # Values are strings
>>> coerce_model_params('LogisticRegression', params)
{"C": 1.0, "max_iter": 100}  # Values are numbers

Source code in gramex\ml_api.py
def coerce_model_params(mclass: str, params: dict) -> dict:
    """Coerce a dictionary of parameters into the types expected by a given class constructor.

    This is typically used when hyperparameters are set through HTTP request bodies.

    Parameters
    ----------
    mclass : str
        Name of a model / estimator / algorithm
    params : dict
        A dictionary containing named parameters and their values used to instantiate `mclass`.

    Returns
    -------
    dict
        A copy of `params`, with values cast to the types expected by `mclass`.

    Example
    -------
    >>> params = {"C": "1.0", "max_iter": "100"}  # Values are strings
    >>> coerce_model_params('LogisticRegression', params)
    {"C": 1.0, "max_iter": 100}  # Values are numbers
    """
    if not params:
        return {}
    model, _ = search_modelclass(mclass)
    validated = {}
    sig_params = signature(model).parameters
    for param in sig_params & params.keys():
        val = params.pop(param)
        _sig_p = sig_params[param]
        annotation = _sig_p.annotation
        val = annotation(val) if annotation is not _empty else type(_sig_p.default)(val)
        validated[param] = val
    return validated
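
A sketch of the flow above, assuming values arrive as strings from an HTTP request body (parameter names follow LogisticRegression's constructor):

from gramex.ml_api import coerce_model_params, search_modelclass

params = {'C': '0.5', 'max_iter': '200', 'not_a_param': 'x'}
coerced = coerce_model_params('LogisticRegression', params)
# {'C': 0.5, 'max_iter': 200} -- each value is cast via the parameter's
# annotation, or the type of its default; keys outside the signature are dropped.
klass, _ = search_modelclass('LogisticRegression')
model = klass(**coerced)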

assemble_pipeline(data, target_col, model, nums=None, cats=None, **kwargs)

Create an sklearn pipeline to preprocess features.

Parameters

data : pd.DataFrame
    The training data.
target_col : str
    The column name of the target; must be present in data.
model : sklearn.base.BaseEstimator or str
    The sklearn estimator to fit at the end of the pipeline, after preprocessing.
    If a string, it is looked up with search_modelclass and instantiated with kwargs.
nums : list, optional
    Numerical columns in data, to be StandardScaled with the pipeline.
cats : list, optional
    Categorical columns in data, to be OneHotEncoded with the pipeline.
kwargs : Additional parameters for the model constructor.

Returns

sklearn.pipeline.Pipeline
    An sklearn pipeline containing two steps:
    1. A sklearn.compose.ColumnTransformer step that one-hot encodes categorical
       variables and StandardScales numerical ones.
    2. An estimator.

Example

>>> df = pd.read_csv('superstore-sales.csv', usecols=['region', 'discount', 'profit'])
>>> assemble_pipeline(df, 'profit', 'LogisticRegression', nums=['discount'], cats=['region'])
Pipeline(steps=[('transform',
                ColumnTransformer(transformers=[('ohe',
                                                 OneHotEncoder(sparse=False),
                                                 ['region']),
                                                ('scaler', StandardScaler(),
                                                 ['discount'])])),
                ('LogisticRegression', LogisticRegression())])

Source code in gramex\ml_api.py
def assemble_pipeline(
    data: pd.DataFrame,
    target_col: str,
    model: Union[BaseEstimator, str],
    nums: Optional[List[str]] = None,
    cats: Optional[List[str]] = None,
    **kwargs,
) -> Pipeline:
    """Create an sklearn pipeline to preprocess features.

    Parameters
    ----------
    data : pd.DataFrame
        The training data.
    target_col : str
        The column name of the target, must be present in `data`.
    model : sklearn.base.BaseEstimator or str
        The sklearn estimator to fit at the end of the pipeline, after preprocessing.
        If a string, it is looked up with `search_modelclass` and instantiated with `kwargs`.
    nums : list
        Numerical columns in `data`, to be StandardScaled with the pipeline.
    cats : list
        Categorical columns in `data`, to be OneHotEncoded with the pipeline.
    kwargs : Additional parameters for the model constructor.

    Returns
    -------
    sklearn.pipeline.Pipeline
        An sklearn pipeline containing two steps.
        1. A `sklearn.compose.ColumnTransformer` step that one-hot encodes categorical variables,
           and StandardScales numerical ones.
        2. An estimator

    Example
    -------
    >>> df = pd.read_csv('superstore-sales.csv', usecols=['region', 'discount', 'profit'])
    >>> assemble_pipeline(df, 'profit', 'LogisticRegression', nums=['discount'], cats=['region'])
    Pipeline(steps=[('transform',
                    ColumnTransformer(transformers=[('ohe',
                                                     OneHotEncoder(sparse=False),
                                                     ['region']),
                                                    ('scaler', StandardScaler(),
                                                     ['discount'])])),
                    ('LogisticRegression', LogisticRegression())])
    """
    if isinstance(model, str):
        # search_modelclass returns a (class, wrapper) tuple; instantiate the class.
        klass, _ = search_modelclass(model)
        model = klass(**kwargs)
    nums = set(nums) - {target_col} if nums else set()
    cats = set(cats) - {target_col} if cats else set()
    both = nums & cats
    if len(both) > 0:
        raise ValueError(f"Columns {both} cannot be both numerical and categorical.")
    to_guess = set(data.columns.tolist()) - nums.union(cats) - {target_col}
    numericals = list(nums)
    categoricals = list(cats)
    for c in to_guess:
        if pd.api.types.is_numeric_dtype(data[c]):
            numericals.append(c)
        else:
            categoricals.append(c)

    ct = ColumnTransformer(
        [
            ("ohe", OneHotEncoder(sparse=False), categoricals),
            ("scaler", StandardScaler(), numericals),
        ]
    )
    return Pipeline([("transform", ct), (model.__class__.__name__, model)])
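
A runnable sketch with a toy frame (column names invented for illustration). Keyword arguments are forwarded to the estimator's constructor when model is given by name. Note that newer sklearn versions rename OneHotEncoder's sparse argument to sparse_output, so the source above tracks the sklearn version it was written against:

import pandas as pd
from gramex.ml_api import assemble_pipeline

df = pd.DataFrame({
    'region': ['east', 'west', 'east', 'south'],
    'discount': [0.0, 0.2, 0.1, 0.3],
    'profit': [1, 0, 1, 0],
})
pipe = assemble_pipeline(df, 'profit', 'LogisticRegression',
                         nums=['discount'], cats=['region'], max_iter=200)
pipe.fit(df.drop(columns=['profit']), df['profit'])
pipe.predict(df.drop(columns=['profit']))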

ModelStore(path, *args, **kwargs)

A hybrid version of keystore that stores models, data and parameters.

Source code in gramex\ml_api.py
def __init__(self, path, *args, **kwargs):
    _mkdir(path)  # Ensure the store directory exists
    self.data_store = op.join(path, "data.h5")  # Stored data
    self.model_path = op.join(path, "model.pkl")  # Serialized model
    self.path = path
    super(ModelStore, self).__init__(op.join(path, "config.json"), *args, **kwargs)
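
The constructor implies this on-disk layout; a sketch with a hypothetical path:

store = ModelStore('models/sales')  # directory is created if missing
# models/sales/data.h5      -- stored data
# models/sales/model.pkl    -- the serialized model
# models/sales/config.json  -- parameters, handled by the parent keystore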

AbstractModel

Abstract base class for all models supported by MLHandler. MLHandler will assume ONLY this interface.

fit(*args, **kwargs) abstractmethod

Fit the model.

Ensure that all variations like partial_fit, or fit called without a target, etc, are sufficiently handled by the concrete implementations.

Source code in gramex\ml_api.py
@abstractmethod
def fit(self, *args, **kwargs) -> Any:
    """Fit the model.

    Ensure that all variations like partial_fit, or fit called without a target, etc,
    are sufficiently handled by the concrete implementations.
    """

predict(*args, **kwargs) abstractmethod

Get a prediction as a pandas Series.

Source code in gramex\ml_api.py
@abstractmethod
def predict(self, *args, **kwargs) -> pd.Series:
    """Get a prediction as a pandas Series."""

get_params(*args, **kwargs) abstractmethod

Get the (hyper)parameters of the model.

Source code in gramex\ml_api.py
@abstractmethod
def get_params(self, *args, **kwargs) -> dict:
    """Get the (hyper)parameters  of the model."""

score(*args, **kwargs) abstractmethod

Score the model against some y_true.

Source code in gramex\ml_api.py
@abstractmethod
def score(self, *args, **kwargs) -> float:
    """Score the model against some y_true."""

get_attributes(*args, **kwargs) abstractmethod

Get the learned attributes of the model.

Source code in gramex\ml_api.py
@abstractmethod
def get_attributes(self, *args, **kwargs) -> dict:
    """Get the _learned_ attributes of the model."""

SklearnModel(model, data=None, target_col=None, nums=None, cats=None, params=None, **kwargs)

Wrapper for sklearn classifiers and regressors (subclasses of sklearn.base.{ClassifierMixin, RegressorMixin}) through which MLHandler uses them.

Source code in gramex\ml_api.py
def __init__(
    self,
    model: Any,
    data: Optional[pd.DataFrame] = None,
    target_col: Optional[str] = None,
    nums: Optional[List[str]] = None,
    cats: Optional[List[str]] = None,
    params: Any = None,
    **kwargs,
):
    if not isinstance(model, Pipeline) and any([nums, cats]):
        self.model = assemble_pipeline(data, target_col, model, nums, cats, **kwargs)
    else:
        self.model = model
    self.kwargs = kwargs
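
A sketch of the two construction paths, reusing df and pipe from the assemble_pipeline example above:

from sklearn.linear_model import LogisticRegression
from gramex.ml_api import SklearnModel

# Bare estimator plus column hints: __init__ assembles the pipeline internally.
wrapped = SklearnModel(LogisticRegression(), data=df, target_col='profit',
                       nums=['discount'], cats=['region'])

# An existing Pipeline is used unchanged.
wrapped = SklearnModel(pipe)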

fit(X, y, model_path='', name='', **kwargs)

Fit the model.

Parameters

X : array-like
    Training features.
y : array-like
    Training labels.
model_path : str, optional
    If specified, the model is saved at this path.
name : str, optional
    Name of the handler instance calling this method.
kwargs : Additional parameters for model.fit

Source code in gramex\ml_api.py
def fit(
    self,
    X: Union[pd.DataFrame, np.ndarray],
    y: Union[pd.Series, np.ndarray],
    model_path: str = "",
    name: str = "",
    **kwargs,
):
    """Fit the model.

    Parameters
    ----------
    X : array-like
        Training features.
    y : array-like
        Training labels
    model_path : str, optional
        If specified, the model is saved at this path.
    name : str, optional
        Name of the handler instance calling this method.
    kwargs : Additional parameters for `model.fit`
    """
    app_log.info("Starting training...")
    try:
        result = self._fit(X, y)
        app_log.info("Done training...")
    except Exception as exc:
        app_log.exception(exc)
        return self.model
    if model_path:
        joblib.dump(self.model, model_path)
        app_log.info(f"{name}: Model saved at {model_path}.")
    return result
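
Usage sketch; per the try/except above, a training failure is logged and the unfitted model is returned rather than raised (the path is hypothetical):

result = wrapped.fit(
    df.drop(columns=['profit']), df['profit'],
    model_path='models/sales/model.pkl',  # the fitted model is joblib-dumped here
    name='sales-handler',
)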

predict(X, target_col='', **kwargs)

Get a prediction.

Parameters

X : array-like
    Input features.
target_col : str, optional
    If specified, predictions are added as a column to X, with this as the column name.
kwargs : Additional parameters for model.predict

Source code in gramex\ml_api.py
def predict(self, X: Union[pd.DataFrame, np.ndarray], target_col: str = "", **kwargs):
    """Get a prediction.

    Parameters
    ----------
    X : array-like
        Input features
    target_col : str, optional
        If specified, predictions are added as a column to `X`, with this as the column name.
    kwargs : Additional parameters for `model.predict`
    """
    p = self._predict(X, **kwargs)
    if target_col:
        try:
            X[target_col] = p
        except ValueError:
            # This happens for NER: predictions of a single sample can be multiple entities.
            X[target_col] = [p]
        return X
    return p
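
A sketch of the two return shapes, continuing the example above:

X = df.drop(columns=['profit'])
preds = wrapped.predict(X)                       # predictions alone
frame = wrapped.predict(X, target_col='profit')  # X with a new 'profit' column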

SklearnTransformer

Wrapper for sklearn transformers (subclasses of sklearn.base.TransformerMixin) through which MLHandler uses them.