# Source code for hosa.models.rnn.rnn_models

"""
Utilities for Recurrent Neural Network (RNN) models.
"""
import abc

import numpy as np
import tensorflow as tf
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

from hosa.callbacks import EarlyStoppingAtMinLoss
from hosa.helpers.functions import metrics_multiclass


class BaseRNN:
    """Base class for Recurrent Neural Network (RNN) models for classification and regression.

    Each RNN model comprises an input layer (an RNN or a bidirectional RNN cell),
    ``n_subs_layers`` subsequent layers (similar to the input cell), a dropout layer, a dense
    layer, and an output layer. This base class builds every layer up to (and including) the
    penultimate dense layer; the output layer, a dense layer with ``n_outputs`` units, is added
    by the derived classes (softmax activation for classification, linear for regression).

    .. warning::
        This class should not be used directly. Use derived classes instead, i.e.,
        :class:`.RNNClassification` or :class:`.RNNRegression`.

    .. note::
        The parameters used in this library were adapted from the exact parameters of the
        TensorFlow library. Descriptions were thus modified accordingly to our approach.
        However, refer to the TensorFlow documentation for more details about each of those
        parameters.

    Args:
        n_outputs (int): Number of class labels in classification, or the number of numerical
            values to predict in regression.
        n_neurons_dense_layer (int): Number of neurons of the penultimate dense layer (i.e.,
            before the output layer).
        n_units (int): Dimensionality of the output space, i.e., the dimensionality of the
            hidden state.
        n_subs_layers (int): Number of subsequent recurrent layers between the input and
            output layers.
        is_bidirectional (bool): If ``True``, then bidirectional layers will be used to build
            the RNN model.
        model_type (str): Type of RNN model to be used. Available options are ``lstm``, for a
            Long Short-Term Memory model, or ``gru``, for a Gated Recurrent Unit model.
        optimizer (str): Name of the optimizer. See `tensorflow.keras.optimizers
            <https://www.tensorflow.org/api_docs/python/tf/keras/optimizers>`_.
        dropout_percentage (float): Fraction of the input units to drop.
        activation_function_dense (str): Activation function to use on the penultimate dense
            layer. If not specified, no activation is applied (i.e., uses the linear
            activation function). See `tensorflow.keras.activations
            <https://www.tensorflow.org/api_docs/python/tf/keras/activations>`_.
        kernel_initializer (str): Initializer for the kernel weights matrix, used for the
            linear transformation of the inputs.
        batch_size (int or None): Number of samples per batch of computation. If ``None``,
            ``batch_size`` will default to 32.
        epochs (int): Maximum number of epochs to train the model.
        patience (int): Number of epochs with no improvement after which training will be
            stopped.
        **kwargs: *Ignored*. Extra arguments that are used for compatibility's sake.
    """

    def __init__(self, n_outputs, n_neurons_dense_layer, n_units, n_subs_layers,
                 is_bidirectional=False, model_type='lstm', optimizer='adam',
                 dropout_percentage=0.1, activation_function_dense='relu',
                 kernel_initializer='normal', batch_size=1000, epochs=50, patience=5, **kwargs):
        self.n_outputs = n_outputs
        self.n_neurons_dense_layer = n_neurons_dense_layer
        self.n_units = n_units
        self.n_subs_layers = n_subs_layers
        self.is_bidirectional = is_bidirectional
        self.model_type = model_type
        self.optimizer = optimizer
        self.dropout_percentage = dropout_percentage
        self.activation_function_dense = activation_function_dense
        self.kernel_initializer = kernel_initializer
        self.batch_size = batch_size
        self.epochs = epochs
        self.patience = patience
        # Empty estimator; the layers are only added when ``prepare`` is called.
        self.model = tf.keras.models.Sequential()

    def _recurrent_layer(self, layer_type, return_sequences):
        """Builds one recurrent layer, wrapped in ``Bidirectional`` when the model requires it."""
        layer = layer_type(self.n_units, return_sequences=return_sequences)
        if self.is_bidirectional:
            layer = tf.keras.layers.Bidirectional(layer)
        return layer

    def prepare(self, x, y):
        """
        Prepares the model by adding the layers to the estimator: input layer,
        ``n_subs_layers`` subsequent layers, a dropout layer, and a dense layer. The output
        layer is added by the derived classes.

        Args:
            x (numpy.ndarray): Input data. Only its shape (without the first axis) is used, to
                define the input layer.
            y (numpy.ndarray): Target values (class labels in classification, real numbers in
                regression). Not used by this base implementation.

        Raises:
            ValueError: If ``model_type`` is neither ``lstm`` nor ``gru``.
        """
        # Choose type of layer based on the model chosen by the user
        if self.model_type == 'lstm':
            layer_type = tf.keras.layers.LSTM
        elif self.model_type == 'gru':
            layer_type = tf.keras.layers.GRU
        else:
            raise ValueError(
                'Type of RNN model invalid. Available options are ``lstm``, for a Long '
                'Short-Term Memory model, or ``gru``, for a Gated Recurrent Unit model.')

        # Input layer (no. of timesteps and no. of features)
        self.model.add(tf.keras.layers.InputLayer(input_shape=x.shape[1:]))
        # The first recurrent layer must emit the full sequence only when another recurrent
        # layer follows it.
        self.model.add(self._recurrent_layer(layer_type, self.n_subs_layers > 0))

        # Subsequent layers: every layer but the last one returns the full sequence
        for n in range(self.n_subs_layers):
            self.model.add(self._recurrent_layer(layer_type, n < self.n_subs_layers - 1))

        # Dropout layer
        self.model.add(tf.keras.layers.Dropout(self.dropout_percentage))

        # Dense layer
        self.model.add(tf.keras.layers.Dense(self.n_neurons_dense_layer,
                                             kernel_initializer=self.kernel_initializer,
                                             activation=self.activation_function_dense))

    def aux_fit(self, x, y, callback, validation_size, rtol=1e-03, atol=1e-04,
                class_weights=None, imbalance_correction=None, shuffle=True, **kwargs):
        """
        Auxiliary function for classification and regression models compatibility.

        .. warning::
            This function is not meant to be called by itself. It is just an auxiliary function
            called by the child classes' ``fit`` function.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (class labels in classification, real numbers in
                regression).
            callback (object): Early stopping callback class for halting the model's training.
                It is instantiated here with the model wrapper, the patience, the validation
                data, ``imbalance_correction``, ``rtol``, and ``atol`` (in this order).
            validation_size (float or int): Proportion of the training dataset that will be
                used for the validation split.
            rtol (float): Relative tolerance used for early stopping based on the performance
                metric.
            atol (float): Absolute tolerance used for early stopping based on the performance
                metric.
            class_weights (None or dict): Dictionary mapping class indices (integers) to a
                weight (float) value, used for weighting the loss function (during training
                only). **Only used for classification problems. Ignored for regression.**
            imbalance_correction (None or bool): Whether to apply correction to class
                imbalances. **Only used for classification problems. Ignored for regression.**
            shuffle (bool): Whether to shuffle the data before splitting.
            **kwargs: Extra arguments used in the TensorFlow's model ``fit`` function. See `here
                <https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit>`_. If
                ``callbacks`` is given, those callbacks are appended after the early stopping
                callback.
        """
        x_train, x_validation, y_train, y_validation = train_test_split(
            x, y, test_size=validation_size, shuffle=shuffle)
        validation_data = (x_validation, y_validation)

        callbacks = [callback(self, self.patience, validation_data, imbalance_correction, rtol,
                              atol)]
        # ``callbacks`` is also passed explicitly below, so a user-supplied list has to be
        # merged here; forwarding it through ``**kwargs`` would raise a duplicate-keyword
        # ``TypeError``.
        user_callbacks = kwargs.pop('callbacks', None)
        if user_callbacks:
            callbacks.extend(user_callbacks)

        self.model.fit(x_train, y_train, batch_size=self.batch_size, epochs=self.epochs,
                       validation_data=validation_data, callbacks=callbacks,
                       class_weight=class_weights, **kwargs)

    # NOTE: this class does not use ``abc.ABCMeta`` as its metaclass, so the
    # ``abc.abstractmethod`` markers below are declarative only: instantiation is not blocked,
    # and calling any of these methods on the base class raises ``NotImplementedError``.

    @abc.abstractmethod
    def fit(self, x, y, **kwargs):
        """
        Fits the model to data matrix x and target(s) y.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (class labels in classification, real numbers in
                regression).
            **kwargs: Extra arguments explicitly used for regression or classification models.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def compile(self):
        """
        Compiles the model for training.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def score(self, x, y, **kwargs):
        """
        Computes the performance metric(s) (e.g., accuracy for classification) on the given
        input data and target values.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (class labels in classification, real numbers in
                regression).
            **kwargs: Extra arguments that are explicitly used for regression or classification
                models.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def predict(self, x, **kwargs):
        """
        Predicts the target values using the input data in the trained model.

        Args:
            x (numpy.ndarray): Input data.
            **kwargs: Extra arguments that are used in the TensorFlow's model ``predict``
                function. See `here
                <https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict>`_.
        """
        raise NotImplementedError

    # NOTE: naming this method ``__dict__`` shadows the built-in instance attribute of the same
    # name, so ``instance.__dict__`` evaluates to this bound method. The name is kept because it
    # is part of the public interface of the derived classes.
    @abc.abstractmethod
    def __dict__(self):
        """
        Prepares a dictionary with the parameters of the model.
        """
        raise NotImplementedError
class RNNClassification(BaseRNN):
    """Recurrent Neural Network (RNN) model classifier.

    The model comprises an input layer (an RNN or a bidirectional RNN cell), ``n_subs_layers``
    subsequent layers (similar to the input cell), a dropout layer, a dense layer, and an
    output layer (a dense layer with ``n_outputs`` units and the softmax activation function).

    Args:
        n_outputs (int): Number of class labels to predict.
        n_neurons_dense_layer (int): Number of neurons of the penultimate dense layer (i.e.,
            before the output layer).
        n_units (int): Dimensionality of the output space, i.e., the dimensionality of the
            hidden state.
        n_subs_layers (int): Number of subsequent layers between the input and output layers.
        is_bidirectional (bool): If ``True``, then bidirectional layers will be used to build
            the RNN model.
        model_type (str): Type of RNN model to be used. Available options are ``lstm``, for a
            Long Short-Term Memory model, or ``gru``, for a Gated Recurrent Unit model.
        optimizer (str): Name of the optimizer. See `tensorflow.keras.optimizers
            <https://www.tensorflow.org/api_docs/python/tf/keras/optimizers>`_.
        dropout_percentage (float): Fraction of the input units to drop.
        metrics (list): List of metrics to be evaluated by the model during training and
            testing. Each item of the list can be a string (name of a TensorFlow's built-in
            function), function, or a `tf.keras.metrics.Metric
            <https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Metric>`_ instance.
            If ``None``, ``metrics`` will default to ``['accuracy']``.
        activation_function_dense (str): Activation function to use on the penultimate dense
            layer. If not specified, no activation is applied (i.e., uses the linear
            activation function). See `tensorflow.keras.activations
            <https://www.tensorflow.org/api_docs/python/tf/keras/activations>`_.
        kernel_initializer (str): Initializer for the kernel weights matrix, used for the
            linear transformation of the inputs.
        batch_size (int or None): Number of samples per batch of computation. If ``None``,
            ``batch_size`` will default to 32.
        epochs (int): Maximum number of epochs to train the model.
        patience (int): Number of epochs with no improvement after which training will be
            stopped.
        **kwargs: *Ignored*. Extra arguments that are used for compatibility's sake.

    Examples:
        .. code-block:: python
            :linenos:

            from keras.datasets import imdb
            from keras_preprocessing.sequence import pad_sequences

            from hosa.models.rnn import RNNClassification
            from hosa.aux import create_overlapping

            # 1 - Load and split the data
            max_sequence_length = 50
            (x_train, y_train), (X_test, y_test) = imdb.load_data(num_words=50)

            # 2 - Prepare the data for rnn input
            x_train = pad_sequences(x_train, maxlen=max_sequence_length, value=0.0)
            X_test = pad_sequences(X_test, maxlen=max_sequence_length, value=0.0)
            x_train, y_train = create_overlapping(x_train, y_train, RNNClassification,
                                                  'central', 3, stride=1, timesteps=2)
            X_test, y_test = create_overlapping(X_test, y_test, RNNClassification,
                                                'central', 3, stride=1, timesteps=2)

            # 3 - Create and fit the model
            clf = RNNClassification(n_outputs=2, n_neurons_dense_layer=10, n_units=10,
                                    n_subs_layers=1, is_bidirectional=True)
            clf.prepare(x_train, y_train)
            clf.compile()
            clf.fit(x_train, y_train)

            # 4 - Calculate predictions
            clf.predict(X_test)

            # 5 - Compute the score
            score = clf.score(X_test, y_test)
    """

    def __init__(self, n_outputs, n_neurons_dense_layer, n_units, n_subs_layers,
                 is_bidirectional=False, model_type='lstm', optimizer='adam',
                 dropout_percentage=0.1, metrics=None, activation_function_dense='relu',
                 kernel_initializer='normal', batch_size=1000, epochs=50, patience=5, **kwargs):
        # A fresh list is created per instance instead of sharing a mutable default argument.
        self.metrics = ['accuracy'] if metrics is None else metrics
        self.is_binary = None
        super().__init__(
            n_outputs=n_outputs,
            n_neurons_dense_layer=n_neurons_dense_layer,
            n_units=n_units,
            n_subs_layers=n_subs_layers,
            is_bidirectional=is_bidirectional,
            model_type=model_type,
            optimizer=optimizer,
            dropout_percentage=dropout_percentage,
            activation_function_dense=activation_function_dense,
            kernel_initializer=kernel_initializer,
            batch_size=batch_size,
            epochs=epochs,
            patience=patience,
            **kwargs)

    def prepare(self, x, y):
        """
        Prepares the model by adding the layers to the estimator: input layer,
        ``n_subs_layers`` subsequent layers, a dropout layer, a dense layer, and a softmax
        output layer with ``n_outputs`` units.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., class labels).
        """
        super().prepare(x, y)
        output_layer = tf.keras.layers.Dense(self.n_outputs, activation='softmax')
        self.model.add(output_layer)

    def fit(self, x, y, validation_size=0.33, shuffle=True, rtol=1e-03, atol=1e-04,
            class_weights=None, imbalance_correction=False, **kwargs):
        """
        Fits the model to data matrix x and target(s) y.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., class labels).
            validation_size (float or int): Proportion of the train dataset to include in the
                validation split.
            shuffle (bool): Whether to shuffle the data before splitting.
            rtol (float): Relative tolerance used for early stopping based on the performance
                metric.
            atol (float): Absolute tolerance used for early stopping based on the performance
                metric.
            class_weights (None or dict): Dictionary mapping class indices (integers) to a
                weight (float) value, used for weighting the loss function (during training
                only).
            imbalance_correction (bool): `True` if correction for imbalance should be applied
                to the metrics; `False` otherwise.
            **kwargs: Extra arguments that are used in the TensorFlow's model ``fit`` function.
                See `here <https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit>`_.

        Returns:
            tensorflow.keras.Sequential: Returns a trained TensorFlow model.
        """
        super().aux_fit(
            x, y, EarlyStoppingAtMinLoss, validation_size,
            rtol=rtol,
            atol=atol,
            class_weights=class_weights,
            imbalance_correction=imbalance_correction,
            shuffle=shuffle,
            **kwargs)
        return self.model

    def score(self, x, y, imbalance_correction=False):
        """
        Computes the performance metrics on the given input data and target values.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., class labels).
            imbalance_correction (bool): `True` if correction for imbalance should be applied
                to the metrics; `False` otherwise.

        Returns:
            tuple: Returns a tuple containing the area under the ROC curve (AUC), accuracy,
            sensitivity, and specificity.

        .. note::
            This function can be used for both binary and multiclass classification.
        """
        # Only the probability estimates are needed; the predicted labels are discarded.
        probabilities, _ = self.predict(x)
        auc_value, accuracy, sensitivity, specificity = metrics_multiclass(
            y, probabilities, self.n_outputs, imbalance_correction=imbalance_correction)
        return auc_value, accuracy, sensitivity, specificity

    def predict(self, x, **kwargs):
        """
        Predicts the target values using the input data in the trained model.

        Args:
            x (numpy.ndarray): Input data.
            **kwargs: Extra arguments that are used in the TensorFlow's model ``predict``
                function. See `here
                <https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict>`_.

        Returns:
            tuple: Returns a tuple containing the probability estimates and predicted classes.
        """
        probabilities = self.model.predict(x, **kwargs)
        # The predicted class is the index of the highest probability along axis 1.
        labels = np.argmax(probabilities, axis=1)
        return probabilities, labels

    def compile(self):
        """
        Compiles the model for training.

        Returns:
            tensorflow.keras.Sequential: Returns an untrained but compiled TensorFlow model.
        """
        self.model.compile(optimizer=self.optimizer,
                           loss='sparse_categorical_crossentropy',
                           metrics=self.metrics)
        return self.model

    # NOTE: naming this method ``__dict__`` shadows the built-in instance attribute of the same
    # name, so ``instance.__dict__`` evaluates to this bound method.
    def __dict__(self):
        """
        Prepares a dictionary with the parameters of the model.

        Returns:
            dict: Dictionary with the parameter names mapped to their values.
        """
        parameter_names = (
            'n_outputs', 'n_neurons_dense_layer', 'n_units', 'n_subs_layers',
            'is_bidirectional', 'model_type', 'optimizer', 'dropout_percentage', 'metrics',
            'activation_function_dense', 'kernel_initializer', 'batch_size', 'epochs',
            'patience',
        )
        return {name: getattr(self, name) for name in parameter_names}
class RNNRegression(BaseRNN):
    """Recurrent Neural Network (RNN) model regressor.

    The model comprises an input layer (an RNN or a bidirectional RNN cell), ``n_subs_layers``
    subsequent layers (similar to the input cell), a dropout layer, a dense layer, and an
    output layer (a dense layer with ``n_outputs`` units and the linear activation function).

    Args:
        n_outputs (int): Number of numerical values to predict in regression.
        n_neurons_dense_layer (int): Number of neurons of the penultimate dense layer (i.e.,
            before the output layer).
        n_units (int): Dimensionality of the output space, i.e., the dimensionality of the
            hidden state.
        n_subs_layers (int): Number of subsequent layers between the input and output layers.
        is_bidirectional (bool): If ``True``, then bidirectional layers will be used to build
            the RNN model.
        model_type (str): Type of RNN model to be used. Available options are ``lstm``, for a
            Long Short-Term Memory model, or ``gru``, for a Gated Recurrent Unit model.
        optimizer (str): Name of the optimizer. See `tensorflow.keras.optimizers
            <https://www.tensorflow.org/api_docs/python/tf/keras/optimizers>`_.
        dropout_percentage (float): Fraction of the input units to drop.
        metrics (list): List of metrics to be evaluated by the model during training and
            testing. Each item of the list can be a string (name of a TensorFlow's built-in
            function), function, or a `tf.keras.metrics.Metric
            <https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Metric>`_ instance.
            If ``None``, ``metrics`` will default to ``['mean_squared_error']``.
        activation_function_dense (str): Activation function to use on the penultimate dense
            layer. If not specified, no activation is applied (i.e., uses the linear
            activation function). See `tensorflow.keras.activations
            <https://www.tensorflow.org/api_docs/python/tf/keras/activations>`_.
        kernel_initializer (str): Initializer for the kernel weights matrix, used for the
            linear transformation of the inputs.
        batch_size (int or None): Number of samples per batch of computation. If ``None``,
            ``batch_size`` will default to 32.
        epochs (int): Maximum number of epochs to train the model.
        patience (int): Number of epochs with no improvement after which training will be
            stopped.
        **kwargs: *Ignored*. Extra arguments that are used for compatibility's sake.

    Examples:
        .. code-block:: python
            :linenos:

            import pandas as pd

            from hosa.models.rnn import RNNRegression
            from hosa.aux import create_overlapping

            # 1 - Download, load, and split the data
            dataset = pd.read_csv(
                'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv',
                header=0, index_col=0)
            x = dataset.Passengers.to_numpy().reshape((len(dataset), 1))
            y = dataset.Passengers.to_numpy()
            x_train, y_train = x[:100], y[:100]
            X_test, y_test = x[100:], y[100:]

            # 2 - Prepare the data for rnn input
            x_train, y_train = create_overlapping(x_train, y_train, RNNRegression, 'central',
                                                  10, timesteps=1)
            X_test, y_test = create_overlapping(X_test, y_test, RNNRegression, 'central', 10,
                                                timesteps=1)

            # 3 - Create and fit the model
            clf = RNNRegression(n_outputs=1, n_neurons_dense_layer=200, n_units=10,
                                n_subs_layers=1, epochs=500, patience=500)
            clf.prepare(x_train, y_train)
            clf.compile()
            clf.fit(x_train, y_train)

            # 4 - Calculate predictions
            clf.predict(X_test)

            # 5 - Compute the score
            score = clf.score(X_test, y_test)
    """

    def __init__(self, n_outputs, n_neurons_dense_layer, n_units, n_subs_layers,
                 is_bidirectional=False, model_type='lstm', optimizer='adam',
                 dropout_percentage=0.1, metrics=None, activation_function_dense='relu',
                 kernel_initializer='normal', batch_size=1000, epochs=50, patience=5, **kwargs):
        # A fresh list is created per instance instead of sharing a mutable default argument.
        if metrics is None:
            metrics = ['mean_squared_error']
        self.metrics = metrics
        super().__init__(n_outputs, n_neurons_dense_layer, n_units, n_subs_layers,
                         is_bidirectional, model_type, optimizer, dropout_percentage,
                         activation_function_dense, kernel_initializer, batch_size, epochs,
                         patience, **kwargs)

    def prepare(self, x, y):
        """
        Prepares the model by adding the layers to the estimator: input layer,
        ``n_subs_layers`` subsequent layers, a dropout layer, a dense layer, and a linear
        output layer with ``n_outputs`` units.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., real numbers).
        """
        super().prepare(x, y)
        self.model.add(tf.keras.layers.Dense(self.n_outputs, activation='linear'))

    def fit(self, x, y, validation_size=0.33, atol=1e-04, rtol=1e-03, shuffle=True, **kwargs):
        """
        Fits the model to data matrix x and target(s) y.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., real numbers).
            validation_size (float or int): Proportion of the train dataset to include in the
                validation split.
            atol (float): Absolute tolerance used for early stopping based on the performance
                metric.
            rtol (float): Relative tolerance used for early stopping based on the performance
                metric.
            shuffle (bool): Whether to shuffle the data before splitting.
            **kwargs: Extra arguments that are used in the TensorFlow's model ``fit`` function.
                See `here <https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit>`_.

        Returns:
            tensorflow.keras.Sequential: Returns a trained TensorFlow model.
        """
        callback = EarlyStoppingAtMinLoss
        # Class weights and imbalance correction only apply to classification problems.
        super().aux_fit(x, y, callback, validation_size, atol=atol, rtol=rtol,
                        class_weights=None, imbalance_correction=None, shuffle=shuffle,
                        **kwargs)
        # Return the trained model, as documented and as ``RNNClassification.fit`` does.
        return self.model

    def score(self, x, y, **kwargs):
        """
        Computes the performance metrics on the given input data and target values.

        Args:
            x (numpy.ndarray): Input data.
            y (numpy.ndarray): Target values (i.e., real numbers).
            **kwargs: *Ignored*. Only included here for compatibility's sake.

        Returns:
            tuple: Returns a tuple containing the mean squared error (MSE) and coefficient of
            determination (:math:`R^2`).
        """
        y_pred = self.predict(x)
        r2 = r2_score(y, y_pred)
        mse = mean_squared_error(y, y_pred)
        return mse, r2

    def predict(self, x, **kwargs):
        """
        Predicts the target values using the input data in the trained model.

        Args:
            x (numpy.ndarray): Input data.
            **kwargs: Extra arguments that are used in the TensorFlow's model ``predict``
                function. See `here
                <https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict>`_.

        Returns:
            numpy.ndarray: Returns an array containing the estimates.
        """
        return self.model.predict(x, **kwargs)

    def compile(self):
        """
        Compiles the model for training.

        Returns:
            tensorflow.keras.Sequential: Returns an untrained but compiled TensorFlow model.
        """
        self.model.compile(loss='mean_squared_error', optimizer=self.optimizer,
                           metrics=self.metrics)
        # Return the compiled model, as documented and as ``RNNClassification.compile`` does.
        return self.model

    # NOTE: naming this method ``__dict__`` shadows the built-in instance attribute of the same
    # name, so ``instance.__dict__`` evaluates to this bound method.
    def __dict__(self):
        """
        Prepares a dictionary with the parameters of the model.

        Returns:
            dict: Dictionary with the parameter names mapped to their values.
        """
        parameters = {'n_outputs': self.n_outputs,
                      'n_neurons_dense_layer': self.n_neurons_dense_layer,
                      'n_units': self.n_units,
                      'n_subs_layers': self.n_subs_layers,
                      'is_bidirectional': self.is_bidirectional,
                      'model_type': self.model_type,
                      'optimizer': self.optimizer,
                      'dropout_percentage': self.dropout_percentage,
                      'metrics': self.metrics,
                      'activation_function_dense': self.activation_function_dense,
                      'kernel_initializer': self.kernel_initializer,
                      'batch_size': self.batch_size,
                      'epochs': self.epochs,
                      'patience': self.patience}
        return parameters