Source code for bca.bca

"""
Binary Coordinate Ascent algorithm for feature subset selection

Authors: Amin Zarshenas <mzarshen@hawk.iit.edu>
         Vijay Srinivas Tida
         Kenji Suzuki
"""

import numpy as np
from sklearn.utils import check_X_y
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted


[docs]class BCA(BaseEstimator):

    """Feature Selection with Binary Coordinate Ascent Algorithm

    Given an external estimator, the goal of binary coordinate ascent
    (BCA) algorithm is to select features which maximize an objective
    function of an estimator. It returns a binary vector with its size
    equal to the number of features, where zero or one indicates a
    feature at that position is not selected, or selected, respectively.
    First the best feature subset is initialized (specified as a binary
    vector). The default initialization is the vector of all zeros,
    corresponding to no input features selected. The corresponding
    objective function of an specified estimator is then calculated for
    the initial subset. BCA algorithm then iteratively select or remove
    features, one at a time, by flipping the binary elements of the
    binary vector of features, and examine if the selection/removal can
    increase the objective function. The process will be repeated over
    this vector for several times untill a convergance criteria is
    reached (can be set to number of iterations or a delta for objective
    value). The algorithm will return a binary vector corresponding to
    the "best" subset of features.

    Read more in the reference link specified below:

    http://www.sciencedirect.com/science/article/pii/S0950705116302416

    Parameters
    ----------
    
    estimator : object
        A supervised learning estimator with a `` fit `` method that will
        be used along with an objective function, in order to calculate
        the importance of a feature subset.

    scoring : string
        The metric to be used as objective to be maximized, e.g., roc_auc,
        accuracy, etc.
        Note: at the moment sklearn cross_val_score inside the BCA class
        supports binary classification only for roc_auc.

    cv : int, cross-validation generator or an iterable, optional
        The cv parameter used inside the sklearn cross_val_score.

    delta : float

        The delta used to determine the convergance of the objective function.

    Examples
    --------

    The following example shows how to select the optimial subset of features
    in the breast cancer dataset.

    >>> from bca import BCA
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.naive_bayes import GaussianNB
    >>> X, y = load_breast_cancer().data, load_breast_cancer().target
    >>> estimator = GaussianNB()
    >>> selector = BCA(estimator, scoring='accuracy', cv=5)
    >>> selector = selector.fit(X, y)
    >>> selector.features
    [ 1  4  6  7 16 20 21 22 23 27 28]
    >>> selector.score
    0.971989226626
    >>> selector.predict(X[20:25])
    [1 1 0 0 0]
    
    """
    
    def __init__(self, estimator, scoring='accuracy', cv=5, delta=10**-5):
        self.estimator = estimator
        self.scoring = scoring
        self.cv = cv
        self.delta = delta

    def _estimator_type(self):
        return self.estimator._estimator_type

[docs]    def fit(self, X, y, initial_subset=None, fit_estimator=True, verbose=True):
        """Fit the BCA model to find the best subset of features, and
        potentially fit the estimator on the best subset.

        Parameters
        ----------

        X : {array-like, sparse matrix},  shape=[n_samples,n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.

        initial_subset : binary vector, shape=[n_features]
            The initial subset. Default to all zeros ("None").

        fit_estimator : boolean,
            Indicates to fit the estimator on the final features or not.

        verbose : boolean
            Indicates the verbosity of the algorithm.

        Returns
        -------

        self : class object 
            The BCA object with trained classifier

        """
        return self._fit(X, y, initial_subset, fit_estimator, verbose)

    def _fit(self, X, y, initial_subset, fit_estimator, verbose):

        X, y = check_X_y(X, y, "csc")

        # scoring function
        def scorer(features):
            return self._scorer(X, y, features, verbose=verbose)

        # initialization
        iteration = 0
        n_features = X.shape[1]
        if initial_subset is not None:
            features = initial_subset
        else:
            features = np.zeros(n_features)

        score = scorer(features)

        if verbose:
            print ("Iteration {0} starts...".format(iteration))

        if verbose:
            self._print_features(features, score)

        # main BCA loop of feature selection
        stop = False

        if verbose:
            print("\nBCA algorithm starts...")

        while not stop:
            iteration += 1
            if verbose:
                print("\nIteration {0} starts...".format(iteration))
            score_best = score

            # one iteration over all features
            for i in range(n_features):
                # flip one of the feature to create a new subset
                features_trial = features.copy()
                features_trial[i] = not features_trial[i]
                score_trial = scorer(features_trial)

                # modify the features if the new subset is better
                if score_trial > score:
                    features = features_trial.copy()
                    score = score_trial

                if verbose:
                    self._print_features(features, score)

            # after each iteration check if the maximization converged
            if abs(score_best-score) < self.delta:
                stop = True

        if verbose:
            print ("\nBCA algorithm finished...")

        # fit a final estimator on the entire dataset
        if fit_estimator:
            if verbose:
                print ("\nFitting the final estimator...")
            self.estimator.fit(X[:, features == 1], y)

        # set final attributes
        self.features = np.arange(len(features))[features == 1]
        self.score = score

        return self

[docs]    def predict(self, X):
        """Reduce X to the selected features and then predict using the
        underlying estimator.
        
        Parameters
        ----------
        
        X : array of shape [n_samples, n_features]
            The input samples.
        
        Returns
        -------
        
        y : array of shape [n_samples]
            The predicted target values.

        """
        check_is_fitted(self, 'estimator')
        return self.estimator.predict(X[:, self.features])

    def _scorer(self, X, y, features, verbose):

        if np.sum(features) > 0:
            return np.mean(cross_val_score(self.estimator, X[:, features == 1],
                           y, scoring=self.scoring, cv=self.cv,
                           verbose=verbose))
        else:
            return -np.inf

    def _print_features(self, features, score):
        features = np.arange(len(features))[features == 1]
        print("best feature set so far is {0} with score = {1}".
              format(features, score))