Source code for mindfoundry.optaas.client.sklearn_pipelines.estimators.ensemble

from typing import Sequence

from sklearn.ensemble import \
    RandomForestClassifier as BaseRandomForestClassifier, \
    ExtraTreesClassifier as BaseExtraTreesClassifier, \
    RandomForestRegressor as BaseRandomForestRegressor, \
    ExtraTreesRegressor as BaseExtraTreesRegressor, \
    GradientBoostingClassifier as BaseGradientBoostingClassifier, \
    GradientBoostingRegressor as BaseGradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier as BaseDecisionTreeClassifier, \
    DecisionTreeRegressor as BaseDecisionTreeRegressor

from mindfoundry.optaas.client.parameter import Distribution
from mindfoundry.optaas.client.sklearn_pipelines.mixin import OptimizableBaseEstimator
from mindfoundry.optaas.client.sklearn_pipelines.parameter_maker import SklearnParameterMaker
from mindfoundry.optaas.client.sklearn_pipelines.utils import ParametersConstraintsAndPriorMeans, SMALLEST_NUMBER_ABOVE_ZERO


class _OptimizableEnsemble(OptimizableBaseEstimator):
    """Superclass for optimizable ensemble estimators."""

    # Class attributes that must be overridden in subclasses
    criterion_values: Sequence[str] = None  # type: ignore
    min_samples_split_distribution: Distribution = None  # type: ignore
    max_depth_distribution: Distribution = None  # type: ignore
    min_impurity_decrease_maximum: float = None  # type: ignore

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs)\
            -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize ensemble estimators."""

        parameters = [
            sk.CategoricalParameter('max_features', values=['auto', 'sqrt', 'log2']),
            sk.IntParameter('min_samples_split', minimum=2, maximum=20,
                            distribution=self.min_samples_split_distribution),
            sk.IntParameter('min_samples_leaf', minimum=1, maximum=20),
            sk.CategoricalParameter('criterion', values=self.criterion_values),
            sk.IntParameter('max_leaf_nodes', minimum=10, maximum=10000, optional=True,
                            distribution=Distribution.LOGUNIFORM),
            sk.IntParameter('max_depth', minimum=1, maximum=100, optional=True,
                            distribution=self.max_depth_distribution),  # type: ignore
            sk.FloatParameter('min_weight_fraction_leaf', minimum=0.0, maximum=0.5),
            sk.FloatParameter('min_impurity_decrease', minimum=0, maximum=self.min_impurity_decrease_maximum),  # type: ignore
        ]
        return parameters, [], []
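
# A minimal usage sketch (hypothetical `estimator` and `sk` objects; in
# practice a SklearnParameterMaker is supplied by the OPTaaS pipeline
# machinery rather than constructed by hand):
#
#     parameters, constraints, prior_means = \
#         estimator.make_parameters_constraints_and_prior_means(sk)
#     # `parameters` covers max_features, min_samples_split, min_samples_leaf,
#     # criterion, max_leaf_nodes, max_depth, min_weight_fraction_leaf and
#     # min_impurity_decrease; no constraints or prior means are produced here.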


class _OptimizableForestOrTree(_OptimizableEnsemble):
    """Superclass for optimizable forest/tree-based estimators."""

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs) -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize forest/tree-based estimators."""

        parameters, constraints, prior_means = super().make_parameters_constraints_and_prior_means(sk, **kwargs)

        parameters.extend([
            sk.BoolParameter('bootstrap'),
            sk.IntParameter('n_estimators', minimum=10, maximum=500),
        ])

        return parameters, constraints, prior_means


class _OptimizableEnsembleClassifier(_OptimizableEnsemble):
    """Superclass for optimizable ensemble classifiers."""

    criterion_values = ['gini', 'entropy']
    min_impurity_decrease_maximum = 1


class _OptimizableEnsembleRegressor(_OptimizableEnsemble):
    """Superclass for optimizable ensemble regressors."""

    criterion_values = ['absolute_error', 'squared_error', 'poisson', 'mse', 'mae']
    min_impurity_decrease_maximum = 0.01


class _OptimizableRandomForest(_OptimizableForestOrTree):
    """Superclass for optimizable RandomForest estimators."""

    min_samples_split_distribution = Distribution.UNIFORM
    max_depth_distribution = Distribution.LOGUNIFORM


class _OptimizableExtraTrees(_OptimizableForestOrTree):
    """Superclass for optimizable ExtraTrees estimators."""

    min_samples_split_distribution = Distribution.LOGUNIFORM
    max_depth_distribution = Distribution.UNIFORM


class RandomForestClassifier(BaseRandomForestClassifier, _OptimizableEnsembleClassifier, _OptimizableRandomForest):
    """Allows us to optimize :class:`.RandomForestClassifier` estimators."""


class ExtraTreesClassifier(BaseExtraTreesClassifier, _OptimizableEnsembleClassifier, _OptimizableExtraTrees):
    """Allows us to optimize :class:`.ExtraTreesClassifier` estimators."""


class RandomForestRegressor(BaseRandomForestRegressor, _OptimizableEnsembleRegressor, _OptimizableRandomForest):
    """Allows us to optimize :class:`.RandomForestRegressor` estimators."""


class ExtraTreesRegressor(BaseExtraTreesRegressor, _OptimizableEnsembleRegressor, _OptimizableExtraTrees):
    """Allows us to optimize :class:`.ExtraTreesRegressor` estimators."""
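
# The concrete classes above are drop-in subclasses of their scikit-learn
# counterparts, so they can be fitted and used like any sklearn estimator.
# A minimal sketch (the toy dataset is illustrative only):
#
#     from sklearn.datasets import make_classification
#     X, y = make_classification(n_samples=100, random_state=0)
#     clf = RandomForestClassifier(n_estimators=50).fit(X, y)
#     clf.predict(X)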


class _OptimizableDecisionTree(_OptimizableEnsemble):
    """Superclass for optimizable DecisionTree estimators."""

    min_samples_split_distribution = Distribution.LOGUNIFORM
    max_depth_distribution = Distribution.UNIFORM

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs)\
            -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize DecisionTree-based estimators."""

        parameters, constraints, prior_means = super().make_parameters_constraints_and_prior_means(sk, **kwargs)

        parameters.extend([
            sk.CategoricalParameter('splitter', values=["best", "random"]),
        ])

        return parameters, constraints, prior_means


class DecisionTreeClassifier(BaseDecisionTreeClassifier, _OptimizableDecisionTree, _OptimizableEnsembleClassifier):
    """Allows us to optimize a :class:`.DecisionTreeClassifier`."""


class DecisionTreeRegressor(BaseDecisionTreeRegressor, _OptimizableDecisionTree):
    """Allows us to optimize a :class:`.DecisionTreeRegressor`."""

    criterion_values = ["mse", "friedman_mse", "mae", "squared_error"]
    min_impurity_decrease_maximum = 0.01


class _OptimizableGradientBoosting(_OptimizableEnsemble):
    """Superclass for optimizable GradientBoosting estimators."""

    criterion_values = ["mse", "friedman_mse", "mae", "squared_error"]
    min_impurity_decrease_maximum = 1
    min_samples_split_distribution = Distribution.UNIFORM
    max_depth_distribution = Distribution.UNIFORM

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs)\
            -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize a GradientBoosting estimator."""

        parameters, constraints, prior_means = super().make_parameters_constraints_and_prior_means(sk, **kwargs)

        parameters.extend([
            sk.FloatParameter('learning_rate', minimum=SMALLEST_NUMBER_ABOVE_ZERO, maximum=1),
            sk.IntParameter('n_estimators', minimum=10, maximum=500),
            sk.FloatParameter('subsample', minimum=0, maximum=1),
        ])

        return parameters, constraints, prior_means


class GradientBoostingClassifier(BaseGradientBoostingClassifier, _OptimizableGradientBoosting):
    """Allows us to optimize a :class:`.GradientBoostingClassifier`."""

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs)\
            -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize a :class:`.GradientBoostingClassifier`.

        Args:
            class_count (int, optional):
                Number of classes in the classification dataset. If set to a number other
                than `2`, the `loss` parameter will not be optimized (because it can only
                be set to "deviance").
        """

        parameters, constraints, prior_means = super().make_parameters_constraints_and_prior_means(sk, **kwargs)

        if kwargs.get('class_count') == 2:
            parameters.append(sk.CategoricalParameter('loss', values=["deviance", "exponential"]))

        return parameters, constraints, prior_means

    def _make_estimator(self, append=True):
        pass  # BaseGradientBoostingClassifier doesn't implement this either
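
# Sketch of the `class_count` behaviour above (assuming `sk` is an
# already-configured SklearnParameterMaker): the binary-only `loss` parameter
# is included only for two-class problems.
#
#     params, _, _ = GradientBoostingClassifier() \
#         .make_parameters_constraints_and_prior_means(sk, class_count=2)
#     # `params` now includes a CategoricalParameter for 'loss'
#     # ("deviance" vs "exponential"); with class_count=3 it would not.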


class GradientBoostingRegressor(BaseGradientBoostingRegressor, _OptimizableGradientBoosting):
    """Allows us to optimize a :class:`.GradientBoostingRegressor`."""

    def make_parameters_constraints_and_prior_means(self, sk: SklearnParameterMaker, **kwargs)\
            -> ParametersConstraintsAndPriorMeans:
        """Generates :class:`Parameters <.Parameter>`, :class:`Constraints <.Constraint>`
        and :class:`PriorMeans <.PriorMeans>` to optimize a :class:`.GradientBoostingRegressor`."""

        parameters, constraints, prior_means = super().make_parameters_constraints_and_prior_means(sk, **kwargs)

        parameters.extend([
            sk.CategoricalParameter('loss', values=["squared_error", "absolute_error", "huber", "quantile"]),
            sk.FloatParameter('alpha', minimum=SMALLEST_NUMBER_ABOVE_ZERO, maximum=1),
        ])

        return parameters, constraints, prior_means

    def _make_estimator(self, append=True):
        pass  # BaseGradientBoostingRegressor doesn't implement this either
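
# In scikit-learn, the `alpha` parameter added above is only used by the
# "huber" and "quantile" losses. A hedged usage sketch (X and y as in the
# earlier sketch):
#
#     reg = GradientBoostingRegressor(loss='quantile', alpha=0.9)
#     reg.fit(X, y)  # predictions estimate the 90th percentile of y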