Source code for h2o.estimators.adaboost

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#

import ast
import json
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OAdaBoostEstimator(H2OEstimator):
    """
    AdaBoost

    Builds an AdaBoost model.
    """

    algo = "adaboost"
    supervised_learning = True

    def __init__(self,
                 model_id=None,  # type: Optional[Union[None, str, H2OEstimator]]
                 training_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 ignored_columns=None,  # type: Optional[List[str]]
                 ignore_const_cols=True,  # type: bool
                 categorical_encoding="auto",  # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]
                 weights_column=None,  # type: Optional[str]
                 nlearners=50,  # type: int
                 weak_learner="auto",  # type: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
                 learn_rate=0.5,  # type: float
                 weak_learner_params=None,  # type: Optional[dict]
                 seed=-1,  # type: int
                 ):
        """
        :param model_id: Destination id for this model; auto-generated if not specified.
               Defaults to ``None``.
        :type model_id: Union[None, str, H2OEstimator], optional
        :param training_frame: Id of the training data frame.
               Defaults to ``None``.
        :type training_frame: Union[None, str, H2OFrame], optional
        :param ignored_columns: Names of columns to ignore for training.
               Defaults to ``None``.
        :type ignored_columns: List[str], optional
        :param ignore_const_cols: Ignore constant columns.
               Defaults to ``True``.
        :type ignore_const_cols: bool
        :param categorical_encoding: Encoding scheme for categorical features.
               Defaults to ``"auto"``.
        :type categorical_encoding: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary",
               "eigen", "label_encoder", "sort_by_response", "enum_limited"]
        :param weights_column: Column with observation weights. Giving some observation a weight of zero is
               equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is
               equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are
               per-row observation weights and do not increase the size of the data frame. This is typically the
               number of times a row is repeated, but non-integer values are supported as well. During training,
               rows with higher weights matter more, due to the larger loss function pre-factor. If you set
               weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To
               get an accurate prediction, remove all rows with weight == 0.
               Defaults to ``None``.
        :type weights_column: str, optional
        :param nlearners: Number of AdaBoost weak learners.
               Defaults to ``50``.
        :type nlearners: int
        :param weak_learner: Choose a weak learner type. Defaults to AUTO, which means DRF.
               Defaults to ``"auto"``.
        :type weak_learner: Literal["auto", "drf", "glm", "gbm", "deep_learning"]
        :param learn_rate: Learning rate (from 0.0 to 1.0).
               Defaults to ``0.5``.
        :type learn_rate: float
        :param weak_learner_params: Customized parameters for the weak_learner algorithm.
               Defaults to ``None``.
        :type weak_learner_params: dict, optional
        :param seed: Seed for pseudo random number generator (if applicable).
               Defaults to ``-1``.
        :type seed: int
        """
        super(H2OAdaBoostEstimator, self).__init__()
        self._parms = {}
        self._id = self._parms['model_id'] = model_id
        self.training_frame = training_frame
        self.ignored_columns = ignored_columns
        self.ignore_const_cols = ignore_const_cols
        self.categorical_encoding = categorical_encoding
        self.weights_column = weights_column
        self.nlearners = nlearners
        self.weak_learner = weak_learner
        self.learn_rate = learn_rate
        self.weak_learner_params = weak_learner_params
        self.seed = seed

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``Union[None, str, H2OFrame]``.
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def ignore_const_cols(self):
        """
        Ignore constant columns.

        Type: ``bool``, defaults to ``True``.
        """
        return self._parms.get("ignore_const_cols")

    @ignore_const_cols.setter
    def ignore_const_cols(self, ignore_const_cols):
        assert_is_type(ignore_const_cols, None, bool)
        self._parms["ignore_const_cols"] = ignore_const_cols

    @property
    def categorical_encoding(self):
        """
        Encoding scheme for categorical features.

        Type: ``Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen",
        "label_encoder", "sort_by_response", "enum_limited"]``, defaults to ``"auto"``.
        """
        return self._parms.get("categorical_encoding")

    @categorical_encoding.setter
    def categorical_encoding(self, categorical_encoding):
        assert_is_type(categorical_encoding, None, Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"))
        self._parms["categorical_encoding"] = categorical_encoding

    @property
    def weights_column(self):
        """
        Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it
        from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice.
        Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the
        size of the data frame. This is typically the number of times a row is repeated, but non-integer values
        are supported as well. During training, rows with higher weights matter more, due to the larger loss
        function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero
        and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.

        Type: ``str``.
        """
        return self._parms.get("weights_column")

    @weights_column.setter
    def weights_column(self, weights_column):
        assert_is_type(weights_column, None, str)
        self._parms["weights_column"] = weights_column

    @property
    def nlearners(self):
        """
        Number of AdaBoost weak learners.

        Type: ``int``, defaults to ``50``.
        """
        return self._parms.get("nlearners")

    @nlearners.setter
    def nlearners(self, nlearners):
        assert_is_type(nlearners, None, int)
        self._parms["nlearners"] = nlearners

    @property
    def weak_learner(self):
        """
        Choose a weak learner type. Defaults to AUTO, which means DRF.

        Type: ``Literal["auto", "drf", "glm", "gbm", "deep_learning"]``, defaults to ``"auto"``.
        """
        return self._parms.get("weak_learner")

    @weak_learner.setter
    def weak_learner(self, weak_learner):
        assert_is_type(weak_learner, None, Enum("auto", "drf", "glm", "gbm", "deep_learning"))
        self._parms["weak_learner"] = weak_learner

    @property
    def learn_rate(self):
        """
        Learning rate (from 0.0 to 1.0).

        Type: ``float``, defaults to ``0.5``.
        """
        return self._parms.get("learn_rate")

    @learn_rate.setter
    def learn_rate(self, learn_rate):
        assert_is_type(learn_rate, None, numeric)
        self._parms["learn_rate"] = learn_rate

    @property
    def weak_learner_params(self):
        """
        Customized parameters for the weak_learner algorithm.

        Type: ``dict``.

        :examples:

        >>> prostate_hex = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
        >>> prostate_hex["CAPSULE"] = prostate_hex["CAPSULE"].asfactor()
        >>> response = "CAPSULE"
        >>> seed = 42
        >>> adaboost_model = H2OAdaBoostEstimator(seed=seed,
        ...                                       weak_learner="DRF",
        ...                                       weak_learner_params={'ntrees': 1, 'max_depth': 3})
        >>> adaboost_model.train(y=response,
        ...                      ignored_columns=["ID"],
        ...                      training_frame=prostate_hex)
        >>> print(adaboost_model)
        """
        # The parameters are stored as a JSON string; decode them back into a
        # dict for the caller.
        if self._parms.get("weak_learner_params") is not None:
            return json.loads(self._parms.get("weak_learner_params"))
        return None

    @weak_learner_params.setter
    def weak_learner_params(self, weak_learner_params):
        assert_is_type(weak_learner_params, None, dict)
        if weak_learner_params is not None and weak_learner_params != "":
            # Serialize the dict to a JSON string so it can be passed to the backend.
            self._parms["weak_learner_params"] = json.dumps(weak_learner_params)
        else:
            self._parms["weak_learner_params"] = None

    @property
    def seed(self):
        """
        Seed for pseudo random number generator (if applicable).

        Type: ``int``, defaults to ``-1``.
        """
        return self._parms.get("seed")

    @seed.setter
    def seed(self, seed):
        assert_is_type(seed, None, int)
        self._parms["seed"] = seed
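

# A minimal end-to-end usage sketch, not part of the auto-generated module.
# It assumes a reachable H2O cluster and reuses the public prostate dataset
# from the weak_learner_params docstring example above.
if __name__ == "__main__":
    import h2o

    h2o.init()
    prostate = h2o.import_file(
        "http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
    # The binary response must be converted to a factor for classification.
    prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()
    model = H2OAdaBoostEstimator(nlearners=50,
                                 weak_learner="drf",
                                 weak_learner_params={"ntrees": 1, "max_depth": 3},
                                 seed=42)
    model.train(y="CAPSULE", ignored_columns=["ID"], training_frame=prostate)
    print(model)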