# Source code for h2o.estimators.psvm

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OSupportVectorMachineEstimator(H2OEstimator):
    """
    PSVM

    Support vector machine estimator (``algo = "psvm"``, supervised learning).

    Every constructor argument is exposed as a property of the same name. The
    setters type-check the value (frames are validated through
    ``H2OFrame._validate``) and store it in ``self._parms``; the getters read
    it back from ``self._parms``.
    """

    algo = "psvm"
    supervised_learning = True

    def __init__(self,
                 model_id=None,  # type: Optional[Union[None, str, H2OEstimator]]
                 training_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 validation_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 response_column=None,  # type: Optional[str]
                 ignored_columns=None,  # type: Optional[List[str]]
                 ignore_const_cols=True,  # type: bool
                 hyper_param=1.0,  # type: float
                 kernel_type="gaussian",  # type: Literal["gaussian"]
                 gamma=-1.0,  # type: float
                 rank_ratio=-1.0,  # type: float
                 positive_weight=1.0,  # type: float
                 negative_weight=1.0,  # type: float
                 disable_training_metrics=True,  # type: bool
                 sv_threshold=0.0001,  # type: float
                 fact_threshold=1e-05,  # type: float
                 feasible_threshold=0.001,  # type: float
                 surrogate_gap_threshold=0.001,  # type: float
                 mu_factor=10.0,  # type: float
                 max_iterations=200,  # type: int
                 seed=-1,  # type: int
                 ):
        """
        :param model_id: Destination id for this model; auto-generated if not specified.
               Defaults to ``None``.
        :type model_id: Union[None, str, H2OEstimator], optional
        :param training_frame: Id of the training data frame.
               Defaults to ``None``.
        :type training_frame: Union[None, str, H2OFrame], optional
        :param validation_frame: Id of the validation data frame.
               Defaults to ``None``.
        :type validation_frame: Union[None, str, H2OFrame], optional
        :param response_column: Response variable column.
               Defaults to ``None``.
        :type response_column: str, optional
        :param ignored_columns: Names of columns to ignore for training.
               Defaults to ``None``.
        :type ignored_columns: List[str], optional
        :param ignore_const_cols: Ignore constant columns.
               Defaults to ``True``.
        :type ignore_const_cols: bool
        :param hyper_param: Penalty parameter C of the error term.
               Defaults to ``1.0``.
        :type hyper_param: float
        :param kernel_type: Type of kernel to use.
               Defaults to ``"gaussian"``.
        :type kernel_type: Literal["gaussian"]
        :param gamma: Coefficient of the kernel (currently RBF gamma for gaussian kernel, -1 means 1/#features).
               Defaults to ``-1.0``.
        :type gamma: float
        :param rank_ratio: Desired rank of the ICF matrix expressed as a ratio of the number of input rows (-1 means
               use sqrt(#rows)).
               Defaults to ``-1.0``.
        :type rank_ratio: float
        :param positive_weight: Weight of positive (+1) class of observations.
               Defaults to ``1.0``.
        :type positive_weight: float
        :param negative_weight: Weight of negative (-1) class of observations.
               Defaults to ``1.0``.
        :type negative_weight: float
        :param disable_training_metrics: Disable calculating training metrics (expensive on large datasets).
               Defaults to ``True``.
        :type disable_training_metrics: bool
        :param sv_threshold: Threshold for accepting a candidate observation into the set of support vectors.
               Defaults to ``0.0001``.
        :type sv_threshold: float
        :param fact_threshold: Convergence threshold of the Incomplete Cholesky Factorization (ICF).
               Defaults to ``1e-05``.
        :type fact_threshold: float
        :param feasible_threshold: Convergence threshold for primal-dual residuals in the IPM iteration.
               Defaults to ``0.001``.
        :type feasible_threshold: float
        :param surrogate_gap_threshold: Feasibility criterion of the surrogate duality gap (eta).
               Defaults to ``0.001``.
        :type surrogate_gap_threshold: float
        :param mu_factor: Increasing factor mu.
               Defaults to ``10.0``.
        :type mu_factor: float
        :param max_iterations: Maximum number of iterations of the algorithm.
               Defaults to ``200``.
        :type max_iterations: int
        :param seed: Seed for pseudo random number generator (if applicable).
               Defaults to ``-1``.
        :type seed: int
        """
        super(H2OSupportVectorMachineEstimator, self).__init__()
        self._parms = {}
        # model_id has no property of its own: it is stored both as the
        # estimator id and as a regular parameter.
        self._id = self._parms['model_id'] = model_id
        # Everything below is assigned through the properties so that each
        # setter's type check runs on the constructor arguments as well.
        self.training_frame = training_frame
        self.validation_frame = validation_frame
        self.response_column = response_column
        self.ignored_columns = ignored_columns
        self.ignore_const_cols = ignore_const_cols
        self.hyper_param = hyper_param
        self.kernel_type = kernel_type
        self.gamma = gamma
        self.rank_ratio = rank_ratio
        self.positive_weight = positive_weight
        self.negative_weight = negative_weight
        self.disable_training_metrics = disable_training_metrics
        self.sv_threshold = sv_threshold
        self.fact_threshold = fact_threshold
        self.feasible_threshold = feasible_threshold
        self.surrogate_gap_threshold = surrogate_gap_threshold
        self.mu_factor = mu_factor
        self.max_iterations = max_iterations
        self.seed = seed

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> train, valid = splice.split_frame(ratios=[0.8])
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=train)
        >>> svm.mse()
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')

    @property
    def validation_frame(self):
        """
        Id of the validation data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> train, valid = splice.split_frame(ratios=[0.8])
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=train, validation_frame=valid)
        >>> svm.mse()
        """
        return self._parms.get("validation_frame")

    @validation_frame.setter
    def validation_frame(self, validation_frame):
        self._parms["validation_frame"] = H2OFrame._validate(validation_frame, 'validation_frame')

    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def ignore_const_cols(self):
        """
        Ignore constant columns.

        Type: ``bool``, defaults to ``True``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        ignore_const_cols=False,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("ignore_const_cols")

    @ignore_const_cols.setter
    def ignore_const_cols(self, ignore_const_cols):
        assert_is_type(ignore_const_cols, None, bool)
        self._parms["ignore_const_cols"] = ignore_const_cols

    @property
    def hyper_param(self):
        """
        Penalty parameter C of the error term.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("hyper_param")

    @hyper_param.setter
    def hyper_param(self, hyper_param):
        assert_is_type(hyper_param, None, numeric)
        self._parms["hyper_param"] = hyper_param

    @property
    def kernel_type(self):
        """
        Type of kernel to use.

        Type: ``Literal["gaussian"]``, defaults to ``"gaussian"``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        kernel_type="gaussian",
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("kernel_type")

    @kernel_type.setter
    def kernel_type(self, kernel_type):
        assert_is_type(kernel_type, None, Enum("gaussian"))
        self._parms["kernel_type"] = kernel_type

    @property
    def gamma(self):
        """
        Coefficient of the kernel (currently RBF gamma for gaussian kernel, -1 means 1/#features).

        Type: ``float``, defaults to ``-1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("gamma")

    @gamma.setter
    def gamma(self, gamma):
        assert_is_type(gamma, None, numeric)
        self._parms["gamma"] = gamma

    @property
    def rank_ratio(self):
        """
        Desired rank of the ICF matrix expressed as a ratio of the number of input rows (-1 means use sqrt(#rows)).

        Type: ``float``, defaults to ``-1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("rank_ratio")

    @rank_ratio.setter
    def rank_ratio(self, rank_ratio):
        assert_is_type(rank_ratio, None, numeric)
        self._parms["rank_ratio"] = rank_ratio

    @property
    def positive_weight(self):
        """
        Weight of positive (+1) class of observations.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        positive_weight=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("positive_weight")

    @positive_weight.setter
    def positive_weight(self, positive_weight):
        assert_is_type(positive_weight, None, numeric)
        self._parms["positive_weight"] = positive_weight

    @property
    def negative_weight(self):
        """
        Weight of negative (-1) class of observations.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        negative_weight=10,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("negative_weight")

    @negative_weight.setter
    def negative_weight(self, negative_weight):
        assert_is_type(negative_weight, None, numeric)
        self._parms["negative_weight"] = negative_weight

    @property
    def disable_training_metrics(self):
        """
        Disable calculating training metrics (expensive on large datasets).

        Type: ``bool``, defaults to ``True``.

        :examples:

        >>> from h2o.estimators import H2OSupportVectorMachineEstimator
        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("disable_training_metrics")

    @disable_training_metrics.setter
    def disable_training_metrics(self, disable_training_metrics):
        assert_is_type(disable_training_metrics, None, bool)
        self._parms["disable_training_metrics"] = disable_training_metrics

    @property
    def sv_threshold(self):
        """
        Threshold for accepting a candidate observation into the set of support vectors.

        Type: ``float``, defaults to ``0.0001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        sv_threshold=0.01,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("sv_threshold")

    @sv_threshold.setter
    def sv_threshold(self, sv_threshold):
        assert_is_type(sv_threshold, None, numeric)
        self._parms["sv_threshold"] = sv_threshold

    @property
    def fact_threshold(self):
        """
        Convergence threshold of the Incomplete Cholesky Factorization (ICF).

        Type: ``float``, defaults to ``1e-05``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False,
        ...                                        fact_threshold=1e-7)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("fact_threshold")

    @fact_threshold.setter
    def fact_threshold(self, fact_threshold):
        assert_is_type(fact_threshold, None, numeric)
        self._parms["fact_threshold"] = fact_threshold

    @property
    def feasible_threshold(self):
        """
        Convergence threshold for primal-dual residuals in the IPM iteration.

        Type: ``float``, defaults to ``0.001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False,
        ...                                        feasible_threshold=1e-4)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("feasible_threshold")

    @feasible_threshold.setter
    def feasible_threshold(self, feasible_threshold):
        assert_is_type(feasible_threshold, None, numeric)
        self._parms["feasible_threshold"] = feasible_threshold

    @property
    def surrogate_gap_threshold(self):
        """
        Feasibility criterion of the surrogate duality gap (eta).

        Type: ``float``, defaults to ``0.001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        surrogate_gap_threshold=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("surrogate_gap_threshold")

    @surrogate_gap_threshold.setter
    def surrogate_gap_threshold(self, surrogate_gap_threshold):
        assert_is_type(surrogate_gap_threshold, None, numeric)
        self._parms["surrogate_gap_threshold"] = surrogate_gap_threshold

    @property
    def mu_factor(self):
        """
        Increasing factor mu.

        Type: ``float``, defaults to ``10.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        mu_factor=100.5,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("mu_factor")

    @mu_factor.setter
    def mu_factor(self, mu_factor):
        assert_is_type(mu_factor, None, numeric)
        self._parms["mu_factor"] = mu_factor

    @property
    def max_iterations(self):
        """
        Maximum number of iterations of the algorithm.

        Type: ``int``, defaults to ``200``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        max_iterations=20,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations

    @property
    def seed(self):
        """
        Seed for pseudo random number generator (if applicable).

        Type: ``int``, defaults to ``-1``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        seed=1234,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.model_performance()
        """
        return self._parms.get("seed")

    @seed.setter
    def seed(self, seed):
        assert_is_type(seed, None, int)
        self._parms["seed"] = seed