Source code for h2o.estimators.psvm

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OSupportVectorMachineEstimator(H2OEstimator):
    """
    PSVM

    """

    algo = "psvm"
    supervised_learning = True

    def __init__(self,
                 model_id=None,  # type: Optional[Union[None, str, H2OEstimator]]
                 training_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 validation_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 response_column=None,  # type: Optional[str]
                 ignored_columns=None,  # type: Optional[List[str]]
                 ignore_const_cols=True,  # type: bool
                 hyper_param=1.0,  # type: float
                 kernel_type="gaussian",  # type: Literal["gaussian"]
                 gamma=-1.0,  # type: float
                 rank_ratio=-1.0,  # type: float
                 positive_weight=1.0,  # type: float
                 negative_weight=1.0,  # type: float
                 disable_training_metrics=True,  # type: bool
                 sv_threshold=0.0001,  # type: float
                 fact_threshold=1e-05,  # type: float
                 feasible_threshold=0.001,  # type: float
                 surrogate_gap_threshold=0.001,  # type: float
                 mu_factor=10.0,  # type: float
                 max_iterations=200,  # type: int
                 seed=-1,  # type: int
                 ):
        """
        Create a PSVM estimator and record its parameters.

        Every argument except ``model_id`` is assigned through the property of the same name, so the
        check done by that property's setter is applied to the constructor arguments as well.

        :param model_id: Destination id for this model; generated automatically when not given.
               Defaults to ``None``.
        :type model_id: Union[None, str, H2OEstimator], optional
        :param training_frame: Id of the training data frame.
               Defaults to ``None``.
        :type training_frame: Union[None, str, H2OFrame], optional
        :param validation_frame: Id of the validation data frame.
               Defaults to ``None``.
        :type validation_frame: Union[None, str, H2OFrame], optional
        :param response_column: Column holding the response variable.
               Defaults to ``None``.
        :type response_column: str, optional
        :param ignored_columns: Names of the columns to leave out of training.
               Defaults to ``None``.
        :type ignored_columns: List[str], optional
        :param ignore_const_cols: Ignore constant columns.
               Defaults to ``True``.
        :type ignore_const_cols: bool
        :param hyper_param: Penalty parameter C of the error term.
               Defaults to ``1.0``.
        :type hyper_param: float
        :param kernel_type: Type of kernel to use.
               Defaults to ``"gaussian"``.
        :type kernel_type: Literal["gaussian"]
        :param gamma: Coefficient of the kernel (currently the RBF gamma of the gaussian kernel; -1 means
               1/#features).
               Defaults to ``-1.0``.
        :type gamma: float
        :param rank_ratio: Desired rank of the ICF matrix, expressed as a ratio of the number of input rows
               (-1 means use sqrt(#rows)).
               Defaults to ``-1.0``.
        :type rank_ratio: float
        :param positive_weight: Weight of the positive (+1) class of observations.
               Defaults to ``1.0``.
        :type positive_weight: float
        :param negative_weight: Weight of the negative (-1) class of observations.
               Defaults to ``1.0``.
        :type negative_weight: float
        :param disable_training_metrics: Disable calculating training metrics (expensive on large datasets).
               Defaults to ``True``.
        :type disable_training_metrics: bool
        :param sv_threshold: Threshold for accepting a candidate observation into the set of support vectors.
               Defaults to ``0.0001``.
        :type sv_threshold: float
        :param fact_threshold: Convergence threshold of the Incomplete Cholesky Factorization (ICF).
               Defaults to ``1e-05``.
        :type fact_threshold: float
        :param feasible_threshold: Convergence threshold for primal-dual residuals in the IPM iteration.
               Defaults to ``0.001``.
        :type feasible_threshold: float
        :param surrogate_gap_threshold: Feasibility criterion of the surrogate duality gap (eta).
               Defaults to ``0.001``.
        :type surrogate_gap_threshold: float
        :param mu_factor: Increasing factor mu.
               Defaults to ``10.0``.
        :type mu_factor: float
        :param max_iterations: Maximum number of iterations of the algorithm.
               Defaults to ``200``.
        :type max_iterations: int
        :param seed: Seed for the pseudo random number generator (if applicable).
               Defaults to ``-1``.
        :type seed: int
        """
        super(H2OSupportVectorMachineEstimator, self).__init__()
        self._parms = {}

        # The model id is kept on the instance and in the parameter dict, without a property setter.
        self._id = model_id
        self._parms['model_id'] = model_id

        # Listed in signature order; each pair is assigned through the matching property setter, so the
        # parameter dict is filled in this order.
        property_values = (
            ("training_frame", training_frame),
            ("validation_frame", validation_frame),
            ("response_column", response_column),
            ("ignored_columns", ignored_columns),
            ("ignore_const_cols", ignore_const_cols),
            ("hyper_param", hyper_param),
            ("kernel_type", kernel_type),
            ("gamma", gamma),
            ("rank_ratio", rank_ratio),
            ("positive_weight", positive_weight),
            ("negative_weight", negative_weight),
            ("disable_training_metrics", disable_training_metrics),
            ("sv_threshold", sv_threshold),
            ("fact_threshold", fact_threshold),
            ("feasible_threshold", feasible_threshold),
            ("surrogate_gap_threshold", surrogate_gap_threshold),
            ("mu_factor", mu_factor),
            ("max_iterations", max_iterations),
            ("seed", seed),
        )
        for attr_name, attr_value in property_values:
            setattr(self, attr_name, attr_value)

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> train, valid = splice.split_frame(ratios=[0.8])
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=train)
        >>> svm.mse()
        """
        # Same lookup as a bare .get(): None comes back when nothing has been stored.
        return self._parms.get("training_frame", None)

    @training_frame.setter
    def training_frame(self, training_frame):
        # The value is handed to H2OFrame._validate with the parameter name; what it returns is stored.
        validated_frame = H2OFrame._validate(training_frame, 'training_frame')
        self._parms["training_frame"] = validated_frame

    @property
    def validation_frame(self):
        """
        Id of the validation data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> train, valid = splice.split_frame(ratios=[0.8])
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=train, validation_frame=valid)
        >>> svm.mse()
        """
        # Same lookup as a bare .get(): None comes back when nothing has been stored.
        return self._parms.get("validation_frame", None)

    @validation_frame.setter
    def validation_frame(self, validation_frame):
        # The value is handed to H2OFrame._validate with the parameter name; what it returns is stored.
        validated_frame = H2OFrame._validate(validation_frame, 'validation_frame')
        self._parms["validation_frame"] = validated_frame

    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``, defaults to ``None``.
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        # Checked with assert_is_type against None or str, then stored in the parameter dict.
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``, defaults to ``None``.
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        # Checked with assert_is_type against None or [str], then stored in the parameter dict.
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def ignore_const_cols(self):
        """
        Ignore constant columns.

        Type: ``bool``, defaults to ``True``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        ignore_const_cols=False,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("ignore_const_cols")

    @ignore_const_cols.setter
    def ignore_const_cols(self, ignore_const_cols):
        # Checked with assert_is_type against None or bool, then stored in the parameter dict.
        assert_is_type(ignore_const_cols, None, bool)
        self._parms["ignore_const_cols"] = ignore_const_cols

    @property
    def hyper_param(self):
        """
        Penalty parameter C of the error term.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("hyper_param")

    @hyper_param.setter
    def hyper_param(self, hyper_param):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(hyper_param, None, numeric)
        self._parms["hyper_param"] = hyper_param

    @property
    def kernel_type(self):
        """
        Type of kernel to use.

        Type: ``Literal["gaussian"]``, defaults to ``"gaussian"``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        kernel_type="gaussian",
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("kernel_type")

    @kernel_type.setter
    def kernel_type(self, kernel_type):
        # Checked with assert_is_type against None or Enum("gaussian") -- "gaussian" is the only name
        # listed -- then stored in the parameter dict.
        assert_is_type(kernel_type, None, Enum("gaussian"))
        self._parms["kernel_type"] = kernel_type

    @property
    def gamma(self):
        """
        Coefficient of the kernel (currently RBF gamma for gaussian kernel, -1 means 1/#features).

        Type: ``float``, defaults to ``-1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("gamma")

    @gamma.setter
    def gamma(self, gamma):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(gamma, None, numeric)
        self._parms["gamma"] = gamma

    @property
    def rank_ratio(self):
        """
        Desired rank of the ICF matrix expressed as a ratio of the number of input rows (-1 means use sqrt(#rows)).

        Type: ``float``, defaults to ``-1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("rank_ratio")

    @rank_ratio.setter
    def rank_ratio(self, rank_ratio):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(rank_ratio, None, numeric)
        self._parms["rank_ratio"] = rank_ratio

    @property
    def positive_weight(self):
        """
        Weight of the positive (+1) class of observations.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        positive_weight=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("positive_weight")

    @positive_weight.setter
    def positive_weight(self, positive_weight):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(positive_weight, None, numeric)
        self._parms["positive_weight"] = positive_weight

    @property
    def negative_weight(self):
        """
        Weight of the negative (-1) class of observations.

        Type: ``float``, defaults to ``1.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        negative_weight=10,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("negative_weight")

    @negative_weight.setter
    def negative_weight(self, negative_weight):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(negative_weight, None, numeric)
        self._parms["negative_weight"] = negative_weight

    @property
    def disable_training_metrics(self):
        """
        Disable calculating training metrics (expensive on large datasets).

        Type: ``bool``, defaults to ``True``.

        :examples:

        >>> from h2o.estimators import H2OSupportVectorMachineEstimator
        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("disable_training_metrics")

    @disable_training_metrics.setter
    def disable_training_metrics(self, disable_training_metrics):
        # Checked with assert_is_type against None or bool, then stored in the parameter dict.
        assert_is_type(disable_training_metrics, None, bool)
        self._parms["disable_training_metrics"] = disable_training_metrics

    @property
    def sv_threshold(self):
        """
        Threshold for accepting a candidate observation into the set of support vectors.

        Type: ``float``, defaults to ``0.0001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        sv_threshold=0.01,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("sv_threshold")

    @sv_threshold.setter
    def sv_threshold(self, sv_threshold):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(sv_threshold, None, numeric)
        self._parms["sv_threshold"] = sv_threshold

    @property
    def fact_threshold(self):
        """
        Convergence threshold of the Incomplete Cholesky Factorization (ICF).

        Type: ``float``, defaults to ``1e-05``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False,
        ...                                        fact_threshold=1e-7)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("fact_threshold")

    @fact_threshold.setter
    def fact_threshold(self, fact_threshold):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(fact_threshold, None, numeric)
        self._parms["fact_threshold"] = fact_threshold

    @property
    def feasible_threshold(self):
        """
        Convergence threshold for primal-dual residuals in the IPM iteration.

        Type: ``float``, defaults to ``0.001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(disable_training_metrics=False,
        ...                                        feasible_threshold=1e-4)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("feasible_threshold")

    @feasible_threshold.setter
    def feasible_threshold(self, feasible_threshold):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(feasible_threshold, None, numeric)
        self._parms["feasible_threshold"] = feasible_threshold

    @property
    def surrogate_gap_threshold(self):
        """
        Feasibility criterion of the surrogate duality gap (eta).

        Type: ``float``, defaults to ``0.001``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.01,
        ...                                        rank_ratio=0.1,
        ...                                        surrogate_gap_threshold=0.1,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("surrogate_gap_threshold")

    @surrogate_gap_threshold.setter
    def surrogate_gap_threshold(self, surrogate_gap_threshold):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(surrogate_gap_threshold, None, numeric)
        self._parms["surrogate_gap_threshold"] = surrogate_gap_threshold

    @property
    def mu_factor(self):
        """
        Increasing factor mu.

        Type: ``float``, defaults to ``10.0``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        mu_factor=100.5,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("mu_factor")

    @mu_factor.setter
    def mu_factor(self, mu_factor):
        # Checked with assert_is_type against None or numeric, then stored in the parameter dict.
        assert_is_type(mu_factor, None, numeric)
        self._parms["mu_factor"] = mu_factor

    @property
    def max_iterations(self):
        """
        Maximum number of iterations of the algorithm.

        Type: ``int``, defaults to ``200``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        hyper_param=0.01,
        ...                                        max_iterations=20,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.mse()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        # Checked with assert_is_type against None or int, then stored in the parameter dict.
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations

    @property
    def seed(self):
        """
        Seed for pseudo random number generator (if applicable).

        Type: ``int``, defaults to ``-1``.

        :examples:

        >>> splice = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/splice/splice.svm")
        >>> svm = H2OSupportVectorMachineEstimator(gamma=0.1,
        ...                                        rank_ratio=0.1,
        ...                                        seed=1234,
        ...                                        disable_training_metrics=False)
        >>> svm.train(y="C1", training_frame=splice)
        >>> svm.model_performance()
        """
        # Read from the parameter dict; .get() gives None when the key is absent.
        return self._parms.get("seed")

    @seed.setter
    def seed(self, seed):
        # Checked with assert_is_type against None or int, then stored in the parameter dict.
        assert_is_type(seed, None, int)
        self._parms["seed"] = seed