# Source code for h2o4gpu.solvers.linear_regression

# - * - encoding : utf - 8 - * -
# pylint: disable=fixme, line-too-long
"""
:copyright: 2017-2018, Inc.
:license:   Apache License Version 2.0 (see LICENSE for details)
"""
# pylint: disable=unused-import
from h2o4gpu.solvers import elastic_net
from h2o4gpu.linear_model import base as sk
from ..solvers.utils import _setter

class LinearRegression:
    """H2O LinearRegression Regression Solver

    Thin dispatcher that builds a scikit-learn model, an h2o4gpu elastic-net
    model (configured with zero regularisation) and, on request, a DAAL
    model, then forwards ``fit``/``predict``/... to whichever one the
    ``backend`` selection picked.

    Parameters
    ----------
    fit_intercept : boolean, optional, default True
        whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.
        With ``backend='auto'`` a non-default value selects the sklearn
        backend.

    copy_X : boolean, optional, default True
        If True, X will be copied; else, it may be overwritten.
        Only passed to the sklearn model.

    n_jobs : int, optional, default 1
        The number of jobs to use for the computation.
        If -1 all CPUs are used. This will only provide speedup for
        n_targets > 1 and sufficient large problems.
        Only passed to the sklearn model.

    n_gpus : int, (Default=-1)
        Number of GPUs to use in the h2o4gpu solver.

    tol : float, (Default=1E-4)
        Relative tolerance. Only passed to the h2o4gpu model.

    glm_stop_early : bool, (Default=True)
        Stop early when there is no more relative
        improvement in the primary and dual residuals for ADMM.

    glm_stop_early_error_fraction : float, (Default=1.0)
        Relative tolerance for metric-based stopping criterion
        (stop if relative improvement is not at least this much).

    verbose : bool or int, (Default=False)
        Print verbose information to the console if truthy.

    backend : string, (Default="auto")
        Which backend to use.
        Options are 'auto', 'sklearn', 'h2o4gpu', 'daal'.
        The ``H2O4GPU_BACKEND`` environment variable, when set, overrides
        this argument. Any other value runs the h2o4gpu model.
        The backend actually used is saved as the ``backend`` attribute.

    **kwargs
        Extra keyword arguments; forwarded to the DAAL model only.
    """

    def __init__(
            self,
            fit_intercept=True,  # h2o4gpu
            normalize=False,
            copy_X=True,
            n_jobs=1,
            n_gpus=-1,
            tol=1E-4,
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,  # h2o4gpu
            verbose=False,
            backend='auto',
            **kwargs):
        import os

        # The environment variable takes precedence over the argument.
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend

        self.do_daal = False
        self.do_sklearn = False

        if backend == 'auto':
            # Fall back to Sklearn
            # Can remove if fully implement sklearn functionality
            sklearn_only_params = (('normalize', normalize, False),)
            for name, value, default in sklearn_only_params:
                if value != default:
                    self.do_sklearn = True
                    if verbose:
                        print("WARNING:"
                              " The sklearn parameter " + name +
                              " has been changed from default to " +
                              str(value) +
                              ". Will run Sklearn Linear Regression.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'daal':
            # Imported lazily: DAAL is an optional component.
            from h2o4gpu import DAAL_SUPPORTED
            if DAAL_SUPPORTED:
                from h2o4gpu.solvers.daal_solver.regression \
                    import LinearRegression as DLR
                self.do_daal = True
                self.model_daal = DLR(fit_intercept=fit_intercept,
                                      normalize=normalize,
                                      **kwargs)
            else:
                import platform
                # Spacing fixed: the original pieces concatenated to
                # "WARNING:DAAL ..." and "... Sklearn modelused instead".
                print("WARNING: "
                      "DAAL is supported only for x86_64, "
                      "architecture detected {}. Sklearn model "
                      "used instead".format(platform.architecture()))
                self.do_sklearn = True
        # 'h2o4gpu' (and any unrecognised value) leaves both flags False,
        # which selects the h2o4gpu model below.

        # Record the backend that will really run, on every path
        # (previously missing for 'auto' and unrecognised values).
        if self.do_sklearn:
            self.backend = 'sklearn'
        elif self.do_daal:
            self.backend = 'daal'
        else:
            self.backend = 'h2o4gpu'

        # The sklearn model is always built: score() relies on it even when
        # another backend does the fitting.
        self.model_sklearn = sk.LinearRegressionSklearn(
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy_X=copy_X,
            n_jobs=n_jobs)

        # Equivalent Linear Regression parameters for h2o4gpu:
        # a single elastic-net fit with lambda fixed at zero.
        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=0.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=tol,
            lambda_stop_early=False,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=5000,
            verbose=verbose,
            lambda_max=0.0,
            alpha_max=0.0,
            alpha_min=0.0,
            alphas=None,
            lambdas=None,
            tol_seek_factor=1E-1,
            family='elasticnet',
            order=None)

        if self.do_sklearn:
            if verbose:
                print("Running sklearn Linear Regression")
            self.model = self.model_sklearn
        elif self.do_daal:
            if verbose:
                print("Running PyDAAL Linear Regression")
            self.model = self.model_daal
        else:
            if verbose:
                print("Running h2o4gpu Linear Regression")
            self.model = self.model_h2o4gpu
        self.verbose = verbose

    def fit(self, X, y=None, sample_weight=None):
        """Fit the selected backend model and return its ``fit`` result.

        ``sample_weight`` is only forwarded on the sklearn path; the DAAL
        and h2o4gpu paths are called with ``(X, y)`` only. Fitted
        attributes are mirrored onto this object for the sklearn and
        h2o4gpu paths.
        """
        if self.do_sklearn:
            res = self.model.fit(X, y, sample_weight)
            self.set_attributes()
        elif self.do_daal:
            res = self.model.fit(X, y)
        else:
            res = self.model.fit(X, y)
            self.set_attributes()
        return res

    def get_params(self):
        """Return the parameters of the selected backend model."""
        return self.model.get_params()

    def predict(self, X):
        """Predict with the selected backend model and refresh attributes."""
        res = self.model.predict(X)
        self.set_attributes()
        return res

    def score(self, X, y, sample_weight=None):
        """Return the sklearn model's score on ``(X, y)``.

        Scoring always goes through the sklearn model. When another backend
        is active the sklearn model is first (re-)fitted on the same
        ``(X, y)`` passed here, without ``sample_weight``.
        """
        # TODO add for h2o4gpu
        # TODO score for DAAL? input parameters are not clear
        if self.verbose:
            print("WARNING: score() is using sklearn")
        if not self.do_sklearn:
            self.model_sklearn.fit(X, y)  # Need to re-fit
        res = self.model_sklearn.score(X, y, sample_weight)
        return res

    def set_params(self, **params):
        """Forward ``params`` to the selected backend model's ``set_params``."""
        return self.model.set_params(**params)

    def set_attributes(self):
        """ set attributes for Linear Regression

        Copies ``coef_``, ``intercept_`` and the timing attributes from the
        active model onto this object. The timing attributes are reset to
        ``None`` first so they exist even if the active model lacks them.
        """
        # NOTE(review): _setter presumably evaluates each assignment string
        # and ignores the two given exception types - confirm in
        # solvers.utils.
        s = _setter(oself=self, e1=NameError, e2=AttributeError)

        s('oself.coef_ = oself.model.coef_')
        s('oself.intercept_ = oself.model.intercept_')

        self.time_prepare = None
        s('oself.time_prepare = oself.model.time_prepare')
        self.time_upload_data = None
        s('oself.time_upload_data = oself.model.time_upload_data')
        self.time_fitonly = None
        s('oself.time_fitonly = oself.model.time_fitonly')