# Source code for h2o.backend.connection

# -*- encoding: utf-8 -*-
"""
Class for communication with an H2O server.

`H2OConnection` is the main class of this module, and it handles the connection itself:
    hc = H2OConnection.open() : open a new connection
    hc.request(endpoint, [data|json|filename]) : make a REST API request to the server
    hc.info() : return information about the current connection
    hc.close() : close the connection
    hc.session_id : current session id

:copyright: (c) 2016 H2O.ai
:license:   Apache License Version 2.0 (see LICENSE for details)
"""
from h2o.utils.compatibility import *  # NOQA

import atexit
from collections import defaultdict
import os
import re
import sys
import tempfile
import time
import types
from warnings import warn
from weakref import ref

import requests
from requests.auth import AuthBase

from h2o.backend import H2OCluster, H2OLocalServer
from h2o.display import print2
from h2o.exceptions import H2OConnectionError, H2OServerError, H2OResponseError, H2OValueError
from h2o.model.metrics import make_metrics
from h2o.schemas import H2OMetadataV3, H2OErrorV3, H2OModelBuilderErrorV3, define_classes_from_schema
from h2o.two_dim_table import H2OTwoDimTable
from h2o.utils.metaclass import CallableString, backwards_compatibility, h2o_meta
from h2o.utils.shared_utils import stringify_list, stringify_dict, as_resource
from h2o.utils.typechecks import (assert_is_type, assert_matches, assert_satisfies, is_type, numeric)

__all__ = ("H2OConnection", "H2OConnectionConf", )

# Warn when the installed `requests` is older than 2.10. Only the numeric "major.minor" prefix of the
# version string is compared, so that pre-release / development versions such as "2.28.0rc1" or
# "2.10.0.dev0" do not abort the import with a ValueError while being converted to integers.
_requests_version_ = re.match(r"(\d+)(?:\.(\d+))?", requests.__version__)
if _requests_version_ and tuple(int(x or 0) for x in _requests_version_.groups()) < (2, 10):
    print("[WARNING] H2O requires requests module of version 2.10 or newer. You have version %s.\n"
          "You can upgrade to the newest version of the module running from the command line\n"
          "    $ pip%s install --upgrade requests" % (requests.__version__, sys.version_info[0]))
del _requests_version_


# Registry of session callbacks: maps a handler name ("on_open", "on_close", ...) to the list of hooks
# registered for it, in registration order.
_session_hooks_ = defaultdict(list)


def apply_session_hooks(event='open'):
    """
    Run every hook registered for ``event``, in registration order.

    A hook that raises an ``Exception`` is silently skipped, so one failing hook does not prevent
    the remaining ones from running. Requesting an event without any registered hooks is a no-op
    and does not create an entry in the registry.

    :param event: name of the session event, for example ``"open"`` or ``"close"``.
    """
    # ``.get`` (rather than indexing) keeps the defaultdict from growing on unknown events.
    for callback in _session_hooks_.get('on_{}'.format(event), ()):
        try:
            callback()
        except Exception:
            pass


def register_session_hook(event='open', hook=None):
    """
    Register ``hook`` so that :func:`apply_session_hooks` invokes it for ``event``.

    :param event: name of the session event, for example ``"open"`` or ``"close"``.
    :param hook: callable taking no arguments; must not be None.
    """
    assert hook is not None
    _session_hooks_['on_{}'.format(event)].append(hook)


class H2OConnectionConf(object):
    """
    Configuration of connection to H2O.

    The main goal of this class is to specify location of running
    H2O instance and properties of connection. The location of running instance
    is given by schema, ip, port, context_path parameters forming connection URL in
    the following way: ``schema://ip:port/context_path``
    """

    # List of allowed property names exposed by this class
    allowed_properties = ["ip", "port", "https", "context_path", "verify_ssl_certificates", "cacert",
                          "proxy", "auth", "cookies", "verbose"]

    def __init__(self, config=None):
        """
        Create a configuration holding the default values, optionally overridden from ``config``.

        :param config: optional dictionary mapping property names (see ``allowed_properties``) to values.
        :raises H2OValueError: if ``config`` contains an unknown property name.
        """
        self._ip = None
        self._port = None
        self._https = None
        self._context_path = ''
        self._verify_ssl_certificates = True
        self._cacert = None
        self._proxy = None
        self._auth = None
        self._cookies = None
        self._verbose = True
        # Fill from config if it is specified
        if config is not None:
            self._fill_from_config(config)

    def _fill_from_config(self, config):
        """
        Fill this instance (in place) from the given dictionary.

        Every key must be one of ``allowed_properties``; each value is assigned through the
        corresponding property setter and is therefore type-checked. Properties assigned before
        an unknown key is encountered remain set.

        :param config: dictionary of parameters
        :raises H2OValueError: if input config contains unknown property name.
        """
        for k, v in config.items():
            if k in H2OConnectionConf.allowed_properties:
                setattr(self, k, v)
            else:
                raise H2OValueError(message="Unsupported name of property: %s!" % k, var_name="config")

    @property
    def ip(self):
        """IP address or hostname of the H2O server (None if not set)."""
        return self._ip

    @ip.setter
    def ip(self, value):
        assert_is_type(value, str)
        self._ip = value

    @property
    def port(self):
        """Port of the H2O server (None if not set)."""
        return self._port

    @port.setter
    def port(self, value):
        assert_is_type(value, int)
        self._port = value

    @property
    def https(self):
        """Whether the ``https`` schema is used when building :attr:`url` (None if not set)."""
        return self._https

    @https.setter
    def https(self, value):
        assert_is_type(value, bool)
        self._https = value

    @property
    def context_path(self):
        """Context path appended after ``ip:port/`` in :attr:`url` (empty string by default)."""
        return self._context_path

    @context_path.setter
    def context_path(self, value):
        assert_is_type(value, str)
        self._context_path = value

    @property
    def verify_ssl_certificates(self):
        """Whether SSL certificates should be verified (True by default)."""
        return self._verify_ssl_certificates

    @verify_ssl_certificates.setter
    def verify_ssl_certificates(self, value):
        assert_is_type(value, bool)
        self._verify_ssl_certificates = value

    @property
    def cacert(self):
        """CA certificate location given as a string, or None."""
        return self._cacert

    @cacert.setter
    def cacert(self, value):
        assert_is_type(value, None, str)
        self._cacert = value

    @property
    def proxy(self):
        """Proxy given as a string, or None."""
        return self._proxy

    @proxy.setter
    def proxy(self, value):
        assert_is_type(value, str, None)
        self._proxy = value

    @property
    def auth(self):
        """Authentication: an ``AuthBase`` object, a ``(str, str)`` tuple, or None."""
        return self._auth

    @auth.setter
    def auth(self, value):
        assert_is_type(value, AuthBase, (str, str), None)
        self._auth = value

    @property
    def cookies(self):
        """Cookie string, list of cookie strings, or None."""
        return self._cookies

    @cookies.setter
    def cookies(self, value):
        # A single cookie string is accepted as well as a list of them, matching what
        # ``H2OConnection.open(cookies=...)`` accepts.
        assert_is_type(value, str, [str], None)
        self._cookies = value

    @property
    def verbose(self):
        """Verbosity flag (True by default)."""
        return self._verbose

    @verbose.setter
    def verbose(self, value):
        assert_is_type(value, bool)
        self._verbose = value

    @property
    def url(self):
        """Connection URL built as ``schema://ip:port/context_path``."""
        if self.https:
            schema = "https"
        else:
            schema = "http"

        curl = "{}://{}:{}/{}".format(schema, self.ip, self.port, self.context_path)
        return curl


@backwards_compatibility(
    class_attrs=dict(
        __ENCODING__="utf-8",
        __ENCODING_ERROR__="replace",
        default=lambda: _deprecated_default(),
        jar_paths=lambda: list(getattr(H2OLocalServer, "_jar_paths")()),
        rest_version=lambda: 3,
        https=lambda: __H2OCONN__.base_url.split(":")[0] == "https",
        ip=lambda: __H2OCONN__.base_url.split(":")[1][2:],
        port=lambda: __H2OCONN__.base_url.split(":")[2],
        username=lambda: _deprecated_username(),
        password=lambda: _deprecated_password(),
        insecure=lambda: not getattr(__H2OCONN__, "_verify_ssl_cert"),
        current_connection=lambda: __H2OCONN__,
        check_conn=lambda: _deprecated_check_conn(),
        make_url=lambda url_suffix, _rest_version=3: __H2OCONN__.make_url(url_suffix, _rest_version),
        get=lambda url_suffix, **kwargs: _deprecated_get(__H2OCONN__, url_suffix, **kwargs),
        post=lambda url_suffix, file_upload_info=None, **kwa:
            _deprecated_post(__H2OCONN__, url_suffix, file_upload_info=file_upload_info, **kwa),
        delete=lambda url_suffix, **kwargs: _deprecated_delete(__H2OCONN__, url_suffix, **kwargs),
        get_json=lambda url_suffix, **kwargs: _deprecated_get(__H2OCONN__, url_suffix, **kwargs),
        post_json=lambda url_suffix, file_upload_info=None, **kwa:
            _deprecated_post(__H2OCONN__, url_suffix, file_upload_info=file_upload_info, **kwa),
        rest_ctr=lambda: __H2OCONN__.requests_count,
    ),
    instance_attrs=dict(
        cluster_is_up=lambda self: self.cluster.is_running(),
        info=lambda self, refresh=True: self.cluster,
        shutdown_server=lambda self, prompt=True: self.cluster.shutdown(prompt),
        make_url=lambda self, url_suffix, _rest_version=3:
            "/".join([self._base_url, str(_rest_version), url_suffix]),
        get=lambda *args, **kwargs: _deprecated_get(*args, **kwargs),
        post=lambda *args, **kwargs: _deprecated_post(*args, **kwargs),
        delete=lambda *args, **kwargs: _deprecated_delete(*args, **kwargs),
        get_json=lambda *args, **kwargs: _deprecated_get(*args, **kwargs),
        post_json=lambda *args, **kwargs: _deprecated_post(*args, **kwargs),
    )
)
class H2OConnection(h2o_meta()):
    """
    Connection handle to an H2O cluster.

    Typically, you don't need to access this class directly. Instead, use :func:`h2o.connect` to establish a
    connection, and :func:`h2o.api` to make requests to the backend H2O server. However, if your use-case is not
    typical, then read on.

    Instances of this class may only be created through the static method :meth:`open`::

        hc = H2OConnection.open(...)

    Once opened, the connection remains active until the script exits (or until you explicitly :meth:`close` it).
    If the script exits with an exception, then the connection will fail to close, and the backend server will
    keep all the temporary frames and the open session.

    Alternatively, you can use this class as a context manager, which will ensure that the connection gets closed
    at the end of the ``with ...`` block even if an exception occurs::

        with H2OConnection.open() as hc:
            hc.info().pprint()

    Once the connection is established, you can send REST API requests to the server using :meth:`request`.
    """

    # Pattern matching a URL of the form ``schema://ip:port/context_path``.
    url_pattern = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?((/[\w-]+)+)?$"

    @staticmethod
    def open(server=None, url=None, ip=None, port=None, name=None, https=None, auth=None,
             verify_ssl_certificates=True, cacert=None, proxy=None, cookies=None, verbose=True, msgs=None,
             strict_version_check=True):
        r"""
        Establish connection to an existing H2O server.

        The connection is not kept alive, so what this method actually does is attempt to connect to the
        specified server, and check that the server is healthy and responds to REST API requests. If the H2O server
        cannot be reached, an :class:`H2OConnectionError` will be raised. On a success, this method returns a new
        :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

        There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

            * pass a ``server`` option,
            * pass the full ``url`` for the connection,
            * provide a triple of parameters ``ip``, ``port``, ``https``.

        :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
            between connecting to a local server by specifying its ip and address, and connecting through
            an H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
            will allow us to query the server status through OS, and potentially provide snapshot of the server's
            error log in the exception information.
        :param url: full url of the server to connect to.
        :param ip: target server's IP address or hostname (default "localhost").
        :param port: H2O server's port (default 54321).
        :param name: H2O cluster name.
        :param https: if True then connect using https instead of http (default False).
        :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
            setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks.
            When used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
        :param cacert: Path to a CA bundle file or a directory with certificates of trusted CAs (optional).
        :param auth: authentication token for connecting to the remote server. This can be either a
            (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
            the ``requests.auth`` module.
        :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
            will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
            check for the presence of these variables and issue a warning if they are found. In order to suppress
            that warning and use proxy from the environment, pass ``proxy="(default)"``.
        :param cookies: Cookie (or list of) to add to requests.
        :param verbose: if True, then connection progress info will be printed to the stdout.
        :param strict_version_check: If True, an error will be raised if the client and server versions don't match.
        :param msgs: custom messages to display during connection. This is a tuple
            (initial message, success message, failure message).

        :returns: A new :class:`H2OConnection` instance.
        :raises H2OConnectionError: if the server cannot be reached.
        :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error,
            the client itself should decide whether it wants to retry or not).
        """
        if server is not None:
            assert_is_type(server, H2OLocalServer)
            assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
            assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
            assert_is_type(name, None, "`name` should be None when `server` parameter is supplied")
            if not server.is_running():
                raise H2OConnectionError("Unable to connect to server because it is not running")
            ip = server.ip
            port = server.port
            scheme = server.scheme
            context_path = ''
        elif url is not None:
            assert_is_type(url, str)
            assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
            assert_is_type(name, str, None)
            # We don't allow any Unicode characters in the URL. Maybe some day we will...
            match = assert_matches(url, H2OConnection.url_pattern)
            scheme = match.group(1)
            ip = match.group(2)
            port = int(match.group(3))
            context_path = '' if match.group(4) is None else "%s" % (match.group(4))
        else:
            if ip is None:
                ip = str("localhost")
            if port is None:
                port = 54321
            if https is None:
                https = False
            if is_type(port, str) and port.isdigit():
                port = int(port)
            assert_is_type(ip, str)
            assert_is_type(port, int)
            assert_is_type(name, str, None)
            assert_is_type(https, bool)
            assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
            assert_satisfies(port, 1 <= port <= 65535)
            scheme = "https" if https else "http"
            context_path = ''

        if verify_ssl_certificates is None:
            verify_ssl_certificates = True
        assert_is_type(verify_ssl_certificates, bool)
        assert_is_type(cacert, str, None)
        assert_is_type(proxy, str, None)
        assert_is_type(auth, AuthBase, (str, str), None)
        assert_is_type(cookies, str, [str], None)
        assert_is_type(msgs, None, (str, str, str))

        conn = H2OConnection()
        conn._verbose = bool(verbose)
        conn._local_server = server
        conn._base_url = "%s://%s:%d%s" % (scheme, ip, port, context_path)
        conn._name = server.name if server else name
        conn._verify_ssl_cert = bool(verify_ssl_certificates)
        conn._cacert = cacert
        conn._auth = auth
        conn._cookies = cookies
        conn._proxies = None
        if proxy and proxy != "(default)":
            conn._proxies = {scheme: proxy}
        elif not proxy:
            # Give user a warning if there are any "*_proxy" variables in the environment.
            # GH issue: https://github.com/h2oai/h2o-3/issues/15409
            # To suppress the warning pass proxy = "(default)".
            # (The loop variable is deliberately not called `name`, to avoid shadowing the parameter.)
            for env_name in os.environ:
                if env_name.lower() == scheme + "_proxy":
                    warn("Proxy is defined in the environment: %s. "
                         "This may interfere with your H2O Connection." % env_name)

        # `ip` is the host part that was just written into `conn._base_url`; it is used directly instead
        # of going through the deprecated `ip()` accessor, which reads the module-global __H2OCONN__.
        if "localhost" in ip or "127.0.0.1" in ip:
            # Empty list will cause requests library to respect the default behavior.
            # Thus a non-existing proxy is inserted.
            conn._proxies = {
                "http": None,
                "https": None,
            }

        try:
            retries = 20 if server else 5
            conn._stage = 1
            conn._timeout = 3.0
            conn._cluster = conn._test_connection(retries, messages=msgs)
            # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
            # setting for now, for no good reason other than to ignore all those bugs :(
            conn._timeout = None
            # create a weakref to prevent the atexit callback from keeping hard ref
            # to the connection even after manual close.
            conn_ref = ref(conn)
            apply_session_hooks('open')

            def exit_close():
                con = conn_ref()
                if con and con.connected:
                    print("Closing connection %s at exit" % con.session_id)
                    con.close()

            atexit.register(exit_close)
        except Exception:
            # Reset _stage so that we know the connection was not initialized properly.
            conn._stage = 0
            raise

        conn._cluster.check_version(strict=strict_version_check)
        return conn

    def request(self, endpoint, data=None, json=None, filename=None, save_to=None):
        """
        Perform a REST API request to the backend H2O server.

        :param endpoint: (str) The endpoint's URL, for example "GET /4/schemas/KeyV4"
        :param data: data payload for POST (and sometimes GET) requests. This should be a dictionary of simple
            key/value pairs (values can also be arrays), which will be sent over in x-www-form-encoded format.
        :param json: also data payload, but it will be sent as a JSON body. Cannot be used together with `data`.
        :param filename: file to upload to the server. Cannot be used with `data` or `json`.
        :param save_to: if provided, will write the response to that file (additionally, the response will be
            streamed, so large files can be downloaded seamlessly). This parameter can be either a file name,
            or a folder name. If the folder doesn't exist, it will be created automatically. A function is
            accepted too: it is called with the response object and its result is used as the target.

        :returns: an H2OResponse object representing the server's response (unless ``save_to`` parameter is
            provided, in which case the output file's name will be returned).
        :raises H2OConnectionError: if the H2O server cannot be reached (or connection is not initialized).
        :raises H2OServerError: if there was a server error (http 500), or server returned malformed JSON.
        :raises H2OResponseError: if the server returned an H2OErrorV3 response (e.g. if the parameters were
            invalid).
        """
        if self._stage == 0:
            raise H2OConnectionError("Connection not initialized; run .connect() first.")
        if self._stage == -1:
            raise H2OConnectionError("Connection was closed, and can no longer be used.")

        # Prepare URL
        assert_is_type(endpoint, str)
        match = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD|TRACE) (/.*)$")
        method = match.group(1)
        urltail = match.group(2)
        url = self._base_url + urltail

        # Prepare data
        if filename is not None:
            assert_is_type(filename, str)
            assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
            assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
            assert_satisfies(method, method == "POST",
                             "File uploads can only be done via POST method, got %s" % method)
        elif data is not None:
            assert_is_type(data, dict)
            assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
        elif json is not None:
            assert_is_type(json, dict)

        if filename is None:
            request_data = self._prepare_data_payload(data)
        else:
            request_data = self._prepare_file_payload(filename)

        # For GET and DELETE the payload travels in the query string instead of the body.
        params = None
        if (method == "GET" or method == "DELETE") and data:
            params = request_data
            request_data = None

        stream = False
        if save_to is not None:
            assert_is_type(save_to, str, types.FunctionType)
            stream = True

        # A list of cookies is collapsed (once) into the single string sent in the "Cookie" header.
        if self._cookies is not None and isinstance(self._cookies, list):
            self._cookies = ";".join(self._cookies)

        # Make the request
        with as_resource(request_data) as rd:
            start_time = time.time()
            try:
                self._log_start_transaction(endpoint, rd, json, filename, params)
                args = self._request_args()
                resp = requests.request(method=method, url=url, data=rd, json=json, params=params,
                                        stream=stream, **args)
                if isinstance(save_to, types.FunctionType):
                    save_to = save_to(resp)
                self._log_end_transaction(start_time, resp)
                return self._process_response(resp, save_to)

            except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
                if self._local_server and not self._local_server.is_running():
                    self._log_end_exception("Local server has died.")
                    raise H2OConnectionError("Local server has died unexpectedly. RIP.")
                else:
                    self._log_end_exception(e)
                    raise H2OConnectionError("Unexpected HTTP error: %s" % e)
            except requests.exceptions.Timeout as e:
                self._log_end_exception(e)
                elapsed_time = time.time() - start_time
                raise H2OConnectionError("Timeout after %.3fs" % elapsed_time)
            except H2OResponseError as e:
                # Attach the request details to the error object before letting it propagate.
                err = e.args[0]
                if isinstance(err, H2OErrorV3):
                    err.endpoint = endpoint
                    err.payload = (rd, json, filename, params)
                raise

    def _request_args(self):
        """Return the keyword arguments (headers, timeout, auth, verify, proxies) for ``requests.request``."""
        headers = {"User-Agent": "H2O Python client/" + sys.version.replace("\n", ""),
                   "X-Cluster": self._cluster_id,
                   "Cookie": self._cookies}
        # When verification is on and a CA bundle was supplied, pass the bundle path; otherwise the plain flag.
        verify = self._cacert if self._verify_ssl_cert and self._cacert else self._verify_ssl_cert
        return {
            'headers': headers,
            'timeout': self._timeout,
            'auth': self._auth,
            'verify': verify,
            'proxies': self._proxies
        }

    @staticmethod
    def save_to_detect(resp):
        """
        Return the text following ``filename=`` in the response's Content-Disposition header.

        Suitable for use as the callable ``save_to`` argument of :meth:`request`.

        :raises KeyError: if the response has no Content-Disposition header.
        :raises IndexError: if the header contains no ``filename=`` part.
        """
        disposition = resp.headers['Content-Disposition']
        return disposition.split("filename=")[1].strip()

    def close(self):
        """
        Close an existing connection; once closed it cannot be used again.

        Strictly speaking, it is not necessary to close all connections that you opened -- we have several
        mechanisms in place that will do so automatically (``__del__()``, ``__exit__()``, and ``atexit()``
        handlers), however there is also no good reason to make this method private.
        """
        if self._session_id:
            try:
                # If the server gone bad, we don't want to wait forever...
                if self._timeout is None:
                    self._timeout = 1
                self.request("DELETE /4/sessions/%s" % self._session_id)
                self._print("H2O session %s closed." % self._session_id)
            except Exception as e:
                self._print("H2O session %s was not closed properly." % self._session_id)
                self._log_end_exception(e)
            self._session_id = None
        self._stage = -1
        apply_session_hooks('close')

    @property
    def connected(self):
        """True while the connection is in the "connected" stage (opened and not yet closed)."""
        return self._stage > 0

    @property
    def session_id(self):
        """
        Return the session id of the current connection.

        The session id is issued (through an API request) the first time it is requested, but no sooner. This is
        because generating a session id puts it into the DKV on the server, which effectively locks the cluster.
        Once issued, the session id will stay the same until the connection is closed.
        """
        if self._session_id is None:
            req = self.request("POST /4/sessions")
            self._session_id = req.get("session_key") or req.get("session_id")
        return CallableString(self._session_id)

    @property
    def cluster(self):
        """H2OCluster object describing the underlying cluster."""
        return self._cluster

    @property
    def base_url(self):
        """Base URL of the server, without trailing ``"/"``. For example: ``"https://example.com:54321"``."""
        return self._base_url

    @property
    def name(self):
        """Cluster name this connection was opened with (may be None)."""
        return self._name

    @property
    def proxy(self):
        """The ``proxies`` dictionary passed to the requests module (or None if no proxy was configured)."""
        return self._proxies

    @property
    def local_server(self):
        """Handler to the H2OLocalServer instance (if connected to one)."""
        return self._local_server

    @property
    def requests_count(self):
        """Total number of requests made since the connection was opened (used for debug purposes)."""
        return self._requests_counter

    @property
    def timeout_interval(self):
        """Timeout length for each request, in seconds."""
        return self._timeout

    @timeout_interval.setter
    def timeout_interval(self, v):
        assert_is_type(v, numeric, None)
        self._timeout = v

    def start_logging(self, dest=None):
        """
        Start logging all API requests to the provided destination.

        :param dest: Where to write the log: either a filename (str), or an open file handle (file). If not
            given, then a new temporary file will be created.
        """
        assert_is_type(dest, None, str, type(sys.stdout))
        if dest is None:
            dest = os.path.join(tempfile.mkdtemp(), "h2o-connection.log")
        self._print("Now logging all API requests to file %r" % dest)
        self._is_logging = True
        self._logging_dest = dest

    def stop_logging(self):
        """Stop logging API requests."""
        if self._is_logging:
            self._print("Logging stopped.")
            self._is_logging = False

    # ------------------------------------------------------------------------------------------------------------------
    # PRIVATE
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self):
        """[Private] Please use H2OConnection.open() to create H2OConnection objects."""
        globals()["__H2OCONN__"] = self  # for backward-compatibility: __H2OCONN__ is the latest instantiated object
        self._stage = 0             # 0 = not connected, 1 = connected, -1 = disconnected
        self._session_id = None     # Rapids session id; issued upon request only
        self._base_url = None       # "{scheme}://{ip}:{port}"
        self._name = None
        self._verify_ssl_cert = None
        self._cacert = None
        self._auth = None           # Authentication token
        self._proxies = None        # `proxies` dictionary in the format required by the requests module
        self._cluster_id = None
        self._cookies = None
        self._cluster = None        # H2OCluster object
        self._verbose = None        # Print detailed information about connection status
        self._requests_counter = 0  # how many API requests were made
        self._timeout = None        # timeout for a single request (in seconds)
        self._is_logging = False    # when True, log every request
        self._logging_dest = None   # where the log messages will be written, either filename or open file handle
        self._local_server = None   # H2OLocalServer instance to which we are connected (if known)

    def _test_connection(self, max_retries=5, messages=None):
        """
        Test that the H2O cluster can be reached, and retrieve basic cluster status info.

        :param max_retries: Number of times to try to connect to the cluster (with 0.2s intervals).
        :param messages: optional triple (initial message, success message, failure message) printed while
            connecting; the initial message may contain a ``{url}`` placeholder.

        :returns: Cluster information (an H2OCluster object)
        :raises H2OConnectionError, H2OServerError:
        """
        if messages is None:
            messages = ("Connecting to H2O server at {url} ..", "successful.", "failed.")
        self._print(messages[0].format(url=self._base_url), end="")
        cld = None
        errors = []
        for _ in range(max_retries):
            self._print(".", end="", flush=True)
            if self._local_server and not self._local_server.is_running():
                raise H2OServerError("Local server was unable to start")
            try:
                define_classes_from_schema(_classes_defined_from_schema_, self)
                cld = self.request("GET /3/Cloud")

                if self.name and cld.cloud_name != self.name:
                    raise H2OConnectionError(
                        "Connected to cloud %s but requested %s." % (cld.cloud_name, self.name)
                    )
                if cld.consensus and cld.cloud_healthy:
                    self._print(" " + messages[1])
                    return cld
                else:
                    if cld.consensus and not cld.cloud_healthy:
                        msg = "in consensus but not healthy"
                    elif not cld.consensus and cld.cloud_healthy:
                        msg = "not in consensus but healthy"
                    else:
                        msg = "not in consensus and not healthy"
                    errors.append("Cloud is in a bad shape: %s (size = %d, bad nodes = %d)"
                                  % (msg, cld.cloud_size, cld.bad_nodes))
            except (H2OConnectionError, H2OServerError) as e:
                # Keep only the first line of the error message for the summary.
                message = str(e)
                if "\n" in message:
                    message = message[:message.index("\n")]
                errors.append("[%s.%02d] %s: %s" % (time.strftime("%M:%S"), int(time.time() * 100) % 100,
                                                    e.__class__.__name__, message))
            # Cloud too small, or voting in progress, or server is not up yet; sleep then try again
            time.sleep(0.2)

        self._print(" " + messages[2])
        if cld and not cld.cloud_healthy:
            raise H2OServerError("Cluster reports unhealthy status")
        if cld and not cld.consensus:
            raise H2OServerError("Cluster cannot reach consensus")
        else:
            raise H2OConnectionError("Could not establish link to the H2O cloud %s after %d retries\n%s"
                                     % (self._base_url, max_retries, "\n".join(errors)))

    @staticmethod
    def _prepare_data_payload(data):
        """
        Make a copy of the `data` object, preparing it to be sent to the server.

        The data will be sent via x-www-form-urlencoded or multipart/form-data mechanisms. Both of them work with
        plain lists of key/value pairs, so this method converts the data into such format.
        """
        if not data:
            return None
        res = {}
        for key, value in data.items():
            if value is None:
                continue  # don't send args set to None so backend defaults take precedence
            if isinstance(value, list):
                value = stringify_list(value)
            elif isinstance(value, dict):
                if "__meta" in value and value["__meta"]["schema_name"].endswith("KeyV3"):
                    value = value["name"]
                else:
                    value = stringify_dict(value)
            else:
                value = str(value)
            res[key] = value
        return res

    @staticmethod
    def _prepare_file_payload(filename):
        """
        Prepare `filename` to be sent to the server.

        The "preparation" consists of creating a data structure suitable for passing to requests.request().

        :returns: the file opened in binary read mode (the caller is responsible for closing it), or None if
            ``filename`` is empty.
        :raises H2OValueError: if the file does not exist.
        """
        if not filename:
            return None
        absfilename = os.path.abspath(filename)
        if not os.path.exists(absfilename):
            raise H2OValueError("File %s does not exist" % filename, skip_frames=1)
        return open(absfilename, "rb")

    def _log_start_transaction(self, endpoint, data, json, filename, params):
        """Log the beginning of an API request."""
        # TODO: add information about the caller, i.e. which module + line of code called the .request() method
        #       This can be done by fetching current traceback and then traversing it until we find the request function
        self._requests_counter += 1
        if not self._is_logging:
            return
        msg = "\n---- %d --------------------------------------------------------\n" % self._requests_counter
        msg += "[%s] %s\n" % (time.strftime("%H:%M:%S"), endpoint)
        if params is not None:
            msg += " params: {%s}\n" % ", ".join("%s:%s" % item for item in params.items())
        if json is not None:
            import json as j
            msg += " json: %s\n" % j.dumps(json)
        if filename is not None:
            msg += " file: %s\n" % filename
        elif data is not None:
            msg += " body: {%s}\n" % ", ".join("%s:%s" % item for item in data.items())
        self._log_message(msg + "\n")

    def _log_end_transaction(self, start_time, response):
        """Log response from an API request."""
        if not self._is_logging:
            return
        elapsed_time = int((time.time() - start_time) * 1000)
        msg = "<<< HTTP %d %s (%d ms)\n" % (response.status_code, response.reason, elapsed_time)
        if "Content-Type" in response.headers:
            msg += " Content-Type: %s\n" % response.headers["Content-Type"]
        msg += response.text
        self._log_message(msg + "\n\n")

    def _log_end_exception(self, exception):
        """Log API request that resulted in an exception."""
        if not self._is_logging:
            return
        self._log_message(">>> %s\n\n" % str(exception))

    def _log_message(self, msg):
        """
        Log the message `msg` to the destination `self._logging_dest`.

        If this destination is a file name, then we append the message to the file and then close the file
        immediately. If the destination is an open file handle, then we simply write the message there and do not
        attempt to close it.
        """
        if is_type(self._logging_dest, str):
            with open(self._logging_dest, "at", encoding="utf-8") as f:
                f.write(msg)
        else:
            self._logging_dest.write(msg)

    @staticmethod
    def _process_response(response, save_to):
        """
        Given a response object, prepare it to be handed over to the external caller.

        Preparation steps include:
           * detect if the response has error status, and convert it to an appropriate exception;
           * detect Content-Type, and based on that either parse the response as JSON or return as plain text.

        When the status is 200 and ``save_to`` is given, the response body is written to that file (or into that
        folder) instead, and the full path of the written file is returned.
        """
        status_code = response.status_code
        if status_code == 200 and save_to:
            if save_to.startswith("~"):
                save_to = os.path.expanduser(save_to)
            if os.path.isdir(save_to) or save_to.endswith(os.path.sep):
                dirname = os.path.join(os.path.abspath(save_to), '')
                filename = H2OConnection._find_file_name(response)
            else:
                dirname, filename = os.path.split(os.path.abspath(save_to))
            fullname = os.path.join(dirname, filename)
            try:
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                with open(fullname, "wb") as f:
                    for chunk in response.iter_content(chunk_size=65536):
                        if chunk:  # Empty chunks may occasionally happen
                            f.write(chunk)
            except OSError as e:
                raise H2OValueError("Cannot write to file %s: %s" % (fullname, e))
            return fullname

        content_type = response.headers.get("Content-Type", "")
        if ";" in content_type:  # Remove a ";charset=..." part
            content_type = content_type[:content_type.index(";")]

        # this is needed so that response.text works correctly
        response.encoding = response.headers.get("Character-Encoding", response.encoding)

        # Auto-detect response type by its content-type. Decode JSON, all other responses pass as-is.
        if content_type == "application/json":
            try:
                data = response.json(object_pairs_hook=H2OResponse)
            except (JSONDecodeError, requests.exceptions.ContentDecodingError) as e:
                raise H2OServerError("Malformed JSON from server (%s):\n%s" % (str(e), response.text))
        else:
            data = response.text

        # Success (200 = "Ok", 201 = "Created", 202 = "Accepted", 204 = "No Content")
        if status_code in {200, 201, 202, 204}:
            return data

        # Client errors (400 = "Bad Request", 404 = "Not Found", 412 = "Precondition Failed")
        if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3):
            data.show_stacktrace = False
            raise H2OResponseError(data)

        # Server errors (notably 500 = "Server Error")
        # Note that it is possible to receive valid H2OErrorV3 object in this case, however it merely means the server
        # did not provide the correct status code.
        raise H2OServerError("HTTP %d %s:\n%s" % (status_code, response.reason, data))

    @staticmethod
    def _find_file_name(response):
        """Return the quoted ``filename`` from the Content-Disposition header, or "unknown" if absent."""
        cd = response.headers.get("Content-Disposition", "")
        mm = re.search(r'filename="(.*)"$', cd)
        return mm.group(1) if mm else "unknown"

    def _print(self, msg, flush=False, end="\n"):
        """Helper function to print connection status messages when in verbose mode."""
        if self._verbose:
            print2(msg, end=end, flush=flush)

    def __repr__(self):
        """Short description of the connection state (uninitialized / connected / closed)."""
        if self._stage == 0:
            return "<H2OConnection uninitialized>"
        elif self._stage == 1:
            sess = "session %s" % self._session_id if self._session_id else "no session"
            return "<H2OConnection to %s, %s>" % (self._base_url, sess)
        else:
            return "<H2OConnection closed>"

    def __enter__(self):
        """Called when an H2OConnection object is created within the ``with ...`` statement."""
        return self

    def __exit__(self, *args):
        """Called at the end of the ``with ...`` statement."""
        self.close()
        assert len(args) == 3  # Avoid warning about unused args...
        return False  # ensure that any exception will be re-raised
class H2OResponse(dict):
    """
    Dictionary built for each JSON object decoded from a server response.

    The class is meant to be passed as the ``object_pairs_hook`` of the JSON decoder. If the decoded object
    declares one of the schemas recognized in :meth:`__new__`, an instance of the corresponding specialized
    class is returned instead of a plain ``H2OResponse``.
    """

    def __new__(cls, keyvals):
        # The JSON decoder calls this with a list of (key, value) tuples, for example:
        #     [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]
        schema = None
        for key, value in keyvals:
            if key == "__meta" and isinstance(value, dict):
                schema = value["schema_name"]
            elif key == "__schema" and is_type(value, str):
                schema = value
            else:
                continue
            # Only reached when one of the two branches above has found the schema name.
            break

        # Schemas that always map onto a dedicated class.
        known_schemas = (
            ("MetadataV3", H2OMetadataV3),
            ("CloudV3", H2OCluster),
            ("H2OErrorV3", H2OErrorV3),
            ("H2OModelBuilderErrorV3", H2OModelBuilderErrorV3),
            ("TwoDimTableV3", H2OTwoDimTable),
        )
        for schema_name, schema_class in known_schemas:
            if schema == schema_name:
                return schema_class.make(keyvals)

        # Model metrics: `make_metrics` may return None, in which case we fall through to a plain dictionary.
        if schema and schema.startswith("ModelMetrics"):
            metrics = make_metrics(schema, keyvals)
            if metrics is not None:
                return metrics

        return super(H2OResponse, cls).__new__(cls, keyvals)

    # def __getattr__(self, key):
    #     """This gets invoked for any attribute "key" that is NOT yet defined on the object."""
    #     if key in self:
    #         return self[key]
    #     return None


_classes_defined_from_schema_ = [H2OCluster, H2OErrorV3, H2OModelBuilderErrorV3]


# Discover which exception type is raised when a response body is not valid JSON: feed a deliberately
# malformed payload to a throw-away Response object and remember the type of whatever gets raised.
JSONDecodeError = None
_r = None
try:
    _r = requests.Response()
    _r._content = b"haha"
    _r.json()
except Exception as exc:
    JSONDecodeError = type(exc)
del _r


# ----------------------------------------------------------------------------------------------------------------------
#   Deprecated method implementations
# ----------------------------------------------------------------------------------------------------------------------

__H2OCONN__ = H2OConnection()  # Latest instantiated H2OConnection object. Do not use in any new code!
__H2O_REST_API_VERSION__ = 3  # Has no actual meaning


def _deprecated_default():
    """Set ``H2OConnection.__ENCODING__`` to "utf-8" and ``H2OConnection.__ENCODING_ERROR__`` to "replace"."""
    H2OConnection.__ENCODING__ = "utf-8"
    H2OConnection.__ENCODING_ERROR__ = "replace"


def _deprecated_username():
    """Return the first element of the ``_auth`` tuple of ``__H2OCONN__``, or None if it is not a tuple."""
    auth = getattr(__H2OCONN__, "_auth")
    return auth[0] if isinstance(auth, tuple) else None


def _deprecated_password():
    """Return the second element of the ``_auth`` tuple of ``__H2OCONN__``, or None if it is not a tuple."""
    auth = getattr(__H2OCONN__, "_auth")
    return auth[1] if isinstance(auth, tuple) else None


def _deprecated_check_conn():
    """
    Return the module-level ``__H2OCONN__`` object.

    :raises H2OConnectionError: if ``__H2OCONN__`` evaluates as false.
    """
    if not __H2OCONN__:
        raise H2OConnectionError("No active connection to an H2O cluster. Try calling `h2o.connect()`")
    return __H2OCONN__


def _deprecated_endpoint(method, url_suffix, kwargs):
    """
    Build the "<METHOD> /<version>/<url_suffix>" endpoint string shared by the deprecated request helpers.

    The optional ``_rest_version`` entry is removed from ``kwargs`` (which is modified in place) so that it is
    not forwarded as request data; the version defaults to 3 when the entry is absent.
    """
    restver = kwargs.pop("_rest_version", 3)
    return "%s /%d/%s" % (method, restver, url_suffix)


def _deprecated_get(self, url_suffix, **kwargs):
    """
    Issue a GET request through ``self.request()``.

    :param url_suffix: endpoint path without the leading "/<version>/" part.
    :param kwargs: request parameters; ``_rest_version`` (default 3) selects the version in the URL.
    :returns: whatever ``self.request()`` returns.
    """
    endpoint = _deprecated_endpoint("GET", url_suffix, kwargs)
    return self.request(endpoint, data=kwargs)


def _deprecated_post(self, url_suffix, **kwargs):
    """
    Issue a POST request through ``self.request()``.

    :param url_suffix: endpoint path without the leading "/<version>/" part.
    :param kwargs: request parameters; ``_rest_version`` (default 3) selects the version in the URL, and
        ``file_upload_info`` -- a dictionary whose first value is passed on as ``filename`` -- is removed
        from the request data.
    :returns: whatever ``self.request()`` returns.
    """
    endpoint = _deprecated_endpoint("POST", url_suffix, kwargs)
    filename = None
    upload_info = kwargs.pop("file_upload_info", None)
    if upload_info:
        # An empty (or None) `file_upload_info` means "no file": previously an empty dict leaked a
        # StopIteration out of `next()` and None raised an AttributeError.
        filename = next(iter(upload_info.values()))
    return self.request(endpoint, data=kwargs, filename=filename)


def _deprecated_delete(self, url_suffix, **kwargs):
    """
    Issue a DELETE request through ``self.request()``.

    :param url_suffix: endpoint path without the leading "/<version>/" part.
    :param kwargs: request parameters; ``_rest_version`` (default 3) selects the version in the URL.
    :returns: whatever ``self.request()`` returns.
    """
    endpoint = _deprecated_endpoint("DELETE", url_suffix, kwargs)
    return self.request(endpoint, data=kwargs)


def end_session():
    """Deprecated, use connection.close() instead."""
    print("Warning: end_session() is deprecated")
    __H2OCONN__.close()