See: Description
$ java -jar h2o.jar
$ mkdir experiment
$ cd experiment
$ mv ~/Downloads/gbm_pojo_test.java .
$ curl http://localhost:54321/3/h2o-genmodel.jar > h2o-genmodel.jar
import java.io.*;
import hex.genmodel.easy.RowData;
import hex.genmodel.easy.EasyPredictModelWrapper;
import hex.genmodel.easy.prediction.*;
/**
 * Minimal command-line scorer for a generated POJO model.
 *
 * Loads the generated model class by name, wraps it in an
 * EasyPredictModelWrapper, scores one hand-built row and prints the
 * predicted label and the class probabilities to standard output.
 */
public class main {
    /** Name of the generated POJO class; it must be on the classpath at run time. */
    private static final String modelClassName = "gbm_pojo_test";

    /**
     * Scores a single airline-delay row with the POJO model.
     *
     * @param args unused
     * @throws Exception if the model class cannot be loaded or instantiated,
     *                   or if the prediction fails
     */
    public static void main(String[] args) throws Exception {
        // Class.newInstance() is deprecated; instantiate through the no-arg
        // constructor instead so constructor failures are reported explicitly.
        hex.genmodel.GenModel rawModel = (hex.genmodel.GenModel)
            Class.forName(modelClassName).getDeclaredConstructor().newInstance();
        EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel);

        //
        // By default, unknown categorical levels throw PredictUnknownCategoricalLevelException.
        // Optionally configure the wrapper to treat unknown categorical levels as N/A instead
        // and strings that cannot be converted to numbers also to N/As:
        //
        // EasyPredictModelWrapper model = new EasyPredictModelWrapper(
        //     new EasyPredictModelWrapper.Config()
        //         .setModel(rawModel)
        //         .setConvertUnknownCategoricalLevelsToNa(true)
        //         .setConvertInvalidNumbersToNa(true)
        // );

        // One input row; every value is passed as a string keyed by column name.
        RowData row = new RowData();
        row.put("Year", "1987");
        row.put("Month", "10");
        row.put("DayofMonth", "14");
        row.put("DayOfWeek", "3");
        row.put("CRSDepTime", "730");
        row.put("UniqueCarrier", "PS");
        row.put("Origin", "SAN");
        row.put("Dest", "SFO");

        BinomialModelPrediction p = model.predictBinomial(row);
        System.out.println("Label (aka prediction) is flight departure delayed: " + p.label);

        // Print the probabilities as a comma-separated list on one line.
        System.out.print("Class probabilities: ");
        for (int i = 0; i < p.classProbabilities.length; i++) {
            if (i > 0) {
                System.out.print(",");
            }
            System.out.print(p.classProbabilities[i]);
        }
        System.out.println("");
    }
}
$ javac -cp h2o-genmodel.jar -J-Xmx2g -J-XX:MaxPermSize=128m gbm_pojo_test.java main.java
# Linux and OS X users
$ java -cp .:h2o-genmodel.jar main
# Windows users
$ java -cp .;h2o-genmodel.jar main

The following output displays:
Label (aka prediction) is flight departure delayed: YES
Class probabilities: 0.4790490513429604,0.5209509486570396
# Train a binomial GLM on the prostate data shipped with the h2o package
# and download the resulting model as a POJO.
library(h2o)
h2o.init()

# Locate and import the bundled CSV file.
prostate_csv <- system.file("extdata", "prostate.csv", package = "h2o")
prostate <- h2o.importFile(prostate_csv)

# The response must be a factor for a binomial model.
prostate$CAPSULE <- as.factor(prostate$CAPSULE)

predictors <- c("AGE", "RACE", "PSA", "GLEASON")
model <- h2o.glm(x = predictors,
                 y = "CAPSULE",
                 training_frame = prostate,
                 family = "binomial")

h2o.download_pojo(model)
# Train a binomial GLM on the public prostate data set and download it as a POJO.
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator

h2o.init()

prostate_url = "http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip"
prostate = h2o.import_file(prostate_url)

# Convert the response column to a categorical (factor) column.
prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()

predictors = ["AGE", "RACE", "PSA", "GLEASON"]
model = H2OGeneralizedLinearEstimator(family = "binomial")
model.train(x = predictors,
            y = "CAPSULE",
            training_frame = prostate)

h2o.download_pojo(model)
// Shell session: train a small GBM model on the airlines sample data.
import org.apache.spark.h2o._
// Get (or create) the H2OContext for `sc`.
// NOTE(review): `sc` is not defined in this snippet; presumably the shell's SparkContext — confirm.
val h2oContext = H2OContext.getOrCreate(sc)
// Bring the members of the context instance into scope.
import h2oContext._
import org.apache.spark.examples.h2o._
import java.io.File
// Relative path to the gzipped airlines CSV.
// NOTE(review): presumably resolved against the directory the shell was started from — confirm.
val dataFile = "examples/smalldata/allyears2k_headers.csv.gz"
// Build an H2OFrame from the data file.
val airlinesData = new H2OFrame(new File(dataFile))
import _root_.hex.tree.gbm.GBM
import _root_.hex.tree.gbm.GBMModel.GBMParameters
// GBM configuration: training frame, response column, model key, 5 trees of depth 2.
val gbmParams = new GBMParameters()
gbmParams._train = airlinesData
// NOTE(review): a Symbol literal is assigned here; this presumably relies on an
// implicit conversion supplied by the imports above — confirm.
gbmParams._response_column = 'IsDepDelayed
gbmParams._model_id = water.Key.make("model")
gbmParams._ntrees = 5
gbmParams._max_depth = 2
// Create the builder, start training and fetch the resulting model.
// NOTE(review): `.get` presumably blocks until training has finished — confirm.
val gbm = new GBM(gbmParams)
val model = gbm.trainModel.get
import water._
import _root_.hex._
import java.net.URI
import water.serial.ObjectTreeBinarySerializer
/**
 * Writes the Java (POJO) source of `model` to the file identified by `destination`.
 *
 * @param model       the H2O model to export
 * @param destination URI of the target file; it is passed to `java.io.File(URI)`,
 *                    so it must be a `file:` URI
 * @return the `destination` URI that was passed in
 */
def exportPOJOModel(model: Model[_, _, _], destination: URI): URI = {
  // Build the writer before opening the stream, so that a failure while
  // constructing it cannot leave an open (leaked) file handle behind.
  val writer = new model.JavaModelStreamWriter(false)
  val fos = new java.io.FileOutputStream(new File(destination))
  try {
    writer.writeTo(fos)
  } finally {
    // Always release the file handle, even when writing fails.
    fos.close()
  }
  destination
}
// Export the model trained above to GbmModel.java in the current directory.
exportPOJOModel(model, new File("./GbmModel.java").toURI)
PredictCsv
class is used by the H2O test harness to make
predictions on new data points.
download_mojo()
function saves the model as a zip file. You can
unzip the file to view the options used to build the file along with each
tree built in the model. Note that each tree file is saved as a binary file type.
# Train a GBM on the prostate data shipped with the h2o package,
# then download the model as a MOJO zip file.
library(h2o)
h2o.init(nthreads=-1)
path <- system.file("extdata", "prostate.csv", package="h2o")
h2o_df <- h2o.importFile(path)
h2o_df$CAPSULE <- as.factor(h2o_df$CAPSULE)
model <- h2o.gbm(y="CAPSULE",
                 x=c("AGE", "RACE", "PSA", "GLEASON"),
                 training_frame=h2o_df,
                 distribution="bernoulli",
                 ntrees=100,
                 max_depth=4,
                 learn_rate=0.1)

# Save the MOJO (and h2o-genmodel.jar) into the experiment directory used by
# the following steps.
modelfile <- h2o.download_mojo(model, path="~/experiment/", get_genmodel_jar=TRUE)
# `+` is not defined for character strings in R; build the message with paste0().
print(paste0("Model saved to ", modelfile))
# Example output:
# [1] "Model saved to /Users/user/GBM_model_R_1475248925871_74.zip"
# Train a GBM on the prostate data set, then download the model as a MOJO zip file.
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator

h2o.init()
h2o_df = h2o.load_dataset("prostate.csv")
h2o_df["CAPSULE"] = h2o_df["CAPSULE"].asfactor()
model = H2OGradientBoostingEstimator(distribution="bernoulli",
                                     ntrees=100,
                                     max_depth=4,
                                     learn_rate=0.1)
model.train(y="CAPSULE",
            x=["AGE", "RACE", "PSA", "GLEASON"],
            training_frame=h2o_df)

# Save the MOJO (and h2o-genmodel.jar) into the experiment directory.
modelfile = model.download_mojo(path="~/experiment/", get_genmodel_jar=True)
print("Model saved to " + modelfile)
# Example output:
# Model saved to /Users/user/GBM_model_python_1475248925871_888.zip
$ cd experiment
import java.io.*;
import hex.genmodel.easy.RowData;
import hex.genmodel.easy.EasyPredictModelWrapper;
import hex.genmodel.easy.prediction.*;
import hex.genmodel.MojoModel;

/**
 * Scores one hand-built row with the MOJO stored in the zip file in the
 * working directory and prints the predicted label and class probabilities.
 */
public class main {
    public static void main(String[] args) throws Exception {
        // Load the MOJO archive and wrap it for row-by-row scoring.
        MojoModel mojo = MojoModel.load("GBM_model_R_1475248925871_74.zip");
        EasyPredictModelWrapper model = new EasyPredictModelWrapper(mojo);

        // Input values are passed as strings keyed by column name.
        RowData row = new RowData();
        row.put("AGE", "68");
        row.put("RACE", "2");
        row.put("DCAPS", "2");
        row.put("VOL", "0");
        row.put("GLEASON", "6");

        BinomialModelPrediction p = model.predictBinomial(row);
        System.out.println("Has penetrated the prostatic capsule (1=yes; 0=no): " + p.label);

        // Join the probabilities with commas and print them on a single line.
        StringBuilder probabilities = new StringBuilder();
        for (double probability : p.classProbabilities) {
            if (probabilities.length() > 0) {
                probabilities.append(",");
            }
            probabilities.append(probability);
        }
        System.out.println("Class probabilities: " + probabilities);
    }
}

GBM and DRF return classProbabilities, but not all MOJOs will return a classProbabilities field. Refer to the ModelPrediction definition for each algorithm to find the correct field(s) to access. This is available in the H2O-3 GitHub repo at: https://github.com/h2oai/h2o-3/tree/master/h2o-genmodel/src/main/java/hex/genmodel/easy/prediction. In addition to classProbabilities, you can choose to generate additional
leafNodeAssignments
(GBM, DRF, Isolation Forest and XGBoost) and contributions
(GBM, DRF and XGBoost) fields. The leafNodeAssignments
field will show the decision path through each tree. The contributions
field will provide Shapley contributions. Note that these fields may slow down the MOJO as they add computation. Below is the Java code showing how to return both the leaf node assignment and the contributions:
import java.io.*; import hex.genmodel.easy.RowData; import hex.genmodel.easy.EasyPredictModelWrapper; import hex.genmodel.easy.prediction.*; import hex.genmodel.MojoModel; public class main { public static void main(String[] args) throws Exception { EasyPredictModelWrapper.Config config = new EasyPredictModelWrapper.Config().setModel(MojoModel.load("GBM_model_R_1475248925871_74.zip")).setEnableLeafAssignment(true); EasyPredictModelWrapper model = new EasyPredictModelWrapper(config); RowData row = new RowData(); row.put("AGE", "68"); row.put("RACE", "2"); row.put("DCAPS", "2"); row.put("VOL", "0"); row.put("GLEASON", "6"); BinomialModelPrediction p = model.predictBinomial(row); System.out.println("Has penetrated the prostatic capsule (1=yes; 0=no): " + p.label); System.out.print("Class probabilities: "); for (int i = 0; i < p.classProbabilities.length; i++) { if (i > 0) { System.out.print(","); } System.out.print(p.classProbabilities[i]); } System.out.println("Leaf node assighnments: "); for (int i=0; i < p.leafNodeAssignments; i++) { if (i > 0) { System.out.print.(p.leafNodeAssignments[i]); } } System.out.println(""); } }
$ javac -cp h2o-genmodel.jar -J-Xms2g -J-XX:MaxPermSize=128m main.java
# Linux and OS X users
$ java -cp .:h2o-genmodel.jar main
# Windows users
$ java -cp .;h2o-genmodel.jar main
Has penetrated the prostatic capsule (1=yes; 0=no): 0
Class probabilities: 0.8059929056296662,0.19400709437033375
# Train a small GBM on the airlines data and download it as a MOJO so that a
# single tree can be rendered with the PrintMojo tool.
library(h2o)
h2o.init()
df <- h2o.importFile("http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
model <- h2o.gbm(model_id = "model",
                 training_frame = df,
                 x = c("Year", "Month", "DayofMonth", "DayOfWeek", "UniqueCarrier"),
                 y = "IsDepDelayed",
                 max_depth = 3,
                 ntrees = 5)
h2o.download_mojo(model, getwd(), FALSE)

# Now download the latest stable h2o release from http://www.h2o.ai/download/
# and run the PrintMojo tool from the command line.
#
# (For MacOS: brew install graphviz)
#
# The following are shell commands, not R code:
#   java -cp h2o.jar hex.genmodel.tools.PrintMojo --tree 0 -i model.zip -o model.gv -f 20 -d 3
#   dot -Tpng model.gv -o model.png
#   open model.png
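<groupId>ai.h2o</groupId>
<artifactId>xgboost-mojo-example</artifactId>
<version>1.0-SNAPSHOT</version>

<dependency>
    <groupId>ai.h2o</groupId>
    <artifactId>h2o-genmodel-ext-xgboost</artifactId>
    <version>3.20.0.3</version>
</dependency>
<dependency>
    <groupId>ai.h2o</groupId>
    <artifactId>h2o-genmodel</artifactId>
    <version>3.20.0.3</version>
</dependency>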