public abstract class GenModel extends java.lang.Object implements IGenModel, IGeneratedModel, java.io.Serializable
Modifier and Type | Field and Description |
---|---|
java.lang.String[][] |
_domains
Categorical (factor/enum) mappings, per column.
|
java.lang.String |
_foldColumn |
java.lang.String[] |
_names
Column names; last is response for supervised models
|
java.lang.String |
_offsetColumn
Name of the column with offsets (used for certain types of models).
|
java.lang.String |
_responseColumn
Name of the response column used for training (only for supervised models).
|
java.lang.String |
_treatmentColumn
Name of the column that determines the treatment group; currently used only for UpliftDRF models.
|
Constructor and Description |
---|
GenModel(java.lang.String[] names,
java.lang.String[][] domains)
Deprecated.
This constructor is deprecated and will be removed in a future version.
Use
GenModel(String[] names, String[][] domains, String responseColumn) instead. |
GenModel(java.lang.String[] names,
java.lang.String[][] domains,
java.lang.String responseColumn) |
GenModel(java.lang.String[] names,
java.lang.String[][] domains,
java.lang.String responseColumn,
java.lang.String treatmentColumn) |
Modifier and Type | Method and Description |
---|---|
static boolean |
bitSetContains(byte[] bits,
int nbits,
int bitoff,
double dnum) |
static boolean |
bitSetIsInRange(int nbits,
int bitoff,
double dnum) |
boolean |
calibrateClassProbabilities(double[] preds)
Subclasses implement calibration of class probabilities.
|
static float[] |
convertDouble2Float(double[] input) |
static double[] |
correctProbabilities(double[] scored,
double[] priorClassDist,
double[] modelClassDist)
Corrects a given list of class probabilities, produced as a prediction by a model, back to the prior class distribution.
|
static java.lang.String |
createAuxKey(java.lang.String k) |
java.lang.String[] |
features()
Returns names of input features.
|
static void |
GBM_rescale(double[] preds) |
CategoricalEncoding |
getCategoricalEncoding()
Return type of encoding expected by the model implementation.
|
int |
getColIdx(java.lang.String name)
Returns index of a column with given name, or -1 if the column is not found.
|
java.lang.String[][] |
getDomainValues()
Returns domain values for all columns, including the response column.
|
java.lang.String[] |
getDomainValues(int i)
Returns domain values for the i-th column.
|
java.lang.String[] |
getDomainValues(java.lang.String name)
Gets domain of the given column.
|
java.lang.String |
getHeader()
???
|
java.util.EnumSet<ModelCategory> |
getModelCategories()
Override this for models that may produce results in different categories.
|
abstract ModelCategory |
getModelCategory()
Returns this model category.
|
java.lang.String[] |
getNames()
The names of all columns used, including response and offset columns.
|
int |
getNumClasses(int colIdx)
Get number of classes in the given column.
|
int |
getNumCols()
Returns number of columns used as input for training (i.e., exclude response and offset columns).
|
int |
getNumResponseClasses()
Returns the number of classes in the response column.
|
java.lang.String |
getOffsetName()
The name of the offset column.
|
java.lang.String[][] |
getOrigDomainValues()
Returns original domain values for all columns including response column.
|
java.lang.String[] |
getOrigNames()
The original names of all columns used, including response and offset columns.
|
int |
getOrigNumCols() |
double[] |
getOrigProjectionArray()
Returns original Eigen encoder projections array for all columns.
|
java.lang.String[][] |
getOutputDomains()
Companion method to getOutputNames.
|
java.lang.String[] |
getOutputNames() |
static int |
getPrediction(double[] preds,
double[] priorClassDist,
double[] data,
double threshold)
Utility function to get the best prediction from an array of class
prediction distributions.
|
static int |
getPredictionBinomial(double[] preds,
double threshold) |
static int |
getPredictionMultinomial(double[] preds,
double[] priorClassDist,
double[] data) |
int |
getPredsSize()
Returns the expected size of preds array which is passed to `predict(double[], double[])` function.
|
int |
getPredsSize(ModelCategory mc) |
int |
getResponseIdx()
Returns the index of the response column inside getDomains().
|
java.lang.String |
getResponseName()
The name of the response column.
|
abstract java.lang.String |
getUUID()
Returns model's unique identifier.
|
static double |
GLM_identityInv(double x) |
static double |
GLM_inverseInv(double x) |
static double |
GLM_logInv(double x) |
static double |
GLM_logitInv(double x) |
static double |
GLM_ologitInv(double x) |
static double |
GLM_tweedieInv(double x,
double tweedie_link_power) |
static void |
img2pixels(java.awt.image.BufferedImage img,
int w,
int h,
int channels,
float[] pixels,
int start,
float[] mean) |
GenModel |
internal_threadSafeInstance()
Deprecated.
|
boolean |
isAutoEncoder()
Returns true if this model represents an AutoEncoder.
|
boolean |
isClassifier()
Returns true if this model represents a classifier, else it is used for regression.
|
boolean |
isSupervised()
Returns true for supervised models.
|
static int |
KMeans_closest(double[][] centers,
double[] point,
java.lang.String[][] domains) |
static double |
KMeans_distance(double[] center,
double[] point,
java.lang.String[][] domains) |
static double |
KMeans_distance(double[] center,
float[] point,
int[] modes,
double[] colSum,
double[] colSumSq) |
static int |
KMeans_distances(double[][] centers,
double[] point,
java.lang.String[][] domains,
double[] distances) |
static void |
Kmeans_preprocessData(double[] data,
double[] means,
double[] mults,
int[] modes) |
static double |
Kmeans_preprocessData(double d,
int i,
double[] means,
double[] mults,
int[] modes) |
static double[] |
KMeans_simplex(double[][] centers,
double[] point,
java.lang.String[][] domains) |
static double |
log_rescale(double[] preds) |
int |
mapEnum(int colIdx,
java.lang.String enumValue)
Maps given column's categorical to the integer used by this model (returns -1 if mapping not found).
|
int |
nCatFeatures() |
int |
nclasses()
Returns number of output classes for classifiers, 1 for regression models, and 0 for unsupervised models.
|
int |
nfeatures()
Returns number of input features.
|
boolean |
requiresOffset() |
abstract double[] |
score0(double[] row,
double[] preds)
Subclasses implement the scoring logic.
|
double[] |
score0(double[] row,
double offset,
double[] preds) |
static void |
setCats(double[] from,
double[] nums,
int[] cats,
int _cats,
int[] _catOffsets,
double[] _normMul,
double[] _normSub,
boolean useAllFactorLevels) |
static void |
setCats(double[] from,
int[] to,
int cats,
int[] catOffsets,
boolean useAllFactorLevels) |
static void |
setInput(double[] from,
double[] to,
double[] nums,
int[] cats,
int _nums,
int _cats,
int[] _catOffsets,
double[] _normMul,
double[] _normSub,
boolean useAllFactorLevels,
boolean replaceMissingWithZero) |
static void |
setInput(double[] from,
float[] to,
int _nums,
int _cats,
int[] _catOffsets,
double[] _normMul,
double[] _normSub,
boolean useAllFactorLevels,
boolean replaceMissingWithZero) |
public final java.lang.String[] _names
public final java.lang.String[][] _domains
public final java.lang.String _responseColumn
public java.lang.String _offsetColumn
public java.lang.String _foldColumn
public java.lang.String _treatmentColumn
public GenModel(java.lang.String[] names, java.lang.String[][] domains, java.lang.String responseColumn)
public GenModel(java.lang.String[] names, java.lang.String[][] domains, java.lang.String responseColumn, java.lang.String treatmentColumn)
@Deprecated public GenModel(java.lang.String[] names, java.lang.String[][] domains)
Use GenModel(String[] names, String[][] domains, String responseColumn)
instead.
public boolean requiresOffset()
public boolean isSupervised()
isSupervised
in interface IGenModel
public int nfeatures()
public int nCatFeatures()
public java.lang.String[] features()
public int nclasses()
public abstract ModelCategory getModelCategory()
getModelCategory
in interface IGenModel
ModelCategory
public java.lang.String[] getOutputNames()
public java.lang.String[][] getOutputDomains()
public java.util.EnumSet<ModelCategory> getModelCategories()
getModelCategories
in interface IGenModel
public abstract java.lang.String getUUID()
IGeneratedModel
getUUID
in interface IGeneratedModel
public int getNumCols()
getNumCols
in interface IGeneratedModel
public java.lang.String[] getNames()
getNames
in interface IGeneratedModel
public int getOrigNumCols()
public java.lang.String[] getOrigNames()
getOrigNames
in interface IGeneratedModel
public java.lang.String getResponseName()
getResponseName
in interface IGeneratedModel
public int getResponseIdx()
getResponseIdx
in interface IGeneratedModel
public java.lang.String getOffsetName()
IGeneratedModel
getOffsetName
in interface IGeneratedModel
public int getNumClasses(int colIdx)
getNumClasses
in interface IGeneratedModel
public int getNumResponseClasses()
getNumResponseClasses
in interface IGeneratedModel
public CategoricalEncoding getCategoricalEncoding()
getCategoricalEncoding
in interface IGeneratedModel
public boolean isClassifier()
isClassifier
in interface IGeneratedModel
public boolean isAutoEncoder()
isAutoEncoder
in interface IGeneratedModel
public java.lang.String[] getDomainValues(java.lang.String name)
getDomainValues
in interface IGeneratedModel
name
- column name
public java.lang.String[] getDomainValues(int i)
getDomainValues
in interface IGeneratedModel
i
- index of column
public java.lang.String[][] getDomainValues()
getDomainValues
in interface IGeneratedModel
public java.lang.String[][] getOrigDomainValues()
IGeneratedModel
getOrigDomainValues
in interface IGeneratedModel
public double[] getOrigProjectionArray()
getOrigProjectionArray
in interface IGeneratedModel
public int getColIdx(java.lang.String name)
getColIdx
in interface IGeneratedModel
public int mapEnum(int colIdx, java.lang.String enumValue)
mapEnum
in interface IGeneratedModel
public int getPredsSize()
getPredsSize
in interface IGeneratedModel
public int getPredsSize(ModelCategory mc)
public static java.lang.String createAuxKey(java.lang.String k)
public abstract double[] score0(double[] row, double[] preds)
public double[] score0(double[] row, double offset, double[] preds)
public boolean calibrateClassProbabilities(double[] preds)
public static double[] correctProbabilities(double[] scored, double[] priorClassDist, double[] modelClassDist)
The implementation is based on Eq. (27) in the paper.
scored
- list of class probabilities beginning at index 1
priorClassDist
- original class distribution
modelClassDist
- class distribution used for model building (e.g., data was oversampled)
public static int getPrediction(double[] preds, double[] priorClassDist, double[] data, double threshold)
preds
- an array of prediction distribution. The length of the array is equal to the number of classes + 1.
priorClassDist
- prior class probabilities (used to break ties)
data
- test data
threshold
- threshold for binary classifier
public static int getPredictionBinomial(double[] preds, double threshold)
public static int getPredictionMultinomial(double[] preds, double[] priorClassDist, double[] data)
public static boolean bitSetContains(byte[] bits, int nbits, int bitoff, double dnum)
public static boolean bitSetIsInRange(int nbits, int bitoff, double dnum)
public static void Kmeans_preprocessData(double[] data, double[] means, double[] mults, int[] modes)
public static double Kmeans_preprocessData(double d, int i, double[] means, double[] mults, int[] modes)
public static int KMeans_closest(double[][] centers, double[] point, java.lang.String[][] domains)
public static int KMeans_distances(double[][] centers, double[] point, java.lang.String[][] domains, double[] distances)
public static double[] KMeans_simplex(double[][] centers, double[] point, java.lang.String[][] domains)
public static double KMeans_distance(double[] center, float[] point, int[] modes, double[] colSum, double[] colSumSq)
public static double KMeans_distance(double[] center, double[] point, java.lang.String[][] domains)
public static double log_rescale(double[] preds)
public static void GBM_rescale(double[] preds)
public static double GLM_identityInv(double x)
public static double GLM_logitInv(double x)
public static double GLM_logInv(double x)
public static double GLM_inverseInv(double x)
public static double GLM_ologitInv(double x)
public static double GLM_tweedieInv(double x, double tweedie_link_power)
public java.lang.String getHeader()
public static void setInput(double[] from, float[] to, int _nums, int _cats, int[] _catOffsets, double[] _normMul, double[] _normSub, boolean useAllFactorLevels, boolean replaceMissingWithZero)
public static void setInput(double[] from, double[] to, double[] nums, int[] cats, int _nums, int _cats, int[] _catOffsets, double[] _normMul, double[] _normSub, boolean useAllFactorLevels, boolean replaceMissingWithZero)
public static void setCats(double[] from, double[] nums, int[] cats, int _cats, int[] _catOffsets, double[] _normMul, double[] _normSub, boolean useAllFactorLevels)
public static void setCats(double[] from, int[] to, int cats, int[] catOffsets, boolean useAllFactorLevels)
public static float[] convertDouble2Float(double[] input)
public static void img2pixels(java.awt.image.BufferedImage img, int w, int h, int channels, float[] pixels, int start, float[] mean) throws java.io.IOException
java.io.IOException
@Deprecated public GenModel internal_threadSafeInstance()