public class DataInfo extends water.Keyed<DataInfo>
Modifier and Type | Class and Description |
---|---|
static interface |
DataInfo.Imputer |
static class |
DataInfo.MeanImputer |
class |
DataInfo.Row |
class |
DataInfo.Rows |
static class |
DataInfo.TransformType |
Modifier and Type | Field and Description |
---|---|
int[] |
_activeCols |
water.fvec.Frame |
_adaptedFrame |
int[][] |
_catLvls |
boolean[] |
_catMissing |
int[] |
_catOffsets |
int |
_cats |
java.lang.String[] |
_coefNames |
int[] |
_coefOriginalIndices |
boolean |
_fold |
boolean |
_imputeMissing |
hex.Model.InteractionPair[] |
_interactions |
hex.Model.InteractionSpec |
_interactionSpec |
int[] |
_interactionVecs |
boolean |
_intercept |
int[][] |
_intLvls |
double[] |
_normMul |
double[] |
_normRespMul |
double[] |
_normRespSub |
double[] |
_normSigmaStandardizationOff |
double[] |
_normSub |
double[] |
_normSubStandardizationOff |
double[] |
_numMeans |
double[] |
_numNAFill |
int[] |
_numOffsets |
int |
_nums |
boolean |
_offset |
int[] |
_permutation |
DataInfo.TransformType |
_predictor_transform |
DataInfo.TransformType |
_response_transform |
int |
_responses |
boolean |
_skipMissing |
boolean |
_treatment |
boolean |
_useAllFactorLevels |
boolean |
_valid |
boolean |
_weights |
Constructor and Description |
---|
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean intercept) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean treatment,
hex.Model.InteractionSpec interactions) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
hex.Model.InteractionSpec interactions) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
DataInfo.Imputer imputer,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean treatment,
hex.Model.InteractionSpec interactions)
The train/valid Frame instances are sorted with the categorical columns first (themselves
sorted by cardinality, greatest to least), followed by all numerical columns.
|
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
DataInfo.Imputer imputer,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
hex.Model.InteractionSpec interactions) |
Modifier and Type | Method and Description |
---|---|
int[] |
activeCols() |
void |
addOutput(java.lang.String name,
water.fvec.Vec v) |
void |
addResponse(java.lang.String[] names,
water.fvec.Vec[] vecs) |
int[] |
catNAFill() |
int |
catNAFill(int cid) |
protected long |
checksum_impl() |
java.lang.String[] |
coefNames() |
int[] |
coefOriginalColumnIndices() |
int[] |
coefOriginalColumnIndices(water.fvec.Frame adaptedFrame) |
java.lang.String[] |
coefOriginalNames() |
java.lang.String[] |
coefOriginalNames(water.fvec.Frame adaptedFrame) |
double[] |
denormalizeBeta(double[] beta) |
DataInfo |
disableIntercept() |
void |
dropInteractions() |
void |
dropWeights() |
DataInfo.Row |
extractDenseRow(water.fvec.Chunk[] chunks,
int rid,
DataInfo.Row row) |
DataInfo.Row[] |
extractSparseRows(water.fvec.Chunk[] chunks)
Extract (sparse) rows from given chunks.
|
DataInfo |
filterExpandedColumns(int[] cols)
Filter the _adaptedFrame so that it contains only the Vecs referenced by the cols
parameter.
|
int |
foldChunkId() |
protected int[] |
fullCatOffsets() |
int |
fullN()
Get the fully expanded number of predictor columns.
|
int |
getCategoricalId(int cid,
double val) |
int |
getCategoricalId(int cid,
int val)
Get the offset into the expanded categorical
|
int |
getCategoricalIdFromInteraction(int cid,
int val) |
int |
getInteractionOffset(water.fvec.Chunk[] chunks,
int cid,
int rid) |
water.fvec.Vec |
getOffsetVec() |
water.fvec.Vec |
getOutputVec(int i) |
water.fvec.Vec |
getWeightsVec() |
static int |
imputeCat(water.fvec.Vec v) |
static int |
imputeCat(water.fvec.Vec v,
boolean useAllFactorLevels) |
boolean |
isInteractionVec(int colid) |
int |
largestCat() |
int[] |
mapNames(java.lang.String[] names) |
DataInfo.Row |
newDenseRow() |
DataInfo.Row |
newDenseRow(double[] numVals,
long start) |
int |
nextNumericIdx(int currentColIdx)
Get the next expanded number-column index.
|
double[] |
normalizeBeta(double[] beta,
boolean standardize) |
double |
normMul(int i) |
double |
normSub(int i) |
int |
numCats() |
double[] |
numNAFill() |
double |
numNAFill(int nid) |
int |
numNums() |
int |
numStart() |
int |
offsetChunkId() |
int |
outputChunkId() |
int |
outputChunkId(int n) |
int |
responseChunkId(int n) |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks) |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks,
boolean sparse) |
DataInfo |
scoringInfo()
Creates a DataInfo for scoring on a test Frame from a DataInfo instance created during model training.
This is a lightweight version of the method, only usable for models that don't use advanced features of DataInfo.
|
DataInfo |
scoringInfo(java.lang.String[] names,
water.fvec.Frame adaptFrame) |
DataInfo |
scoringInfo(java.lang.String[] names,
water.fvec.Frame adaptFrame,
int nResponses,
boolean fixIVW)
Creates a scoringInfo from a DataInfo instance created during model training
|
void |
setCatNAFill(int[] catNAFill) |
void |
setPredictorTransform(DataInfo.TransformType t) |
void |
setResponse(java.lang.String name,
water.fvec.Vec v) |
void |
setResponse(java.lang.String name,
water.fvec.Vec v,
int n) |
void |
setResponseTransform(DataInfo.TransformType t) |
water.fvec.Vec |
setWeights(java.lang.String name,
water.fvec.Vec vec) |
int |
treatmentChunkId() |
void |
unScaleNumericals(double[] in,
double[] out)
Undo the standardization/normalization of numerical columns
|
void |
updateWeightedSigmaAndMean(double[] sigmas,
double[] mean) |
void |
updateWeightedSigmaAndMeanForResponse(double[] sigmas,
double[] mean) |
DataInfo |
validDinfo(water.fvec.Frame valid) |
int |
weightChunkId() |
checksum_impl, checksum, checksum, getKey, makeSchema, readAll_impl, readAll, remove_impl, remove_impl, remove_self_key_impl, remove, remove, remove, remove, remove, remove, removeQuietly, writeAll_impl, writeAll
public int[] _activeCols
public water.fvec.Frame _adaptedFrame
public int _responses
public DataInfo.TransformType _predictor_transform
public DataInfo.TransformType _response_transform
public boolean _useAllFactorLevels
public int _nums
public int _cats
public int[] _catOffsets
public boolean[] _catMissing
public double[] _numNAFill
public int[] _permutation
public double[] _normMul
public double[] _normSub
public double[] _normSigmaStandardizationOff
public double[] _normSubStandardizationOff
public double[] _normRespMul
public double[] _normRespSub
public double[] _numMeans
public boolean _intercept
public boolean _offset
public boolean _weights
public boolean _fold
public boolean _treatment
public hex.Model.InteractionPair[] _interactions
public hex.Model.InteractionSpec _interactionSpec
public int[] _interactionVecs
public int[] _numOffsets
public final boolean _skipMissing
public final boolean _imputeMissing
public boolean _valid
public final int[][] _catLvls
public final int[][] _intLvls
public java.lang.String[] _coefNames
public int[] _coefOriginalIndices
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, hex.Model.InteractionSpec interactions)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, boolean treatment, hex.Model.InteractionSpec interactions)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, DataInfo.Imputer imputer, boolean missingBucket, boolean weight, boolean offset, boolean fold, hex.Model.InteractionSpec interactions)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, DataInfo.Imputer imputer, boolean missingBucket, boolean weight, boolean offset, boolean fold, boolean treatment, hex.Model.InteractionSpec interactions)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, boolean intercept)
public water.fvec.Vec setWeights(java.lang.String name, water.fvec.Vec vec)
public void dropWeights()
public void dropInteractions()
public int[] activeCols()
public void addResponse(java.lang.String[] names, water.fvec.Vec[] vecs)
public int[] catNAFill()
public int catNAFill(int cid)
public double[] numNAFill()
public double numNAFill(int nid)
public void setCatNAFill(int[] catNAFill)
public double normSub(int i)
public double normMul(int i)
public int responseChunkId(int n)
public int treatmentChunkId()
public int foldChunkId()
public int offsetChunkId()
public int weightChunkId()
public int outputChunkId()
public int outputChunkId(int n)
public void addOutput(java.lang.String name, water.fvec.Vec v)
public water.fvec.Vec getOutputVec(int i)
public void setResponse(java.lang.String name, water.fvec.Vec v)
public void setResponse(java.lang.String name, water.fvec.Vec v, int n)
protected long checksum_impl()
Overrides: checksum_impl in class water.Keyed<DataInfo>
public DataInfo disableIntercept()
public DataInfo validDinfo(water.fvec.Frame valid)
public double[] denormalizeBeta(double[] beta)
public double[] normalizeBeta(double[] beta, boolean standardize)
protected int[] fullCatOffsets()
public static int imputeCat(water.fvec.Vec v)
public static int imputeCat(water.fvec.Vec v, boolean useAllFactorLevels)
public DataInfo filterExpandedColumns(int[] cols)
cols - Array of the expanded column indices to keep.
public void updateWeightedSigmaAndMean(double[] sigmas, double[] mean)
public void updateWeightedSigmaAndMeanForResponse(double[] sigmas, double[] mean)
public void setPredictorTransform(DataInfo.TransformType t)
public void setResponseTransform(DataInfo.TransformType t)
public boolean isInteractionVec(int colid)
public final int fullN()
public final int largestCat()
public final int numStart()
public final int numCats()
public final int numNums()
public final int nextNumericIdx(int currentColIdx)
public final java.lang.String[] coefNames()
public final int[] coefOriginalColumnIndices(water.fvec.Frame adaptedFrame)
public final int[] coefOriginalColumnIndices()
public final java.lang.String[] coefOriginalNames(water.fvec.Frame adaptedFrame)
public final java.lang.String[] coefOriginalNames()
public int[] mapNames(java.lang.String[] names)
public final void unScaleNumericals(double[] in, double[] out)
in - input values
out - output values (can be the same as input)
public final int getCategoricalId(int cid, double val)
public final int getCategoricalId(int cid, int val)
cid - the column id
val - the integer representation of the categorical level
public final int getCategoricalIdFromInteraction(int cid, int val)
public final DataInfo.Row extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row)
public int getInteractionOffset(water.fvec.Chunk[] chunks, int cid, int rid)
public water.fvec.Vec getWeightsVec()
public water.fvec.Vec getOffsetVec()
public DataInfo.Row newDenseRow()
public DataInfo.Row newDenseRow(double[] numVals, long start)
public DataInfo.Rows rows(water.fvec.Chunk[] chks)
public DataInfo.Rows rows(water.fvec.Chunk[] chks, boolean sparse)
public final DataInfo.Row[] extractSparseRows(water.fvec.Chunk[] chunks)
chunks - chunk of dataset
public DataInfo scoringInfo(java.lang.String[] names, water.fvec.Frame adaptFrame)
public DataInfo scoringInfo(java.lang.String[] names, water.fvec.Frame adaptFrame, int nResponses, boolean fixIVW)
names - column names
adaptFrame - adapted frame
nResponses - number of responses (-1 indicates autodetect: 0/1 based on presence of a single response)
fixIVW - whether to force global useFactorLevels flag to InteractionWrappedVecs (GLM behavior)
public DataInfo scoringInfo()