Skip to content

Commit

Permalink
Merge branch 'rel-3.46.0' into valenad-GH-15772-upgrade-to-gradle-8-4
Browse files Browse the repository at this point in the history
  • Loading branch information
valenad1 committed May 2, 2024
2 parents 686268a + a3ae44d commit 9b8e433
Show file tree
Hide file tree
Showing 53 changed files with 7,275 additions and 346 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import water.*;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.runner.CloudSize;
import water.runner.H2ORunner;
Expand Down
4 changes: 2 additions & 2 deletions h2o-algos/src/main/java/hex/gam/GAM.java
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ public void computeImpl() {
if (error_count() > 0) // if something goes wrong, let's throw a fit
throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(GAM.this);
// add gamified columns to training frame
Frame newTFrame = new Frame(rebalance(adaptTrain(), false, _result+".temporary.train"));
Frame newTFrame = new Frame(rebalance(adaptTrain(), false, Key.make()+".temporary.train"));
verifyGamTransformedFrame(newTFrame);

if (error_count() > 0) // if something goes wrong during gam transformation, let's throw a fit again!
Expand All @@ -937,7 +937,7 @@ public void computeImpl() {
int[] singleGamColsCount = new int[]{_cubicSplineNum, _iSplineNum, _mSplineNum};
_valid = rebalance(adaptValidFrame(_parms.valid(), _valid, _parms, _gamColNamesCenter, _binvD,
_zTranspose, _knots, _zTransposeCS, _allPolyBasisList, _gamColMeansRaw, _oneOGamColStd, singleGamColsCount),
false, _result + ".temporary.valid");
false, Key.make() + ".temporary.valid");
}
DKV.put(newTFrame); // This one will cause deleted vectors if add to Scope.track
Frame newValidFrame = _valid == null ? null : new Frame(_valid);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ public static void copy2DArray(double[][] src_array, double[][] dest_array) {
src_array[colIdx].length);
}
}

/**
 * Returns a freshly allocated copy of a 2-D double array.
 *
 * The destination is sized from {@code src_array.length} rows and the column count of the
 * first row; rows are assumed to be of equal length (TODO confirm callers never pass a
 * ragged array — trailing elements of longer rows would not be copied by the delegate).
 *
 * @param src_array non-null array to duplicate; may be empty
 * @return a new array with the same contents as {@code src_array}
 */
public static double[][] copy2DArray(double[][] src_array) {
  if (src_array.length == 0) // guard: src_array[0] below would throw on an empty array
    return new double[0][];
  double[][] dest_array = MemoryManager.malloc8d(src_array.length, src_array[0].length);
  copy2DArray(src_array, dest_array);
  return dest_array;
}

public static void copy2DArray(int[][] src_array, int[][] dest_array) {
int numRows = src_array.length;
Expand Down
4 changes: 4 additions & 0 deletions h2o-algos/src/main/java/hex/glm/CoefIndices.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package hex.glm;

/**
 * Marker interface for coefficient-index holders used by GLM.
 *
 * It currently declares no methods; implementations are identified by type only.
 * NOTE(review): presumably implemented by constraint/coefficient bookkeeping classes
 * elsewhere in hex.glm — confirm against its implementors before extending.
 */
public interface CoefIndices {
}
505 changes: 488 additions & 17 deletions h2o-algos/src/main/java/hex/glm/ComputationState.java

Large diffs are not rendered by default.

859 changes: 859 additions & 0 deletions h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java

Large diffs are not rendered by default.

748 changes: 557 additions & 191 deletions h2o-algos/src/main/java/hex/glm/GLM.java

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions h2o-algos/src/main/java/hex/glm/GLMModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ public static class GLMParameters extends Model.Parameters {
public enum MissingValuesHandling {
MeanImputation, PlugValues, Skip
}
public enum Constraints {EqualTo, LessThanEqualTo};
public String algoName() { return "GLM"; }
public String fullName() { return "Generalized Linear Modeling"; }
public String javaName() { return GLMModel.class.getName(); }
Expand Down Expand Up @@ -515,6 +516,8 @@ public enum MissingValuesHandling {
public StringPair[] _interaction_pairs=null;
public boolean _early_stopping = true;
public Key<Frame> _beta_constraints = null;
public Key<Frame> _linear_constraints = null;
public boolean _expose_constraints = false; // internal parameter for testing only.
public Key<Frame> _plug_values = null;
// internal parameter, handle with care. GLM will stop when there is more than this number of active predictors (after strong rule screening)
public int _max_active_predictors = -1;
Expand All @@ -534,6 +537,14 @@ public enum MissingValuesHandling {
public double _dispersion_learning_rate = 0.5;
public Influence _influence; // if set to dfbetas will calculate the difference of betas obtained from including and excluding a data row
public boolean _keepBetaDiffVar = false; // if true, will keep the frame generating the beta without from i and the variance estimation
boolean _testCSZeroGram = false; // internal parameter, to test zero gram dropped column is correctly implemented
public boolean _separate_linear_beta = false; // if true, will perform the beta and linear constraint separately
public boolean _init_optimal_glm = false; // only used when there is linear constraints
public double _constraint_eta0 = 0.1258925; // eta_k = constraint_eta0/pow(constraint_c0, constraint_alpha)
public double _constraint_tau = 10; // ck+1 = tau*ck
public double _constraint_alpha = 0.1; // eta_k = constraint_eta_0/pow(constraint_c0, constraint_alpha)
public double _constraint_beta = 0.9; // eta_k+1 = eta_k/pow(c_k, beta)
public double _constraint_c0 = 10; // set initial epsilon k as 1/c0

public void validate(GLM glm) {
if (_remove_collinear_columns) {
Expand Down Expand Up @@ -1552,6 +1563,9 @@ public static class GLMOutput extends Model.Output {
public int _selected_submodel_idx; // submodel index with best deviance
public int _best_submodel_idx; // submodel index with best deviance
public int _best_lambda_idx; // the same as best_submodel_idx, kept to ensure backward compatibility
public String[] _linear_constraint_states;
public boolean _all_constraints_satisfied;
public TwoDimTable _linear_constraints_table;
public Key<Frame> _regression_influence_diagnostics;
public Key<Frame> _betadiff_var; // for debugging only, no need to serialize
public double lambda_best(){return _submodels.length == 0 ? -1 : _submodels[_best_submodel_idx].lambda_value;}
Expand All @@ -1577,6 +1591,12 @@ public double lambda_selected(){
private double _dispersion;
private boolean _dispersionEstimated;
public int[] _activeColsPerClass;
public ConstrainedGLMUtils.LinearConstraints[] _equalityConstraintsLinear = null;
public ConstrainedGLMUtils.LinearConstraints[] _lessThanEqualToConstraintsLinear = null;
public ConstrainedGLMUtils.LinearConstraints[] _equalityConstraintsBeta = null;
public ConstrainedGLMUtils.LinearConstraints[] _lessThanEqualToConstraintsBeta = null;
public String[] _constraintCoefficientNames = null;
public double[][] _initConstraintMatrix = null;
public boolean hasPValues(){return _zvalues != null;}
public boolean hasVIF() { return _vif_predictor_names != null; }

Expand Down
13 changes: 11 additions & 2 deletions h2o-algos/src/main/java/hex/glm/GLMTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ public final void reduce(GLMGradientTask gmgt){
_likelihood += gmgt._likelihood;
}
@Override public final void postGlobal(){
ArrayUtils.mult(_gradient,_reg);
ArrayUtils.mult(_gradient,_reg); // reg is obj_reg
for(int j = 0; j < _beta.length - 1; ++j)
_gradient[j] += _currentLambda * _beta[j]; // add L2 constraint for gradient
if ((_penalty_mat != null) && (_gamBetaIndices != null)) // update contribution from gam smoothness constraint
Expand Down Expand Up @@ -1522,6 +1522,7 @@ public static class GLMIterationTask extends FrameTask2<GLMIterationTask> {
double wsum, sumOfRowWeights;
double _sumsqe;
int _c = -1;
boolean _hasConstraints = false;

public double[] getXY() {
return _xy;
Expand All @@ -1546,6 +1547,15 @@ public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double
_c = c;
}

/**
 * Builds an iteration task for the given coefficients and class index, additionally
 * recording whether linear/beta constraints are present for this run.
 */
public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta, int c, boolean hasConst) {
  super(null,dinfo,jobKey);
  // stash the per-iteration inputs; _ymu is not used by this variant
  _glmf = glmw;
  _c = c;
  _hasConstraints = hasConst;
  _beta = beta;
  _ymu = null;
}

@Override public boolean handlesSparseData(){return true;}

transient private double _sparseOffset;
Expand Down Expand Up @@ -1659,7 +1669,6 @@ public boolean hasNaNsOrInf() {
return ArrayUtils.hasNaNsOrInfs(_xy) || _gram.hasNaNsOrInfs();
}
}


/* public static class GLMCoordinateDescentTask extends FrameTask2<GLMCoordinateDescentTask> {
final GLMParameters _params;
Expand Down
8 changes: 8 additions & 0 deletions h2o-algos/src/main/java/hex/glm/GLMUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -365,4 +365,12 @@ public static Frame buildRIDFrame(GLMModel.GLMParameters parms, Frame train, Fra
train.add(parms._response_column, responseVec);
return train;
}

/**
 * Checks whether the lambda (regularization strength) array contains any non-zero entry.
 *
 * @param lambdas array of lambda values; may be {@code null}
 * @return {@code true} iff {@code lambdas} is non-null and holds at least one value != 0.0
 */
public static boolean notZeroLambdas(double[] lambdas) {
  // anyMatch short-circuits on the first non-zero value; the old
  // filter().boxed().count() form boxed every element and always scanned the whole array
  return lambdas != null && Arrays.stream(lambdas).anyMatch(x -> x != 0.0);
}
}
16 changes: 16 additions & 0 deletions h2o-algos/src/main/java/hex/gram/Gram.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ public void addGAMPenalty(Integer[] activeColumns, double[][][] ds, int[][] gamI
}
}
}

/**
 * Accumulates the GAM smoothness penalty into {@code xmatrix}.
 *
 * For each gam column, adds 2*ds[col][i][j] into the (row_i, row_j) cell of
 * {@code xmatrix}, where the rows are taken from {@code gamIndices[col]}.  Only the
 * lower triangle (j <= i) within each column's index set is touched.
 *
 * @param ds         per-gam-column penalty blocks (lower-triangular layout)
 * @param gamIndices per-gam-column coefficient indices, one entry per knot
 * @param xmatrix    matrix updated in place
 */
public void addGAMPenalty(double[][][] ds, int[][] gamIndices, double[][] xmatrix) {
  for (int col = 0; col < gamIndices.length; col++) {
    final int[] knotRows = gamIndices[col];  // coefficient indices for this gam column
    final double[][] penalty = ds[col];      // its penalty block
    for (int i = 0; i < knotRows.length; i++) {
      final int row = knotRows[i];
      for (int j = 0; j <= i; j++) {         // lower triangle only
        xmatrix[row][knotRows[j]] += 2 * penalty[i][j];
      }
    }
  }
}

public double get(int i, int j) {
if(j > i) {
Expand Down
141 changes: 140 additions & 1 deletion h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
package hex.optimization;


import hex.glm.ComputationState;
import hex.glm.ConstrainedGLMUtils;
import hex.glm.GLM;
import water.Iced;
import water.util.ArrayUtils;
import water.util.Log;

import java.util.Arrays;
import java.util.List;

import static hex.glm.ComputationState.EPS_CS_SQUARE;
import static hex.glm.ConstrainedGLMUtils.*;

/**
* Created by tomasnykodym on 9/29/15.
*/
public class OptimizationUtils {

public static class GradientInfo extends Iced {
public double _objVal;
public double [] _gradient;
Expand Down Expand Up @@ -456,4 +462,137 @@ public GradientInfo ginfo() {
}
}

/***
 * This class implements the exact line search described in the doc, Algorithm 11.5.
 *
 * Given a candidate step (betaCnd - beta_k) it searches for a step size _alphai that
 * satisfies both Wolfe conditions, maintaining a bracketing interval [_alphal, _alphar].
 * Used by constrained GLM; constraint state is recomputed at every trial point.
 */
public static final class ExactLineSearch {
    // Wolfe constants: _betaLS1 is the sufficient-decrease (Armijo) coefficient c1,
    // _betaLS2 the curvature coefficient c2, _lambdaLS the step expansion factor used
    // while no finite upper bracket exists yet.
    public final double _betaLS1 = 1e-4;
    public final double _betaLS2 = 0.99;
    public final double _lambdaLS = 2;
    public double _alphal;        // lower end of the step-size bracket
    public double _alphar;        // upper end of the bracket (+inf until a too-long step is seen)
    public double _alphai;        // current trial step size
    public double[] _direction; // beta_k+1 - beta_k
    public int _maxIteration = 50; // 40 too low for tight constraints
    public double[] _originalBeta;   // beta_k: coefficients at the start of the search
    public double[] _newBeta;        // accepted coefficients once both Wolfe conditions hold
    public GLM.GLMGradientInfo _ginfoOriginal;  // gradient/objective at _originalBeta; replaced on success
    public double _currGradDirIP; // current gradient and direction inner product
    public String[] _coeffNames;

    public ExactLineSearch(double[] betaCnd, ComputationState state, List<String> coeffNames) {
        reset(betaCnd, state, coeffNames);
    }

    /**
     * Re-initializes the search: direction = betaCnd - state.beta(), trial step 1,
     * bracket [0, +inf).  Also caches grad'direction, used by both Wolfe tests.
     */
    public void reset(double[] betaCnd, ComputationState state, List<String> coeffNames) {
        _direction = new double[betaCnd.length];
        ArrayUtils.subtract(betaCnd, state.beta(), _direction);
        _ginfoOriginal = state.ginfo();
        _originalBeta = state.beta();
        _alphai = 1;
        _alphal = 0;
        _alphar = Double.POSITIVE_INFINITY;
        _coeffNames = coeffNames.toArray(new String[0]);
        _currGradDirIP = ArrayUtils.innerProduct(_ginfoOriginal._gradient, _direction);
    }

    /***
     * Evaluate and make sure that step size alphi is not too big so that objective function is still decreasing. Refer
     * to the doc, Definition 11.6
     */
    public boolean evaluateFirstWolfe(GLM.GLMGradientInfo ginfoNew) {
        double newObj = ginfoNew._objVal;
        // sufficient decrease: f(beta + alpha*d) <= f(beta) + alpha*c1*(grad'd)
        double rhs = _ginfoOriginal._objVal+_alphai*_betaLS1* _currGradDirIP;
        return (newObj <= rhs);
    }

    /***
     * Evaluate and make sure that step size alphi is not too small so that good progress is made in reducing the
     * loss function. Refer to the doc, Definition 11.8
     */
    public boolean evaluateSecondWolfe(GLM.GLMGradientInfo ginfo) {
        // curvature condition: grad(beta + alpha*d)'d >= c2*(grad(beta)'d)
        double lhs = ArrayUtils.innerProduct(ginfo._gradient, _direction);
        return lhs >= _betaLS2* _currGradDirIP;
    }

    /**
     * Adjusts the trial step from the Wolfe test outcomes: step too long -> shrink
     * the bracket from the right and bisect; step too short -> raise the left end,
     * then bisect if a finite upper bound exists, otherwise expand by _lambdaLS.
     *
     * @return true if _alphai was changed; false when neither adjustment applies
     *         (both conditions hold, or both fail)
     */
    public boolean setAlphai(boolean firstWolfe, boolean secondWolfe) {
        if (!firstWolfe && secondWolfe) { // step is too long
            _alphar = _alphai;
            _alphai = 0.5*(_alphal+_alphar);
            return true;
        } else if (firstWolfe && !secondWolfe) { // step is too short
            _alphal = _alphai;
            if (_alphar < Double.POSITIVE_INFINITY)
                _alphai = 0.5*(_alphal+_alphar);
            else
                _alphai = _lambdaLS * _alphai;
            return true;
        }
        return false;
    }

    /**
     * Accepts betaCnd as the new coefficients without a Wolfe test: recomputes the
     * constraint values, their squared magnitudes, the cached constraint info in
     * state, and finally the gradient/objective at betaCnd (stored in _ginfoOriginal).
     * NOTE(review): presumably invoked after the separate beta-constraint update,
     * where the step size has already been decided — confirm against callers in GLM.
     */
    public void setBetaConstraintsDeriv(double[] lambdaEqual, double[] lambdaLessThan, ComputationState state,
                                        ConstrainedGLMUtils.LinearConstraints[] equalityConstraints,
                                        ConstrainedGLMUtils.LinearConstraints[] lessThanEqualToConstraints,
                                        GLM.GLMGradientSolver gradientSolver, double[] betaCnd) {
        _newBeta = betaCnd;
        updateConstraintValues(betaCnd, Arrays.asList(_coeffNames), equalityConstraints, lessThanEqualToConstraints);
        calculateConstraintSquare(state, equalityConstraints, lessThanEqualToConstraints);
        // update gradient from constraints transpose(lambda)*h(beta)(value, not changed, active status may change)
        // and gram contribution from ck/2*transpose(h(beta))*h(beta)), value not changed but active status may change
        state.updateConstraintInfo(equalityConstraints, lessThanEqualToConstraints);
        // calculate new gradient and objective function;
        _ginfoOriginal = calGradient(betaCnd, state, gradientSolver, lambdaEqual, lambdaLessThan,
                equalityConstraints, lessThanEqualToConstraints);
    }

    /***
     * Implements the Line Search algorithm in the doc, Algorithm 11.5.
     *
     * On success stores the accepted coefficients in _newBeta and the matching
     * gradient info in _ginfoOriginal and returns true; returns false if the
     * direction is not a descent direction, the bracket degenerates, or
     * _maxIteration trials are exhausted.
     */
    public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, ComputationState state,
                             ConstrainedGLMUtils.LinearConstraints[] equalityConstraints,
                             ConstrainedGLMUtils.LinearConstraints[] lessThanEqualToConstraints,
                             GLM.GLMGradientSolver gradientSolver) {
        if (_currGradDirIP > 0) { // not a descent direction: no step can satisfy the Wolfe conditions
            return false;
        }
        GLM.GLMGradientInfo newGrad;
        double[] newCoef;
        int betaLen = _originalBeta.length;
        double[] tempDirection = new double[betaLen];
        boolean firstWolfe;
        boolean secondWolfe;
        boolean alphaiChange;
        for (int index=0; index<_maxIteration; index++) {
            ArrayUtils.mult(_direction, tempDirection, _alphai); // tempCoef=alpha_i*direction
            newCoef = ArrayUtils.add(tempDirection, _originalBeta); // newCoef = coef + alpha_i*direction
            // calculate constraint values with new coefficients, constraints magnitude square
            updateConstraintValues(newCoef, Arrays.asList(_coeffNames), equalityConstraints, lessThanEqualToConstraints);
            calculateConstraintSquare(state, equalityConstraints, lessThanEqualToConstraints);
            // update gradient from constraints transpose(lambda)*h(beta)(value, not changed, active status may change)
            // and gram contribution from ck/2*transpose(h(beta))*h(beta)), value not changed but active status may change
            state.updateConstraintInfo(equalityConstraints, lessThanEqualToConstraints);
            // calculate new gradient and objective function for new coefficients newCoef
            newGrad = calGradient(newCoef, state, gradientSolver, lambdaEqual, lambdaLessThan,
                    equalityConstraints, lessThanEqualToConstraints);
            // evaluate if first Wolfe condition is satisfied;
            firstWolfe = evaluateFirstWolfe(newGrad);
            // evaluate if second Wolfe condition is satisfied;
            secondWolfe = evaluateSecondWolfe(newGrad);
            // return if both conditions are satisfied;
            if (firstWolfe && secondWolfe) {
                _newBeta = newCoef;
                _ginfoOriginal = newGrad;
                return true;
            }

            // set alphai if first Wolfe condition is not satisfied, set alpha i if second Wolfe condition is not satisfied;
            // if alphai, alphar value are not changed and alphar is too small, quit
            // NOTE(review): the comment above reads like "and" but the code uses '||' —
            // i.e. it also quits whenever _alphar alone drops below EPS_CS_SQUARE; confirm intent.
            alphaiChange = setAlphai(firstWolfe, secondWolfe);
            if (!alphaiChange || _alphar < EPS_CS_SQUARE) {
                return false;
            }
        }
        return false;
    }
}
}
27 changes: 22 additions & 5 deletions h2o-algos/src/main/java/hex/schemas/GLMModelV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,24 @@ public static final class GLMModelOutputV3 extends ModelOutputSchemaV3<GLMOutput

@API(help = "Predictor names where variable inflation factors are calculated.")
String[] vif_predictor_names;

@API(help = "GLM model coefficients names.")
String[] coefficient_names;

@API(help = "predictor variable inflation factors.")
double[] variable_inflation_factors;

@API(help = "Beta (if exists) and linear constraints states")
String[] linear_constraint_states;

@API(help = "Table of beta (if exists) and linear constraints values and status")
TwoDimTableV3 linear_constraints_table;

@API(help="Contains the original dataset and the dfbetas calculated for each predictor.")
KeyV3.FrameKeyV3 regression_influence_diagnostics;

@API(help="True if all constraints conditions are satisfied. Otherwise, false.")
boolean all_constraints_satisfied;

private GLMModelOutputV3 fillMultinomial(GLMOutput impl) {
if(impl.get_global_beta_multinomial() == null)
Expand Down Expand Up @@ -201,6 +213,9 @@ public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
alpha_best = impl.alpha_best();
best_submodel_index = impl.bestSubmodelIndex();
dispersion = impl.dispersion();
coefficient_names = impl.coefficientNames().clone();
if (impl._linear_constraint_states != null) // pass constraint conditions
linear_constraint_states = impl._linear_constraint_states.clone();
variable_inflation_factors = impl.getVariableInflationFactors();
vif_predictor_names = impl.hasVIF() ? impl.getVIFPredictorNames() : null;
List<String> validVIFNames = impl.hasVIF() ? Stream.of(vif_predictor_names).collect(Collectors.toList()) : null;
Expand All @@ -215,11 +230,13 @@ public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
random_coefficients_table.fillFromImpl(buildRandomCoefficients2DTable(impl.ubeta(), impl.randomcoefficientNames()));
}
double [] beta = impl.beta();
final double [] magnitudes = new double[beta.length];
int len = magnitudes.length - 1;
int[] indices = new int[len];
for (int i = 0; i < indices.length; ++i)
indices[i] = i;
final double [] magnitudes = beta==null?null:new double[beta.length];
int len = beta==null?0:magnitudes.length - 1;
int[] indices = beta==null?null:new int[len];
if (beta != null) {
for (int i = 0; i < indices.length; ++i)
indices[i] = i;
}

if(beta == null) beta = MemoryManager.malloc8d(names.length);
String [] colTypes = new String[]{"double"};
Expand Down

0 comments on commit 9b8e433

Please sign in to comment.