/R/tests/testdir_jira/runit_hex_2020_LR_beta_constraints.R

https://gitlab.com/alvinahmadov2/h2o-2 · R · 66 lines · 47 code · 11 blank · 8 comment · 3 complexity · dacb5ddecf6e765fa209326f142f740e MD5 · raw file

  1. ## This test is to check the beta contraint argument for GLM
  2. ## The test will import the prostate data set,
  3. ## runs glm with and without beta contraints which will be checked
  4. ## against glmnet's results.
  5. #setwd("/Users/amy/h2o/R/tests/testdir_jira")
  6. setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
  7. source('../findNSourceUtils.R')
  8. test.LR.betaConstraints <- function(conn) {
  9. Log.info("Importing prostate dataset...")
  10. prostate.hex = h2o.importFile(
  11. object = conn,system.file("extdata", "prostate.csv", package = "h2o"))
  12. Log.info("Create beta constraints frame...")
  13. myX = c("AGE","RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON")
  14. lowerbound = rep(-1, times = length(myX))
  15. upperbound = rep(1, times = length(myX))
  16. betaConstraints = data.frame(names = myX, lower_bounds = lowerbound, upper_bounds = upperbound)
  17. prostate.csv = as.data.frame(prostate.hex)
  18. ######## Single variable CAPSULE ~ AGE in H2O and then R
  19. ## actual coeff for Age without constraints = -.00823
  20. Log.info("Run a Linear Regression with CAPSULE ~ AGE with bound beta->[0,1] in H2O...")
  21. beta_age = betaConstraints[betaConstraints$names == "AGE",]
  22. beta_age$lower_bounds = 0
  23. beta_age$upper_bounds = 1
  24. lr.h2o = h2o.glm(x = "AGE", y = "CAPSULE", data = prostate.hex, family = "gaussian", alpha = 0, beta_constraints = beta_age, standardize = T)
  25. lambda = lr.h2o@model$lambda
  26. Log.info("Run a Linear Regression with CAPSULE ~ AGE with bound beta->[0,1] in R...")
  27. intercept = rep(0, times = nrow(prostate.hex))
  28. xDataFrame = data.frame(AGE = prostate.csv[,"AGE"], Intercept = intercept)
  29. xMatrix_age = as.matrix(xDataFrame)
  30. lr.R = glmnet(x = xMatrix_age, alpha = 0., lambda = lr.h2o@model$lambda, standardize = T,
  31. y = prostate.csv[,"CAPSULE"], family = "gaussian", lower.limits = 0., upper.limits = 1.)
  32. checkGLMModel2(lr.h2o, lr.R)
  33. #### shift AGE coefficient by 0.002
  34. run_glm <- function(family_type) {
  35. Log.info("Test Beta Constraints with negative upper bound in H2O...")
  36. lower_bound = -0.008
  37. upper_bound = -0.002
  38. beta_age$lower_bounds = lower_bound
  39. beta_age$upper_bounds = upper_bound
  40. nrow_prior = nrow(prostate.hex)
  41. lr_negativeUpper.h2o = h2o.glm(x = "AGE", y = "CAPSULE", data = prostate.hex, family = family_type, alpha = 0, beta_constraints = beta_age, standardize = T)
  42. nrow_after = nrow(prostate.hex)
  43. if(!nrow_prior == nrow_after) stop("H2OParsedData object is being overwritten.")
  44. Log.info("Shift AGE column to reflect negative upperbound...")
  45. xDataFrame = data.frame(AGE = prostate.csv[,"AGE"]*(1+upper_bound), Intercept = intercept)
  46. xMatrix_age = as.matrix(xDataFrame)
  47. lr_negativeUpper.R = glmnet(x = xMatrix_age, alpha = 0., lambda = lr.h2o@model$lambda, standardize = T,
  48. y = prostate.csv[,"CAPSULE"], family = family_type, lower.limits = lower_bound, upper.limits = 0.)
  49. checkGLMModel2(lr_negativeUpper.h2o, lr_negativeUpper.R)
  50. }
  51. full_test <- sapply(c("binomial", "gaussian"), run_glm)
  52. print(full_test)
  53. testEnd()
  54. }
  55. doTest("GLM Test: LR w/ Beta Constraints", test.LR.betaConstraints)