/src/spn/structure/leaves/piecewise/PiecewiseLinear.py

https://github.com/SPFlow/SPFlow · Python · 136 lines · 89 code · 34 blank · 13 comment · 12 complexity · de73b384a017494d4bd89d933d9e0978 MD5 · raw file

  1. """
  2. Created on May 4, 2018
  3. @author: Alejandro Molina
  4. @author: Antonio Vergari
  5. """
  6. from collections import namedtuple
  7. import numpy as np
  8. from spn.structure.Base import Leaf
  9. from spn.structure.StatisticalTypes import MetaType, Type
  10. from spn.structure.leaves.histogram.Histograms import create_histogram_leaf
  11. import itertools
  12. import logging
  13. logger = logging.getLogger(__name__)
  14. class PiecewiseLinear(Leaf):
  15. type = Type.REAL
  16. property_type = namedtuple("PiecewiseLinear", "x_range y_range bin_repr_points")
  17. def __init__(self, x_range, y_range, bin_repr_points, scope=None):
  18. Leaf.__init__(self, scope=scope)
  19. self._type = type
  20. self.x_range = x_range
  21. self.y_range = y_range
  22. self.bin_repr_points = bin_repr_points
  23. @property
  24. def parameters(self):
  25. return __class__.property_type(x_range=self.x_range, y_range=self.y_range, bin_repr_points=self.bin_repr_points)
  26. @property
  27. def mode(self):
  28. areas = np.zeros(len(self.x_range) - 1)
  29. for i in range(areas.shape[0]):
  30. areas[i] = np.trapz([self.y_range[i], self.y_range[i + 1]], [self.x_range[i], self.x_range[i + 1]])
  31. # areas = np.diff(self.breaks) * self.densities
  32. max_area = np.argmax(areas)
  33. max_x = np.argmax([self.y_range[max_area], self.y_range[max_area + 1]]) + max_area
  34. return self.x_range[max_x]
  35. @property
  36. def mean(self):
  37. # Use the histogram values to compute the mean
  38. y_range_norm = self.y_range / np.sum(self.y_range)
  39. mean = 0.0
  40. for k, x in enumerate(self.x_range):
  41. mean += x * y_range_norm[k]
  42. return mean
  43. @property
  44. def types(self):
  45. return self._type
  46. def isotonic_unimodal_regression_R(x, y):
  47. """
  48. Perform unimodal isotonic regression via the Iso package in R
  49. """
  50. from rpy2.robjects.packages import importr
  51. from rpy2 import robjects
  52. from rpy2.robjects import numpy2ri
  53. numpy2ri.activate()
  54. # n_instances = x.shape[0]
  55. # assert y.shape[0] == n_instances
  56. importr("Iso")
  57. z = robjects.r["ufit"](y, x=x, type="b")
  58. iso_x, iso_y = np.array(z.rx2("x")), np.array(z.rx2("y"))
  59. return iso_x, iso_y
  60. def create_piecewise_leaf(data, ds_context, scope, isotonic=False, prior_weight=0.1, hist_source="numpy"):
  61. assert len(scope) == 1, "scope of univariate Piecewise for more than one variable?"
  62. assert data.shape[1] == 1, "data has more than one feature?"
  63. idx = scope[0]
  64. meta_type = ds_context.meta_types[idx]
  65. hist = create_histogram_leaf(data, ds_context, scope, alpha=False, hist_source=hist_source)
  66. densities = hist.densities
  67. bins = hist.breaks
  68. repr_points = hist.bin_repr_points
  69. if meta_type == MetaType.REAL:
  70. EPS = 1e-8
  71. if len(densities) > 1:
  72. def pairwise(iterable):
  73. "s -> (s0,s1), (s1,s2), (s2, s3), ..."
  74. a, b = itertools.tee(iterable)
  75. next(b, None)
  76. return zip(a, b)
  77. x = [bins[0] - EPS] + [b0 + (b1 - b0) / 2 for (b0, b1) in pairwise(bins)] + [bins[-1] + EPS]
  78. else:
  79. assert len(bins) == 2
  80. x = [bins[0] - EPS] + [(bins[0] + (bins[1] - bins[0]) / 2)] + [bins[-1] + EPS]
  81. elif meta_type == MetaType.DISCRETE:
  82. tail_width = 1
  83. x = [b for b in bins[:-1]]
  84. x = [x[0] - tail_width] + x + [x[-1] + tail_width]
  85. else:
  86. raise Exception("Invalid statistical type: " + meta_type)
  87. y = [0.0] + [d for d in densities] + [0.0]
  88. assert len(densities) == len(bins) - 1
  89. assert len(x) == len(y), (len(x), len(y))
  90. x, y = np.array(x), np.array(y)
  91. if isotonic:
  92. x, y = isotonic_unimodal_regression_R(x, y)
  93. auc = np.trapz(y, x)
  94. y = y / auc
  95. node = PiecewiseLinear(x.tolist(), y.tolist(), repr_points, scope=scope)
  96. if prior_weight is None:
  97. return node
  98. uniform_data = np.zeros_like(data)
  99. uniform_data[:] = np.nan
  100. uniform_hist = create_histogram_leaf(uniform_data, ds_context, scope, alpha=False)
  101. return prior_weight * uniform_hist + (1 - prior_weight) * node