PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/Orange/tests/test_continuize.py

https://gitlab.com/zaverichintan/orange3
Python | 210 lines | 178 code | 24 blank | 8 comment | 32 complexity | 8b2195f3bfff3813bf6340f53bf24c30 MD5 | raw file
  1. import unittest
  2. from Orange.data import Table, Variable
  3. from Orange.preprocess.continuize import DomainContinuizer
  4. from Orange.preprocess import Continuize
  5. from Orange.preprocess import transformation
  6. class ContinuizerTest(unittest.TestCase):
  7. def setUp(self):
  8. Variable._clear_all_caches()
  9. self.data = Table("test4")
  10. def test_default(self):
  11. for inp in (self.data, self.data.domain):
  12. dom = DomainContinuizer(inp)
  13. self.assertTrue(all(attr.is_continuous
  14. for attr in dom.attributes))
  15. self.assertIs(dom.class_var, self.data.domain.class_var)
  16. self.assertIs(dom[0], self.data.domain[0])
  17. self.assertIs(dom[1], self.data.domain[1])
  18. self.assertEqual([attr.name for attr in dom.attributes],
  19. ["c1", "c2", "d2=a", "d2=b", "d3=a", "d3=b", "d3=c"])
  20. self.assertIsInstance(dom[2].compute_value, transformation.Indicator)
  21. dat2 = Table(dom, self.data)
  22. # c1 c2 d2 d3 cl1
  23. self.assertEqual(dat2[0], [1, -2, 1, 0, 1, 0, 0, "a"])
  24. self.assertEqual(dat2[1], [0, 0, 0, 1, 0, 1, 0, "b"])
  25. self.assertEqual(dat2[2], [2, 2, 0, 1, 0, 0, 1, "c"])
  26. def test_continuous_transform_class(self):
  27. for inp in (self.data, self.data.domain):
  28. dom = DomainContinuizer(inp, transform_class=True)
  29. self.assertTrue(all(attr.is_continuous
  30. for attr in dom))
  31. self.assertIsNot(dom.class_var, self.data.domain.class_var)
  32. self.assertIs(dom[0], self.data.domain[0])
  33. self.assertIs(dom[1], self.data.domain[1])
  34. self.assertEqual([attr.name for attr in dom.attributes],
  35. ["c1", "c2", "d2=a", "d2=b", "d3=a", "d3=b", "d3=c"])
  36. self.assertIsInstance(dom[2].compute_value, transformation.Indicator)
  37. dat2 = Table(dom, self.data)
  38. # c1 c2 d2 d3 cl1
  39. self.assertEqual(dat2[0], [1, -2, 1, 0, 1, 0, 0, 1, 0, 0])
  40. self.assertEqual(dat2[1], [0, 0, 0, 1, 0, 1, 0, 0, 1, 0])
  41. self.assertEqual(dat2[2], [2, 2, 0, 1, 0, 0, 1, 0, 0, 1])
  42. def test_multi_indicators(self):
  43. for inp in (self.data, self.data.domain):
  44. dom = DomainContinuizer(inp,
  45. multinomial_treatment=Continuize.Indicators)
  46. self.assertTrue(all(attr.is_continuous
  47. for attr in dom.attributes))
  48. self.assertIs(dom.class_var, self.data.domain.class_var)
  49. self.assertIs(dom[0], self.data.domain[0])
  50. self.assertIs(dom[1], self.data.domain[1])
  51. self.assertEqual([attr.name for attr in dom.attributes],
  52. ["c1", "c2", "d2=a", "d2=b", "d3=a", "d3=b",
  53. "d3=c"])
  54. self.assertIsInstance(dom[2].compute_value,
  55. transformation.Indicator)
  56. dat2 = Table(dom, self.data)
  57. # c1 c2 d2 d3 cl1
  58. self.assertEqual(dat2[0], [1, -2, 1, 0, 1, 0, 0, "a"])
  59. self.assertEqual(dat2[1], [0, 0, 0, 1, 0, 1, 0, "b"])
  60. self.assertEqual(dat2[2], [2, 2, 0, 1, 0, 0, 1, "c"])
  61. def test_multi_lowest_base(self):
  62. for inp in (self.data, self.data.domain):
  63. dom = DomainContinuizer(
  64. inp, multinomial_treatment=Continuize.FirstAsBase)
  65. self.assertTrue(all(attr.is_continuous
  66. for attr in dom.attributes))
  67. self.assertIs(dom.class_var, self.data.domain.class_var)
  68. self.assertIs(dom[0], self.data.domain[0])
  69. self.assertIs(dom[1], self.data.domain[1])
  70. self.assertEqual([attr.name for attr in dom.attributes],
  71. ["c1", "c2", "d2=b", "d3=b", "d3=c"])
  72. self.assertIsInstance(dom[2].compute_value,
  73. transformation.Indicator)
  74. dat2 = Table(dom, self.data)
  75. # c1 c2 d2 d3 cl1
  76. self.assertEqual(dat2[0], [1, -2, 0, 0, 0, "a"])
  77. self.assertEqual(dat2[1], [0, 0, 1, 1, 0, "b"])
  78. self.assertEqual(dat2[2], [2, 2, 1, 0, 1, "c"])
  79. def test_multi_lowest_base_base(self):
  80. self.data.domain[4].base_value = 1
  81. for inp in (self.data, self.data.domain):
  82. dom = DomainContinuizer(
  83. inp, multinomial_treatment=Continuize.FirstAsBase)
  84. self.assertTrue(all(attr.is_continuous
  85. for attr in dom.attributes))
  86. self.assertIs(dom.class_var, self.data.domain.class_var)
  87. self.assertIs(dom[0], self.data.domain[0])
  88. self.assertIs(dom[1], self.data.domain[1])
  89. self.assertEqual([attr.name for attr in dom.attributes],
  90. ["c1", "c2", "d2=b", "d3=a", "d3=c"])
  91. self.assertIsInstance(dom[2].compute_value,
  92. transformation.Indicator)
  93. dat2 = Table(dom, self.data)
  94. # c1 c2 d2 d3 cl1
  95. self.assertEqual(dat2[0], [1, -2, 0, 1, 0, "a"])
  96. self.assertEqual(dat2[1], [0, 0, 1, 0, 0, "b"])
  97. self.assertEqual(dat2[2], [2, 2, 1, 0, 1, "c"])
  98. def test_multi_ignore(self):
  99. dom = DomainContinuizer(self.data.domain,
  100. multinomial_treatment=Continuize.Remove)
  101. self.assertTrue(all(attr.is_continuous
  102. for attr in dom.attributes))
  103. self.assertEqual([attr.name for attr in dom.attributes],
  104. ["c1", "c2"])
  105. def test_multi_ignore_class(self):
  106. dom = DomainContinuizer(self.data.domain,
  107. multinomial_treatment=Continuize.Remove,
  108. transform_class=True)
  109. self.assertTrue(all(attr.is_continuous
  110. for attr in dom.attributes))
  111. self.assertEqual([attr.name for attr in dom.attributes],
  112. ["c1", "c2"])
  113. self.assertEqual(len(dom.class_vars), 0)
  114. self.assertIsNone(dom.class_var)
  115. def test_multi_ignore_multi(self):
  116. dom = DomainContinuizer(
  117. self.data.domain,
  118. multinomial_treatment=Continuize.RemoveMultinomial)
  119. self.assertTrue(all(attr.is_continuous
  120. for attr in dom.attributes))
  121. self.assertEqual([attr.name for attr in dom],
  122. ["c1", "c2", "d2=b", "cl1"])
  123. def test_multi_ignore_class(self):
  124. dom = DomainContinuizer(
  125. self.data.domain,
  126. multinomial_treatment=Continuize.RemoveMultinomial,
  127. transform_class=True)
  128. self.assertTrue(all(attr.is_continuous
  129. for attr in dom.attributes))
  130. self.assertEqual([attr.name for attr in dom.attributes],
  131. ["c1", "c2", "d2=b"])
  132. self.assertEqual(len(dom.class_vars), 0)
  133. self.assertIsNone(dom.class_var)
  134. def test_multi_error(self):
  135. self.assertRaises(ValueError, DomainContinuizer,
  136. self.data.domain,
  137. multinomial_treatment=Continuize.ReportError)
  138. def test_as_ordinal(self):
  139. for inp in (self.data, self.data.domain):
  140. dom = DomainContinuizer(
  141. inp, multinomial_treatment=Continuize.AsOrdinal)
  142. self.assertTrue(all(attr.is_continuous
  143. for attr in dom.attributes))
  144. self.assertIs(dom.class_var, self.data.domain.class_var)
  145. self.assertIs(dom[0], self.data.domain[0])
  146. self.assertIs(dom[1], self.data.domain[1])
  147. self.assertEqual([attr.name for attr in dom],
  148. ["c1", "c2", "d2", "d3", "cl1"])
  149. dat2 = Table(dom, self.data)
  150. # c1 c2 d2 d3 cl1
  151. self.assertEqual(dat2[0], [1, -2, 0, 0, "a"])
  152. self.assertEqual(dat2[1], [0, 0, 1, 1, "b"])
  153. self.assertEqual(dat2[2], [2, 2, 1, 2, "c"])
  154. def test_as_ordinal_class(self):
  155. for inp in (self.data, self.data.domain):
  156. dom = DomainContinuizer(
  157. inp, multinomial_treatment=Continuize.AsOrdinal,
  158. transform_class=True)
  159. self.assertTrue(all(attr.is_continuous
  160. for attr in dom.attributes))
  161. self.assertTrue(dom.has_continuous_class)
  162. self.assertIs(dom[0], self.data.domain[0])
  163. self.assertIs(dom[1], self.data.domain[1])
  164. self.assertEqual([attr.name for attr in dom],
  165. ["c1", "c2", "d2", "d3", "cl1"])
  166. dat2 = Table(dom, self.data)
  167. # c1 c2 d2 d3 cl1
  168. self.assertEqual(dat2[0], [1, -2, 0, 0, 0])
  169. self.assertEqual(dat2[1], [0, 0, 1, 1, 1])
  170. self.assertEqual(dat2[2], [2, 2, 1, 2, 2])
  171. def test_as_normalized_ordinal(self):
  172. for inp in (self.data, self.data.domain):
  173. dom = DomainContinuizer(
  174. inp, multinomial_treatment=Continuize.AsNormalizedOrdinal)
  175. self.assertTrue(all(attr.is_continuous
  176. for attr in dom.attributes))
  177. self.assertIs(dom.class_var, self.data.domain.class_var)
  178. self.assertIs(dom[0], self.data.domain[0])
  179. self.assertIs(dom[1], self.data.domain[1])
  180. self.assertEqual([attr.name for attr in dom],
  181. ["c1", "c2", "d2", "d3", "cl1"])
  182. dat2 = Table(dom, self.data)
  183. # c1 c2 d2 d3 cl1
  184. self.assertEqual(dat2[0], [1, -2, 0, 0, "a"])
  185. self.assertEqual(dat2[1], [0, 0, 1, 0.5, "b"])
  186. self.assertEqual(dat2[2], [2, 2, 1, 1, "c"])