PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/mlbackend/php/phpml/src/Phpml/Association/Apriori.php

https://gitlab.com/unofficial-mirrors/moodle
PHP | 345 lines | 164 code | 45 blank | 136 comment | 14 complexity | 38ef11ecbe8d7b5d3ac6cde255fb9e04 MD5 | raw file
  1. <?php
  2. declare(strict_types=1);
  3. namespace Phpml\Association;
  4. use Phpml\Helper\Predictable;
  5. use Phpml\Helper\Trainable;
  6. class Apriori implements Associator
  7. {
  8. use Trainable, Predictable;
  9. const ARRAY_KEY_ANTECEDENT = 'antecedent';
  10. const ARRAY_KEY_CONFIDENCE = 'confidence';
  11. const ARRAY_KEY_CONSEQUENT = 'consequent';
  12. const ARRAY_KEY_SUPPORT = 'support';
  13. /**
  14. * Minimum relative probability of frequent transactions.
  15. *
  16. * @var float
  17. */
  18. private $confidence;
  19. /**
  20. * The large set contains frequent k-length item sets.
  21. *
  22. * @var mixed[][][]
  23. */
  24. private $large;
  25. /**
  26. * Minimum relative frequency of transactions.
  27. *
  28. * @var float
  29. */
  30. private $support;
  31. /**
  32. * The generated Apriori association rules.
  33. *
  34. * @var mixed[][]
  35. */
  36. private $rules;
  37. /**
  38. * Apriori constructor.
  39. *
  40. * @param float $support
  41. * @param float $confidence
  42. */
  43. public function __construct(float $support = 0.0, float $confidence = 0.0)
  44. {
  45. $this->support = $support;
  46. $this->confidence = $confidence;
  47. }
  48. /**
  49. * Get all association rules which are generated for every k-length frequent item set.
  50. *
  51. * @return mixed[][]
  52. */
  53. public function getRules() : array
  54. {
  55. if (!$this->large) {
  56. $this->large = $this->apriori();
  57. }
  58. if ($this->rules) {
  59. return $this->rules;
  60. }
  61. $this->rules = [];
  62. $this->generateAllRules();
  63. return $this->rules;
  64. }
  65. /**
  66. * Generates frequent item sets.
  67. *
  68. * @return mixed[][][]
  69. */
  70. public function apriori() : array
  71. {
  72. $L = [];
  73. $L[1] = $this->items();
  74. $L[1] = $this->frequent($L[1]);
  75. for ($k = 2; !empty($L[$k - 1]); ++$k) {
  76. $L[$k] = $this->candidates($L[$k - 1]);
  77. $L[$k] = $this->frequent($L[$k]);
  78. }
  79. return $L;
  80. }
  81. /**
  82. * @param mixed[] $sample
  83. *
  84. * @return mixed[][]
  85. */
  86. protected function predictSample(array $sample) : array
  87. {
  88. $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
  89. return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
  90. }));
  91. return array_map(function ($rule) {
  92. return $rule[self::ARRAY_KEY_CONSEQUENT];
  93. }, $predicts);
  94. }
  95. /**
  96. * Generate rules for each k-length frequent item set.
  97. */
  98. private function generateAllRules()
  99. {
  100. for ($k = 2; !empty($this->large[$k]); ++$k) {
  101. foreach ($this->large[$k] as $frequent) {
  102. $this->generateRules($frequent);
  103. }
  104. }
  105. }
  106. /**
  107. * Generate confident rules for frequent item set.
  108. *
  109. * @param mixed[] $frequent
  110. */
  111. private function generateRules(array $frequent)
  112. {
  113. foreach ($this->antecedents($frequent) as $antecedent) {
  114. if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
  115. $consequent = array_values(array_diff($frequent, $antecedent));
  116. $this->rules[] = [
  117. self::ARRAY_KEY_ANTECEDENT => $antecedent,
  118. self::ARRAY_KEY_CONSEQUENT => $consequent,
  119. self::ARRAY_KEY_SUPPORT => $this->support($consequent),
  120. self::ARRAY_KEY_CONFIDENCE => $confidence,
  121. ];
  122. }
  123. }
  124. }
  125. /**
  126. * Generates the power set for given item set $sample.
  127. *
  128. * @param mixed[] $sample
  129. *
  130. * @return mixed[][]
  131. */
  132. private function powerSet(array $sample) : array
  133. {
  134. $results = [[]];
  135. foreach ($sample as $item) {
  136. foreach ($results as $combination) {
  137. $results[] = array_merge([$item], $combination);
  138. }
  139. }
  140. return $results;
  141. }
  142. /**
  143. * Generates all proper subsets for given set $sample without the empty set.
  144. *
  145. * @param mixed[] $sample
  146. *
  147. * @return mixed[][]
  148. */
  149. private function antecedents(array $sample) : array
  150. {
  151. $cardinality = count($sample);
  152. $antecedents = $this->powerSet($sample);
  153. return array_filter($antecedents, function ($antecedent) use ($cardinality) {
  154. return (count($antecedent) != $cardinality) && ($antecedent != []);
  155. });
  156. }
  157. /**
  158. * Calculates frequent k = 1 item sets.
  159. *
  160. * @return mixed[][]
  161. */
  162. private function items() : array
  163. {
  164. $items = [];
  165. foreach ($this->samples as $sample) {
  166. foreach ($sample as $item) {
  167. if (!in_array($item, $items, true)) {
  168. $items[] = $item;
  169. }
  170. }
  171. }
  172. return array_map(function ($entry) {
  173. return [$entry];
  174. }, $items);
  175. }
  176. /**
  177. * Returns frequent item sets only.
  178. *
  179. * @param mixed[][] $samples
  180. *
  181. * @return mixed[][]
  182. */
  183. private function frequent(array $samples) : array
  184. {
  185. return array_filter($samples, function ($entry) {
  186. return $this->support($entry) >= $this->support;
  187. });
  188. }
  189. /**
  190. * Calculates frequent k item sets, where count($samples) == $k - 1.
  191. *
  192. * @param mixed[][] $samples
  193. *
  194. * @return mixed[][]
  195. */
  196. private function candidates(array $samples) : array
  197. {
  198. $candidates = [];
  199. foreach ($samples as $p) {
  200. foreach ($samples as $q) {
  201. if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
  202. continue;
  203. }
  204. $candidate = array_unique(array_merge($p, $q));
  205. if ($this->contains($candidates, $candidate)) {
  206. continue;
  207. }
  208. foreach ((array) $this->samples as $sample) {
  209. if ($this->subset($sample, $candidate)) {
  210. $candidates[] = $candidate;
  211. continue 2;
  212. }
  213. }
  214. }
  215. }
  216. return $candidates;
  217. }
  218. /**
  219. * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
  220. * $set.
  221. *
  222. * @param mixed[] $set
  223. * @param mixed[] $subset
  224. *
  225. * @return float
  226. */
  227. private function confidence(array $set, array $subset) : float
  228. {
  229. return $this->support($set) / $this->support($subset);
  230. }
  231. /**
  232. * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
  233. * pool.
  234. *
  235. * @see \Phpml\Association\Apriori::samples
  236. *
  237. * @param mixed[] $sample
  238. *
  239. * @return float
  240. */
  241. private function support(array $sample) : float
  242. {
  243. return $this->frequency($sample) / count($this->samples);
  244. }
  245. /**
  246. * Counts occurrences of $sample as subset in data pool.
  247. *
  248. * @see \Phpml\Association\Apriori::samples
  249. *
  250. * @param mixed[] $sample
  251. *
  252. * @return int
  253. */
  254. private function frequency(array $sample) : int
  255. {
  256. return count(array_filter($this->samples, function ($entry) use ($sample) {
  257. return $this->subset($entry, $sample);
  258. }));
  259. }
  260. /**
  261. * Returns true if set is an element of system.
  262. *
  263. * @see \Phpml\Association\Apriori::equals()
  264. *
  265. * @param mixed[][] $system
  266. * @param mixed[] $set
  267. *
  268. * @return bool
  269. */
  270. private function contains(array $system, array $set) : bool
  271. {
  272. return (bool) array_filter($system, function ($entry) use ($set) {
  273. return $this->equals($entry, $set);
  274. });
  275. }
  276. /**
  277. * Returns true if subset is a (proper) subset of set by its items string representation.
  278. *
  279. * @param mixed[] $set
  280. * @param mixed[] $subset
  281. *
  282. * @return bool
  283. */
  284. private function subset(array $set, array $subset) : bool
  285. {
  286. return !array_diff($subset, array_intersect($subset, $set));
  287. }
  288. /**
  289. * Returns true if string representation of items does not differ.
  290. *
  291. * @param mixed[] $set1
  292. * @param mixed[] $set2
  293. *
  294. * @return bool
  295. */
  296. private function equals(array $set1, array $set2) : bool
  297. {
  298. return array_diff($set1, $set2) == array_diff($set2, $set1);
  299. }
  300. }