/lib/mlbackend/php/phpml/src/Phpml/Association/Apriori.php

https://github.com/sbourget/moodle · PHP · 332 lines · 165 code · 46 blank · 121 comment · 14 complexity · cda5f2af39aded081229c3e4a48c1aec MD5 · raw file

  1. <?php
  2. declare(strict_types=1);
  3. namespace Phpml\Association;
  4. use Phpml\Helper\Predictable;
  5. use Phpml\Helper\Trainable;
  /**
   * Apriori association rule learner.
   *
   * Mines frequent item sets from the training samples and derives association
   * rules (antecedent => consequent) from them. Item sets are compared by the
   * string representation of their items (the semantics of array_diff()).
   *
   * The training samples are read from $this->samples, which is declared outside
   * this class (presumably by the Trainable trait - confirm against the trait).
   */
  class Apriori implements Associator
  {
      use Trainable;
      use Predictable;

      public const ARRAY_KEY_ANTECEDENT = 'antecedent';
      public const ARRAY_KEY_CONFIDENCE = 'confidence';
      public const ARRAY_KEY_CONSEQUENT = 'consequent';
      public const ARRAY_KEY_SUPPORT = 'support';

      /**
       * Minimum confidence a rule must reach to be kept.
       *
       * @var float
       */
      private $confidence;

      /**
       * Cache of the frequent item sets, indexed by their length k (starting at 1).
       *
       * @var mixed[][][]
       */
      private $large = [];

      /**
       * Minimum support (relative frequency) an item set must reach to be frequent.
       *
       * @var float
       */
      private $support;

      /**
       * Cache of the generated association rules.
       *
       * @var mixed[][]
       */
      private $rules = [];

      /**
       * Apriori constructor.
       *
       * @param float $support    Minimum support threshold for frequent item sets
       * @param float $confidence Minimum confidence threshold for generated rules
       */
      public function __construct(float $support = 0.0, float $confidence = 0.0)
      {
          $this->support = $support;
          $this->confidence = $confidence;
      }

      /**
       * Get all association rules which are generated for every k-length frequent item set.
       *
       * Frequent item sets and rules are computed lazily and cached while they are
       * non-empty; an empty result is recomputed on the next call.
       *
       * NOTE(review): nothing in this class resets the caches, so rules may be stale
       * if samples are added after the first call - confirm against the training trait.
       *
       * @return mixed[][] List of rules, each keyed by the ARRAY_KEY_* constants
       */
      public function getRules(): array
      {
          if (count($this->large) === 0) {
              $this->large = $this->apriori();
          }

          // $this->rules is empty at this point, so there is nothing to reset first.
          if (count($this->rules) === 0) {
              $this->generateAllRules();
          }

          return $this->rules;
      }

      /**
       * Generates frequent item sets.
       *
       * Starts with the frequent 1-item sets and keeps deriving (k + 1)-item
       * candidates from the frequent k-item sets until no frequent set remains.
       *
       * @return mixed[][][] Frequent item sets indexed by their length k
       */
      public function apriori(): array
      {
          $L = [];

          $items = $this->frequent($this->items());
          // frequent() re-indexes its result, so index 0 exists exactly when the list is non-empty.
          for ($k = 1; isset($items[0]); ++$k) {
              $L[$k] = $items;
              $items = $this->frequent($this->candidates($items));
          }

          return $L;
      }

      /**
       * Returns the consequents of every rule whose antecedent equals $sample.
       *
       * @param mixed[] $sample
       *
       * @return mixed[][]
       */
      protected function predictSample(array $sample): array
      {
          $consequents = [];

          foreach ($this->getRules() as $rule) {
              if ($this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample)) {
                  $consequents[] = $rule[self::ARRAY_KEY_CONSEQUENT];
              }
          }

          return $consequents;
      }

      /**
       * Generate rules for each k-length frequent item set.
       *
       * Starts at k = 2 because a rule needs a non-empty antecedent and consequent.
       */
      private function generateAllRules(): void
      {
          for ($k = 2; isset($this->large[$k]); ++$k) {
              foreach ($this->large[$k] as $frequent) {
                  $this->generateRules($frequent);
              }
          }
      }

      /**
       * Generate confident rules for frequent item set.
       *
       * Every non-empty proper subset of $frequent is tried as antecedent; the rule
       * is appended to $this->rules when its confidence reaches the configured minimum.
       *
       * @param mixed[] $frequent
       */
      private function generateRules(array $frequent): void
      {
          // The support of the whole item set is the same for every rule derived from it.
          $support = $this->support($frequent);

          foreach ($this->antecedents($frequent) as $antecedent) {
              $confidence = $this->confidence($frequent, $antecedent);

              if ($this->confidence <= $confidence) {
                  $this->rules[] = [
                      self::ARRAY_KEY_ANTECEDENT => $antecedent,
                      self::ARRAY_KEY_CONSEQUENT => array_values(array_diff($frequent, $antecedent)),
                      self::ARRAY_KEY_SUPPORT => $support,
                      self::ARRAY_KEY_CONFIDENCE => $confidence,
                  ];
              }
          }
      }

      /**
       * Generates the power set for given item set $sample.
       *
       * @param mixed[] $sample
       *
       * @return mixed[][] All subsets of $sample, beginning with the empty set
       */
      private function powerSet(array $sample): array
      {
          $results = [[]];

          foreach ($sample as $item) {
              // foreach iterates over a snapshot, so the sets appended below are not revisited
              // for the current $item.
              foreach ($results as $combination) {
                  $results[] = array_merge([$item], $combination);
              }
          }

          return $results;
      }

      /**
       * Generates all proper subsets for given set $sample without the empty set.
       *
       * @param mixed[] $sample
       *
       * @return mixed[][]
       */
      private function antecedents(array $sample): array
      {
          $cardinality = count($sample);

          return array_values(array_filter(
              $this->powerSet($sample),
              static function (array $antecedent) use ($cardinality): bool {
                  $size = count($antecedent);

                  return $size !== $cardinality && $size !== 0;
              }
          ));
      }

      /**
       * Collects every distinct item of the training samples as a 1-item set.
       *
       * Items are de-duplicated by strict identity, in order of first appearance.
       *
       * @return mixed[][]
       */
      private function items(): array
      {
          $items = [];

          foreach ($this->samples as $sample) {
              foreach ($sample as $item) {
                  if (!in_array($item, $items, true)) {
                      $items[] = $item;
                  }
              }
          }

          return array_map(static function ($entry): array {
              return [$entry];
          }, $items);
      }

      /**
       * Returns frequent item sets only.
       *
       * An item set is frequent when its support reaches the configured minimum.
       *
       * @param mixed[][] $samples
       *
       * @return mixed[][] Re-indexed list of the frequent item sets
       */
      private function frequent(array $samples): array
      {
          return array_values(array_filter($samples, function ($entry): bool {
              return $this->support($entry) >= $this->support;
          }));
      }

      /**
       * Builds the candidate item sets of length k from the item sets of length k - 1.
       *
       * Two item sets are joined when they differ in exactly one item each. A
       * candidate is kept once, and only if at least one training sample contains it.
       *
       * @param mixed[][] $samples Item sets of equal length
       *
       * @return mixed[][]
       */
      private function candidates(array $samples): array
      {
          $candidates = [];

          foreach ($samples as $p) {
              foreach ($samples as $q) {
                  // The symmetric difference must hold exactly two items (one from each set).
                  if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) !== 2) {
                      continue;
                  }

                  $candidate = array_values(array_unique(array_merge($p, $q)));

                  if ($this->contains($candidates, $candidate)) {
                      continue;
                  }

                  foreach ($this->samples as $sample) {
                      if ($this->subset($sample, $candidate)) {
                          $candidates[] = $candidate;

                          // One containing sample is enough; move on to the next $q.
                          continue 2;
                      }
                  }
              }
          }

          return $candidates;
      }

      /**
       * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
       * $set.
       *
       * @param mixed[] $set
       * @param mixed[] $subset
       */
      private function confidence(array $set, array $subset): float
      {
          return $this->support($set) / $this->support($subset);
      }

      /**
       * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
       * pool.
       *
       * @see \Phpml\Association\Apriori::samples
       *
       * @param mixed[] $sample
       */
      private function support(array $sample): float
      {
          return $this->frequency($sample) / count($this->samples);
      }

      /**
       * Counts occurrences of $sample as subset in data pool.
       *
       * @see \Phpml\Association\Apriori::samples
       *
       * @param mixed[] $sample
       */
      private function frequency(array $sample): int
      {
          $occurrences = 0;

          foreach ($this->samples as $entry) {
              if ($this->subset($entry, $sample)) {
                  ++$occurrences;
              }
          }

          return $occurrences;
      }

      /**
       * Returns true if set is an element of system.
       *
       * @see \Phpml\Association\Apriori::equals()
       *
       * @param mixed[][] $system
       * @param mixed[] $set
       */
      private function contains(array $system, array $set): bool
      {
          foreach ($system as $entry) {
              if ($this->equals($entry, $set)) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Returns true if every item of subset is also in set, by the items' string representation.
       *
       * The sets may be equal; the subset does not have to be a proper one.
       *
       * @param mixed[] $set
       * @param mixed[] $subset
       */
      private function subset(array $set, array $subset): bool
      {
          return count(array_diff($subset, $set)) === 0;
      }

      /**
       * Returns true if string representation of items does not differ.
       *
       * Both one-sided differences must be empty. Comparing the two differences
       * with a loose == is not sufficient: differing numeric strings such as '1.0'
       * and '1' under the same key would compare as equal.
       *
       * @param mixed[] $set1
       * @param mixed[] $set2
       */
      private function equals(array $set1, array $set2): bool
      {
          return count(array_diff($set1, $set2)) === 0
              && count(array_diff($set2, $set1)) === 0;
      }
  }
  286. }