/core/src/main/scala/scalaz/BKTree.scala

http://github.com/scalaz/scalaz · Scala · 201 lines · 148 code · 28 blank · 25 comment · 13 complexity · fe2eb35ea2a24e8a53832ee523af7262 MD5 · raw file

  1. package scalaz
  2. import collection.immutable.IntMap
  3. import annotation.tailrec
  /**
   * Burkhard-Keller trees provide an implementation of sets which, apart
   * from the ordinary operations, also have an approximate member search,
   * allowing you to search for elements that are within a distance `n` of
   * the element you are searching for. The distance is determined using
   * a metric on the type of elements. Therefore all elements must
   * implement the [[scalaz.MetricSpace]] type class, rather than the more usual
   * [[scalaz.Order]].
   *
   * Useful integer-valued metrics include, for example, the Levenshtein edit
   * distance between two strings and the Hamming distance between two
   * binary strings.
   *
   * The worst case complexity of many of these operations is quite bad,
   * but the expected behavior varies greatly with the metric. For
   * example, the discrete metric (`distance x y | y == x = 0 |
   * otherwise = 1`) makes BK-trees behave abysmally. Metrics such as the
   * ones mentioned above should give good performance characteristics.
   *
   * Note that `+` does not check whether an element is already present:
   * adding the same element twice stores it twice and counts it twice in `size`.
   *
   * This implementation is a port of Haskell's [[http://hackage.haskell.org/packages/archive/bktrees/0.2.1/doc/html/src/Data-Set-BKTree.html Data.Set.BKTree]]
   */
  @deprecated("This class depends on `MetricSpace` which is deprecated, too.", "7.0.1")
  sealed abstract class BKTree[A] extends Product with Serializable {

    /** Returns true if this tree holds no elements. */
    def isEmpty: Boolean =
      this match {
        case BKTreeEmpty() => true
        case BKTreeNode(_, _, _) => false
      }

    /**
     * Applies `f` to every element.
     *
     * The shape of the tree (sizes and the distance keys of the children) is
     * copied unchanged, so the result is only consistent with a metric on `B`
     * if `f` preserves distances.
     */
    def map[B](f: A => B): BKTree[B] =
      this match {
        case BKTreeEmpty() => BKTreeEmpty()
        case BKTreeNode(a, s, c) =>
          BKTreeNode(f(a), s, c.transform((_: Int, z: BKTree[A]) => z map f))
      }

    /** The number of elements stored in this tree. */
    def size: Int =
      this match {
        case BKTreeEmpty() => 0
        case BKTreeNode(_, s, _) => s
      }

    /**
     * Returns a tree that additionally contains `a`.
     *
     * The element is placed in the child keyed by its distance to the root
     * value, recursively.
     */
    def +(a: A)(implicit A: MetricSpace[A]): BKTree[A] =
      this match {
        case BKTreeEmpty() => BKTreeNode(a, 1, IntMap.empty)
        case BKTreeNode(v, s, c) =>
          val d = A.distance(v, a)
          val child: BKTree[A] = c get d match {
            case None => BKTreeNode(a, 1, IntMap.empty)
            case Some(w) => w + a
          }
          BKTreeNode(v, s + 1, c.updated(d, child))
      }

    /** Returns a tree containing the elements of this tree followed by all elements of `t`. */
    def ++(t: BKTree[A])(implicit m: MetricSpace[A]): BKTree[A] =
      t.values.foldLeft[BKTree[A]](this)(_ + _)

    /**
     * Returns a tree without (one occurrence of) `a`.
     *
     * If `a` is at distance 0 from the root value, the root is dropped and the
     * tree is rebuilt from the remaining elements. Otherwise the removal is
     * delegated to the child keyed by that distance and the size is recomputed.
     */
    def -(a: A)(implicit A: MetricSpace[A]): BKTree[A] =
      this match {
        case BKTreeEmpty() => BKTreeEmpty()
        case BKTreeNode(v, _, c) =>
          val d = A.distance(v, a)
          if (d == 0)
            BKTree(c.valuesIterator.toList.flatMap(_.values): _*)
          else {
            // Drop the child entirely when the removal empties it, so that no
            // empty subtrees are left behind in the children map.
            val remaining = updateMap(c, d, (t: BKTree[A]) => {
              val rest = t - a
              if (rest.isEmpty) None else Some(rest)
            })
            BKTreeNode(v, remaining.valuesIterator.map(_.size).sum + 1, remaining)
          }
      }

    /** Returns this tree with every element of `t` removed via `-`. */
    def --(t: BKTree[A])(implicit m: MetricSpace[A]): BKTree[A] =
      t.values.foldLeft[BKTree[A]](this)(_ - _)

    /** All elements of this tree: the root value first, then the children in key order. */
    def values: List[A] =
      this match {
        case BKTreeEmpty() => Nil
        case BKTreeNode(v, _, c) => v :: c.valuesIterator.toList.flatMap(_.values)
      }

    /**
     * Returns true if this set contains `a`, i.e. an element at distance 0 from `a`.
     */
    @tailrec
    final def contains(a: A)(implicit A: MetricSpace[A]): Boolean =
      this match {
        case BKTreeEmpty() => false
        case BKTreeNode(v, _, c) =>
          val d = A.distance(v, a)
          d == 0 || (c get d match {
            case None => false
            case Some(w) => w contains a
          })
      }

    /** An alias for `contains` */
    final def -?-(a: A)(implicit A: MetricSpace[A]): Boolean = contains(a)

    /** Returns true if this set contains an element which has a distance from `a` that is less than or equal to `n` */
    def containsApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean =
      this match {
        case BKTreeEmpty() => false
        case BKTreeNode(v, _, _) =>
          val d = A.distance(v, a)
          d <= n || subChildren(d, n).valuesIterator.exists(_.containsApproximate(a, n))
      }

    /** An alias for `containsApproximate` */
    def =?=(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean = containsApproximate(a, n)

    /** Returns the elements which have a distance from `a` that is less than or equal to `n`. */
    def valuesApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] =
      this match {
        case BKTreeEmpty() => Nil
        case BKTreeNode(v, _, _) =>
          val d = A.distance(v, a)
          val fromChildren =
            subChildren(d, n).valuesIterator.toList.flatMap(_.valuesApproximate(a, n))
          if (d <= n) v :: fromChildren else fromChildren
      }

    /** An alias for `valuesApproximate` */
    def |=|(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] = valuesApproximate(a, n)

    private type M[V] = IntMap[V]

    /** The children of this node whose key lies in `[d - n, d + n]`; empty for an empty tree. */
    private def subChildren(d: Int, n: Int): M[BKTree[A]] =
      this match {
        case BKTreeEmpty() => IntMap.empty
        case BKTreeNode(_, _, c) => subMap(c, d, n)
      }

    /**
     * Restricts `m` to the keys in `[d - n, d + n]`.
     *
     * The bounds are computed as `Long` so that a large `n` (for example
     * `Int.MaxValue`) cannot overflow and wrongly exclude every key.
     */
    private def subMap(m: M[BKTree[A]], d: Int, n: Int): M[BKTree[A]] = {
      val lo = d.toLong - n
      val hi = d.toLong + n
      m.filter { case (i, _) => lo <= i && i <= hi }
    }

    /** Splits the children of this node around `k`; see `splitMap`. */
    private def splitChildren(k: Int): (M[BKTree[A]], M[BKTree[A]]) =
      this match {
        case BKTreeEmpty() => (IntMap.empty, IntMap.empty)
        case BKTreeNode(_, _, c) => splitMap(c, k)
      }

    /** Returns the entries with keys strictly below `k` and strictly above `k`; key `k` itself is dropped. */
    private def splitMap(m: M[BKTree[A]], k: Int): (M[BKTree[A]], M[BKTree[A]]) =
      (m.filter { case (i, _) => i < k }, m.filter { case (i, _) => i > k })

    /**
     * Rewrites the entry at key `k` with `f`: `Some(t)` replaces the entry,
     * `None` removes it. A missing key leaves `m` unchanged.
     */
    private def updateMap(m: M[BKTree[A]], k: Int, f: BKTree[A] => Option[BKTree[A]]): M[BKTree[A]] =
      m get k match {
        case None => m
        case Some(v) =>
          f(v) match {
            case None => m - k
            case Some(value) => m.updated(k, value)
          }
      }
  }
  /**
   * A non-empty tree.
   *
   * @param value    the element stored at this node
   * @param sz       the number of elements in the tree rooted at this node
   * @param children the subtrees, keyed by the distance of their elements from `value`
   */
  private final case class BKTreeNode[A](value: A, sz: Int, children: IntMap[BKTree[A]]) extends BKTree[A]
  /** The tree without any elements. */
  private final case class BKTreeEmpty[A]() extends BKTree[A]
  object BKTree extends BKTreeInstances with BKTreeFunctions {
    /** Builds a tree by adding the given elements, left to right, to the empty tree. */
    def apply[A: MetricSpace](as: A*): BKTree[A] = {
      val seed: BKTree[A] = emptyBKTree[A]
      as.foldLeft(seed)(_ + _)
    }
  }
  trait BKTreeFunctions {
    /** The tree that contains no elements. */
    def emptyBKTree[A]: BKTree[A] = BKTreeEmpty[A]()
  }
  sealed abstract class BKTreeInstances {
    /** `Functor` and `Length` for `BKTree`, delegating to `BKTree#map` and `BKTree#size`. */
    implicit def bKTreeInstance: Functor[BKTree] with Length[BKTree] =
      new Functor[BKTree] with Length[BKTree] {
        def map[A, B](fa: BKTree[A])(f: A => B): BKTree[B] = fa map f
        def length[A](fa: BKTree[A]): Int = fa.size
      }

    /** `Monoid` whose `append` is `++` and whose `zero` is the empty tree. */
    implicit def bKTreeMonoid[A: MetricSpace]: Monoid[BKTree[A]] = new Monoid[BKTree[A]] {
      def append(f1: BKTree[A], f2: => BKTree[A]): BKTree[A] = f1 ++ f2
      def zero: BKTree[A] = BKTree[A]()
    }

    /**
     * Equality of trees, defined as equality of their `values` lists
     * (compared with the `std.list` `Equal` instance).
     */
    implicit def bkTreeEqual[A](implicit A: Equal[A]): Equal[BKTree[A]] = {
      import std.list._
      Equal.equalBy((ba: BKTree[A]) => ba.values)
    }
  }