PageRenderTime 58ms CodeModel.GetById 13ms app.highlight 39ms RepoModel.GetById 2ms app.codeStats 0ms

/core/src/main/scala/scalaz/BKTree.scala

http://github.com/scalaz/scalaz
Scala | 201 lines | 148 code | 28 blank | 25 comment | 13 complexity | fe2eb35ea2a24e8a53832ee523af7262 MD5 | raw file
  1package scalaz
  2
  3import collection.immutable.IntMap
  4import annotation.tailrec
  5
  6
  7/**
  8 * Burkhard-Keller trees provide an implementation of sets which apart
  9 * from the ordinary operations also has an approximate member search,
 10 * allowing you to search for elements that are of a distance `n` from
 11 * the element you are searching for. The distance is determined using
 12 * a metric on the type of elements. Therefore all elements must
 13 * implement the [[scalaz.MetricSpace]] type class, rather than the more usual
 14 * [[scalaz.Ordering]].
 15 *
 16 * The worst case complexity of many of these operations is quite bad,
 17 * but the expected behavior varies greatly with the metric. For
 18 * example, the discrete metric (`distance x y | y == x = 0 |
 19 * otherwise = 1`) makes BK-trees behave abysmally. The metrics
 20 * mentioned above should give good performance characteristics.
 21 *
 22 * This implementation is a port of Haskell's [[http://hackage.haskell.org/packages/archive/bktrees/0.2.1/doc/html/src/Data-Set-BKTree.html Data.Set.BKTree]]
 23 */
 24@deprecated("This class depends on `MetricSpace` which is deprecated, too.", "7.0.1")
 25sealed abstract class BKTree[A] extends Product with Serializable {
 26  def isEmpty: Boolean =
 27    this match {
 28      case BKTreeEmpty()       => true
 29      case BKTreeNode(_, _, _) => false
 30    }
 31
 32  def map[B](f: A => B): BKTree[B] =
 33    this match {
 34      case BKTreeEmpty()       => BKTreeEmpty()
 35      case BKTreeNode(a, s, c) => BKTreeNode(f(a), s, c.transform((_: Int, z: BKTree[A]) => z map f))
 36    }
 37
 38  def size: Int =
 39    this match {
 40      case BKTreeEmpty()       => 0
 41      case BKTreeNode(_, s, _) => s
 42    }
 43
 44  def +(a: A)(implicit A: MetricSpace[A]): BKTree[A] =
 45    this match {
 46      case BKTreeEmpty()       => BKTreeNode(a, 1, IntMap.empty)
 47      case BKTreeNode(v, s, c) => {
 48        val d = A.distance(v, a)
 49        BKTreeNode(v, s + 1, c + ((d, c get d match {
 50          case None    => BKTreeNode(a, 1, IntMap.empty)
 51          case Some(w) => w + a
 52        })))
 53      }
 54    }
 55
 56  def ++(t: BKTree[A])(implicit m: MetricSpace[A]): BKTree[A] = {
 57    var k: BKTree[A] = this
 58    for (v <- t.values)
 59      k = k + v
 60    k
 61  }
 62
 63  def -(a: A)(implicit A: MetricSpace[A]): BKTree[A] =
 64    this match {
 65      case BKTreeEmpty()       => BKTreeEmpty()
 66      case BKTreeNode(v, _, c) => {
 67        val d = A.distance(v, a)
 68        if(d == 0) BKTree(c.values.seq.flatMap(_.values).toSeq: _*)
 69        else {
 70          val subTree = updateMap(c, d, (t: BKTree[A]) => Some(t - a))
 71          val size = subTree.values.map(_.size).sum + 1
 72          BKTreeNode(v, size, subTree)
 73        }
 74      }
 75    }
 76
 77  def --(t: BKTree[A])(implicit m: MetricSpace[A]): BKTree[A] = {
 78    var k: BKTree[A] = this
 79    for (v <- t.values)
 80      k = k - v
 81    k
 82  }  
 83
 84  def values: List[A] =
 85    this match {
 86      case BKTreeEmpty()       => Nil
 87      case BKTreeNode(v, _, c) => v :: c.valuesIterator.toList.flatMap(_.values)
 88    }
 89
 90
 91  /**
 92   * Returns true of this set contains `a`.
 93   */
 94  @tailrec
 95  final def contains(a: A)(implicit A: MetricSpace[A]): Boolean =
 96    this match {
 97      case BKTreeEmpty()       => false
 98      case BKTreeNode(v, _, c) =>
 99        val d = A.distance(v, a)
100        d == 0 || (c get d match {
101          case None    => false
102          case Some(w) => w contains a
103        })
104    }
105
106  /** An alias for `contains` */
107  final def -?-(a: A)(implicit A: MetricSpace[A]): Boolean = contains(a)
108
109  /** Returns true if this set contains an element which has a distance from `a` that is less than or equal to `n` */
110  def containsApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean =
111    this match {
112      case BKTreeEmpty()       => false
113      case BKTreeNode(v, _, c) =>
114        val d = A.distance(v, a)
115        d <= n || (subChildren(d, n) exists (_._2 containsApproximate(a, n)))
116    }
117
118  /** An alias for `containsApproximate` */
119  def =?=(a: A, n: Int)(implicit A: MetricSpace[A]): Boolean = containsApproximate(a, n)
120
121  /** Returns the elements which have an distance from `a` that is less than or equal to `n`. */
122  def valuesApproximate(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] =
123    this match {
124      case BKTreeEmpty()       => Nil
125      case BKTreeNode(v, _, c) =>
126        val d = A.distance(v, a)
127        val k = subChildren(d, n).valuesIterator.toList flatMap (_ valuesApproximate(a, n))
128        if (d <= n)
129          v :: k
130        else
131          k
132    }
133
134  /** An alias for `valuesApproximate` */
135  def |=|(a: A, n: Int)(implicit A: MetricSpace[A]): List[A] = valuesApproximate(a, n)
136
137  private type M[A] = IntMap[A]
138
139  private def subChildren(d: Int, n: Int): M[BKTree[A]] =
140    this match {
141      case BKTreeEmpty()       => IntMap.empty
142      case BKTreeNode(_, _, c) => subMap(c, d, n)
143    }
144
145  private def subMap(m: M[BKTree[A]], d: Int, n: Int): M[BKTree[A]] =
146    splitMap(splitMap(m, d - n - 1)._2, d + n + 1)._1
147
148  private def splitChildren(k: Int): (M[BKTree[A]], M[BKTree[A]]) =
149    this match {
150      case BKTreeEmpty()       => (IntMap.empty, IntMap.empty)
151      case BKTreeNode(_, _, c) => splitMap(c, k)
152    }
153
154  private def splitMap(m: M[BKTree[A]], k: Int): (M[BKTree[A]], M[BKTree[A]]) = {
155    var m1: M[BKTree[A]] = IntMap.empty
156    var m2: M[BKTree[A]] = IntMap.empty
157    for ((i, v) <- m.iterator) {
158      if (i < k)
159        m1 = m1 + ((i, v))
160      else if (i > k)
161        m2 = m2 + ((i, v))
162    }
163    (m1, m2)
164  }
165
166  private def updateMap(m: M[BKTree[A]], k: Int, f: BKTree[A] => Option[BKTree[A]]) =
167    m get k match {
168      case None => m
169      case Some(v) => f(v) match {
170        case None => m - k
171        case Some(value) => m.updated(k, value)
172      }
173    }
174}
175
/**
 * A non-empty tree: `value` is the element stored at this node, `sz` the number of
 * elements in the whole subtree (this node included), and `children` the subtrees
 * keyed by their distance from `value`.
 */
private final case class BKTreeNode[A](value: A, sz: Int, children: IntMap[BKTree[A]]) extends BKTree[A]
177
/** The tree without any elements. */
private final case class BKTreeEmpty[A]() extends BKTree[A]
179
object BKTree extends BKTreeInstances with BKTreeFunctions {
  /** Builds a tree by inserting `as` from left to right into an empty tree. */
  def apply[A: MetricSpace](as: A*): BKTree[A] =
    as.foldLeft(emptyBKTree[A])(_ + _)
}
183
trait BKTreeFunctions {
  /** The tree that contains no elements. */
  def emptyBKTree[A]: BKTree[A] =
    BKTreeEmpty[A]()
}
187
/** Type class instances for [[BKTree]]. */
sealed abstract class BKTreeInstances {
  /** `Functor` and `Length` for `BKTree`, delegating to `BKTree#map` and `BKTree#size`. */
  implicit def bKTreeInstance: Functor[BKTree] with Length[BKTree] = new Functor[BKTree] with Length[BKTree] {
    def map[A, B](fa: BKTree[A])(f: A => B): BKTree[B] = fa map f
    def length[A](fa: BKTree[A]): Int = fa.size
  }

  /** Monoid whose `append` is `++` and whose `zero` is the tree built from no elements. */
  implicit def bKTreeMonoid[A: MetricSpace]: Monoid[BKTree[A]] = new Monoid[BKTree[A]] {
    def append(f1: BKTree[A], f2: => BKTree[A]): BKTree[A] = f1 ++ f2
    def zero: BKTree[A] = BKTree[A]()
  }

  /**
   * Equality of two trees, decided by comparing their `values` lists.
   *
   * Because `values` lists the root first, two trees holding the same elements
   * but built in a different insertion order can compare as unequal.
   */
  implicit def bkTreeEqual[A](implicit A: Equal[A]): Equal[BKTree[A]] = {
    // Brings the `Equal[List[A]]` instance into scope for `equalBy`.
    import std.list._
    Equal.equalBy((ba: BKTree[A]) => ba.values)
  }
}