/forcomp/src/main/scala/forcomp/Anagrams.scala

https://bitbucket.org/wdlinch3/scalastuff · Scala · 244 lines · 58 code · 19 blank · 167 comment · 6 complexity · 1e607f381697347e563ab241d4c552e1 MD5 · raw file

  1. package forcomp
  2. import common._
  3. import math.Ordering
  4. object Anagrams {
  /** A word is simply a `String`. */
  type Word = String
  /** A sentence is a `List` of words. */
  type Sentence = List[Word]
  /**
   * An `Occurrences` value is a `List` of (character, count) pairs recording how
   * often each character appears.
   *
   * Invariants expected of every occurrence list:
   *  - it is sorted alphabetically by the character of each pair;
   *  - every character is lowercase;
   *  - every count is a positive integer, so a character whose frequency is
   *    zero is simply absent from the list.
   *
   * A list of (character, count) pairs that is not sorted is **not** an
   * occurrence list.
   */
  type Occurrences = List[(Char, Int)]
  /**
   * The dictionary is simply a list of words.
   *
   * It is initialised from `loadDictionary` (not defined in this file; presumably
   * brought into scope by `import common._` -- verify). Because this is a strict
   * `val`, the call happens when the `Anagrams` object is initialised.
   */
  val dictionary: List[Word] = loadDictionary
  /**
   * Converts a word into its character occurrence list.
   *
   * Uppercase and lowercase versions of a character are treated as the same
   * character and are represented by the lowercase character in the result.
   *
   * @param w the word to analyse; may be empty
   * @return one `(char, count)` pair per distinct lowercase character of `w`,
   *         sorted by character; `Nil` for the empty word
   */
  def wordOccurrences(w: Word): Occurrences =
    w.toLowerCase
      // A single grouping pass counts every letter at once, instead of
      // re-scanning the whole word for each distinct letter.
      .groupBy(identity)
      .map { case (char, sameChars) => (char, sameChars.length) }
      .toList
      // Characters are unique after grouping, so ordering by the character
      // alone yields the required alphabetical occurrence list.
      .sortBy(_._1)
  /**
   * Converts a sentence into the occurrence list of all of its characters.
   *
   * @param s the sentence whose words are analysed together
   * @return the occurrence list of the concatenation of all words in `s`
   */
  def sentenceOccurrences(s: Sentence): Occurrences = {
    // The words are glued into one string first. Computing the occurrences word
    // by word and concatenating the results (e.g. `s flatMap wordOccurrences`)
    // would keep per-word entries apart: "I love you" would yield two separate
    // ('o', 1) pairs instead of a single ('o', 2) pair.
    val allLetters: Word = s.mkString("")
    wordOccurrences(allLetters)
  }
  /**
   * Index of the dictionary by occurrence list: every occurrence list maps to all
   * dictionary words that have exactly that occurrence list. Looking a word's
   * occurrence list up in this map therefore yields all of its anagrams.
   *
   * For example, the word "eat" has the occurrence list
   *
   *   `List(('a', 1), ('e', 1), ('t', 1))`
   *
   * and so do "ate" and "tea", so the map contains the entry
   *
   *   List(('a', 1), ('e', 1), ('t', 1)) -> Seq("ate", "eat", "tea")
   *
   * The value is `lazy`, so the index is built on first access only.
   */
  lazy val dictionaryByOccurrences: Map[Occurrences, List[Word]] =
    dictionary.groupBy(wordOccurrences)
  /**
   * Returns all the anagrams of a given word.
   *
   * @param word the word to look up (case-insensitive, via `wordOccurrences`)
   * @return every dictionary word with the same occurrence list as `word`, or
   *         `Nil` when no dictionary word matches
   */
  def wordAnagrams(word: Word): List[Word] =
    // `getOrElse` rather than `apply`: a word whose letters match no dictionary
    // entry has no anagrams, which is an empty result and not an error.
    dictionaryByOccurrences.getOrElse(wordOccurrences(word), Nil)
  /**
   * Returns the list of all subsets of the occurrence list.
   *
   * The occurrence list itself counts as a subset, i.e. `List(('k', 1), ('o', 1))`
   * is a subset of `List(('k', 1), ('o', 1))`, and so does the empty subset `List()`.
   *
   * Example: the subsets of the occurrence list `List(('a', 2), ('b', 2))` are:
   *
   *   List(
   *     List(),
   *     List(('a', 1)),
   *     List(('a', 2)),
   *     List(('b', 1)),
   *     List(('a', 1), ('b', 1)),
   *     List(('a', 2), ('b', 1)),
   *     List(('b', 2)),
   *     List(('a', 1), ('b', 2)),
   *     List(('a', 2), ('b', 2))
   *   )
   *
   * The order of the subsets does not matter -- the example above could have been
   * listed in some other order.
   *
   * @param occurrences the occurrence list to take subsets of
   * @return every subset of `occurrences`, including `Nil` and `occurrences` itself
   */
  def combinations(occurrences: Occurrences): List[Occurrences] =
    // Right fold: start from the single empty subset and, walking from the last
    // entry to the first, cross every choice for that entry (`listMaker`) with
    // the subsets already built for the entries after it (`generator`).
    occurrences.foldRight(List[Occurrences](Nil)) { (entry, laterSubsets) =>
      generator(listMaker(entry), laterSubsets)
    }
  /**
   * Expands a single `(char, count)` occurrence into every choice available for
   * that character in a subset.
   *
   * @param occ the occurrence to expand
   * @return the single-entry lists `List((char, 1))` up to `List((char, count))`
   *         in ascending order, followed by the empty list, which stands for
   *         "character not used" (frequency 0)
   */
  def listMaker(occ: (Char, Int)): List[Occurrences] = {
    val (char, maxCount) = occ
    // One single-entry occurrence list per admissible frequency 1..maxCount.
    val usedAtLeastOnce: List[Occurrences] =
      (1 to maxCount).toList.map(count => List((char, count)))
    // The empty list goes last, matching the order callers have always seen.
    usedAtLeastOnce :+ Nil
  }
  /**
   * Builds every concatenation of one occurrence list from `occ1` with one from
   * `occ0`. This is the combining step at the heart of `combinations`.
   *
   * It is modelled on the simpler character-list version:
   *
   *   def generator0(set1: List[List[Char]], set0: List[List[Char]]): List[List[Char]] =
   *     for (e1 <- set1; e0 <- set0) yield List(e1, e0).flatten
   *
   * @param occ1 the occurrence lists that supply the front part of each result
   * @param occ0 the occurrence lists that supply the back part of each result
   * @return `front ::: back` for every pair, iterating `occ1` in the outer loop
   *         and `occ0` in the inner loop
   */
  def generator(occ1: List[Occurrences], occ0: List[Occurrences]): List[Occurrences] =
    occ1.flatMap(front => occ0.map(back => front ::: back))
  /**
   * Subtracts occurrence list `y` from occurrence list `x`.
   *
   * The precondition is that `y` is a subset of `x`: any character appearing in
   * `y` must appear in `x`, and its frequency in `y` must be smaller than or
   * equal to its frequency in `x`.
   *
   * The result is itself an occurrence list: it keeps the (sorted) order of `x`
   * and contains no zero entries.
   *
   * If the precondition is violated the call does not throw: characters of `y`
   * that are absent from `x` are ignored, and a character whose remaining count
   * would be zero or negative is dropped, so the result still only holds
   * positive counts.
   *
   * @param x the occurrence list to subtract from
   * @param y the occurrence list to remove
   * @return the entries of `x` with the counts of `y` removed
   */
  def subtract(x: Occurrences, y: Occurrences): Occurrences = {
    // Index `y` once so each entry of `x` is adjusted with a single lookup;
    // characters not mentioned in `y` lose nothing.
    val toRemove: Map[Char, Int] = y.toMap.withDefaultValue(0)
    x.flatMap { case (char, count) =>
      val remaining = count - toRemove(char)
      // Exhausted characters must not appear in an occurrence list.
      if (remaining > 0) List((char, remaining)) else Nil
    }
  }
  /**
   * Turns a `(char, count)` pair into a list that is empty when the count is
   * zero and holds just that pair otherwise. Intended for use with `flatMap`,
   * which then removes zero entries from an occurrence list.
   *
   * @param elem the pair to inspect
   * @return `Nil` if the count is exactly 0, otherwise `List(elem)`
   */
  def dropZero(elem: (Char, Int)): List[(Char, Int)] =
    if (elem._2 == 0) Nil else List(elem)
  /**
   * Returns a list of all anagram sentences of the given sentence.
   *
   * An anagram of a sentence is formed by taking the occurrences of all the
   * characters of all the words in the sentence, and producing all possible
   * combinations of dictionary words that use exactly those characters.
   *
   * The number of words in the sentence and in its anagrams does not have to
   * match. For example, the sentence `List("I", "love", "you")` is an anagram of
   * the sentence `List("You", "olive")`.
   *
   * Two sentences with the same words in a different order are two different
   * anagrams. For example, `List("You", "olive")` and `List("olive", "you")` are
   * different anagrams of `List("I", "love", "you")`.
   *
   * Here is a full example of a sentence `List("Yes", "man")` and its anagrams
   * for our dictionary:
   *
   *   List(
   *     List(en, as, my),
   *     List(en, my, as),
   *     List(man, yes),
   *     List(men, say),
   *     List(as, en, my),
   *     List(as, my, en),
   *     List(sane, my),
   *     List(Sean, my),
   *     List(my, en, as),
   *     List(my, as, en),
   *     List(my, sane),
   *     List(my, Sean),
   *     List(say, men),
   *     List(yes, man)
   *   )
   *
   * The sentences do not have to be returned in the order shown above -- any
   * order is fine as long as all the anagrams are there. Every returned word
   * exists in the dictionary.
   *
   * Note: if the words of the sentence are in the dictionary, the sentence is an
   * anagram of itself and is therefore part of the result.
   *
   * Note: there is exactly one anagram of an empty sentence, the empty sentence.
   *
   * @param sentence the sentence to find anagrams for
   * @return every sentence of dictionary words that uses exactly the characters
   *         of `sentence`
   */
  def sentenceAnagrams(sentence: Sentence): List[Sentence] = {
    // Works on occurrence lists rather than words. `accumulator` holds the
    // sentence built so far (wrapped in a list); each step picks a subset of the
    // remaining letters that spells at least one dictionary word, prepends every
    // such word to the accumulated sentence, and recurses on the letters left.
    def anagramsOf(occurrences: Occurrences, accumulator: List[Sentence]): List[Sentence] =
      if (occurrences.isEmpty) accumulator // every letter used: sentence complete
      else
        for {
          elem <- combinations(occurrences)
          // The empty subset removes no letters; following it could never
          // terminate if the dictionary happened to contain an empty word.
          if elem.nonEmpty
          // One lookup both tests whether `elem` spells a word and fetches the
          // words; subsets that spell nothing contribute no results.
          word <- dictionaryByOccurrences.getOrElse(elem, Nil)
          // Removing the letters just used guarantees they cannot be reused.
          anagram <- anagramsOf(subtract(occurrences, elem), accumulator.map(word :: _))
        } yield anagram

    anagramsOf(sentenceOccurrences(sentence), List(Nil))
  }
  /**
   * Tells whether an occurrence list spells at least one dictionary word.
   *
   * @param occurrences the occurrence list to look up
   * @return `true` if `dictionaryByOccurrences` has an entry for `occurrences`,
   *         `false` otherwise
   */
  def isWord(occurrences: Occurrences): Boolean =
    dictionaryByOccurrences contains occurrences
  225. }