PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/scala/ebooksearch/models/Books.scala

https://github.com/koduki/eBookSearch
Scala | 224 lines | 200 code | 24 blank | 0 comment | 17 complexity | d52af9d987fb2c3736d778e66b917869 MD5 | raw file
  1. package cn.orz.pascal.ebooksearch.models
  2. import cn.orz.pascal.ebooksearch.models._
  3. import cn.orz.pascal.ebooksearch.agent._
  4. import cn.orz.pascal.ebooksearch.config.MyConfig
  5. import cn.orz.pascal.commons.rakuten.RakutenBooks
  6. import cn.orz.pascal.commons.rakuten.RakutenItem
  7. import cn.orz.pascal.commons.utils.LoggingSupport
  8. import cn.orz.pascal.commons.utils.ConfigReader
  9. import cn.orz.pascal.commons.utils.LevenshteinDistance
  10. import cn.orz.pascal.commons.utils.DateUtils._
  11. import cn.orz.pascal.commons.utils.NetUtils._
  12. import cn.orz.pascal.mechanize._
  13. import com.mongodb.casbah.Imports._
  14. import com.novus.salat.global._
  15. import com.novus.salat._
  16. import com.novus.salat.annotations._
  17. import com.novus.salat.global._
  /** Companion of the [[Books]] class; provides a factory so callers can write `Books(config)`. */
  object Books {

    /**
     * Creates a [[Books]] instance bound to the given configuration.
     *
     * @param config application configuration handed to the new instance
     * @return a freshly constructed [[Books]]
     */
    def apply(config: MyConfig): Books = new Books(config)
  }
  /**
   * Maps store items ([[Item]]) to bibliographic [[Book]] records.
   *
   * Books are looked up through `BookDao`; when no stored record matches, the
   * Rakuten Books search (and, as a fallback, a Google search for an Amazon
   * product link) is used to build a new record.
   *
   * @param config application configuration; `config.rakuten.developerId` is
   *               used for every Rakuten Books request
   */
  class Books(val config: MyConfig) extends LoggingSupport {

    /**
     * Moves `item` from `source` to the book identified by `isbn`.
     *
     * The target book is the first stored book with that ISBN or, if none is
     * stored, a new book built from the first Rakuten search result.
     *
     * @param source the book that currently owns the item; `None` yields `None`
     * @param item   the item to move
     * @param isbn   ISBN of the target book; an empty string yields `None`
     * @return the target book as it was saved (i.e. including `item`), or `None`
     *         when there is no source, the ISBN is empty, or Rakuten has no match
     */
    def change(source: Option[Book], item: Item, isbn: String): Option[Book] = {
      source.flatMap { sourceBook =>
        info("isbn is %s".format(isbn))
        if (isbn.isEmpty) {
          None
        } else {
          val stored = BookDao.find(MongoDBObject("isbn" -> isbn)).toList
          debug("books count is %s".format(stored.size))

          // Prefer an already stored book; only ask Rakuten when nothing is stored.
          val target: Option[Book] = stored.headOption.orElse {
            val rbs = new RakutenBooks(config.rakuten.developerId)
            rbs.search(isbn).headOption.map(result => buildBook(item, result))
          }

          target.map { book =>
            val updated = book.addItem(item)
            BookDao.save(updated)
            // When the item is "moved" onto the book it already belongs to, saving
            // the source copy with the item removed would undo the save above.
            if (book.id != sourceBook.id) {
              BookDao.save(sourceBook.removeItem(item))
            }
            info("%s change to %s from %s .".format(item.title + ":" + item.provider.name, book.id, sourceBook.id))
            // Return what was persisted so the caller sees the item on the book.
            updated
          }
        }
      }
    }

    /**
     * Returns the book that owns `item`.
     *
     * If no stored book contains the item, a book is resolved via
     * `selectFromRakuten`, saved, and returned.
     *
     * @param item the item to look up
     * @return the stored or newly saved book
     */
    def select(item: Item): Book = {
      val query = MongoDBObject("items" -> grater[Item].asDBObject(item))
      BookDao.find(query).toList.headOption match {
        case Some(existing) =>
          debug("%s is return db item.".format(item.title))
          existing
        case None =>
          debug("%s is return new item.".format(item.title))
          val created = selectFromRakuten(item)
          BookDao.save(created)
          created
      }
    }

    /**
     * Loads the newest feed entries of `provider` and groups their books by date.
     *
     * At most `size` feed entries are read, newest `createdAt` first. Entries
     * are grouped by `(provider, dateTrim(createdAt))`, each item is resolved
     * with [[select]], duplicates are dropped, and the books of each group are
     * ordered by title, descending. Groups are ordered by date, descending.
     *
     * @param provider provider whose feed is read
     * @param size     maximum number of feed entries to read
     * @return list of `((provider, date), books)` pairs
     */
    def getFeeds(provider: Provider, size: Int) = {
      val itemsByDate = FeedItemDao
        .find(MongoDBObject("_id.provider" -> grater[Provider].asDBObject(provider)))
        .sort(orderBy = MongoDBObject("createdAt" -> -1))
        .limit(size)
        .toList
        .foldLeft(Map[(Provider, java.util.Date), List[Item]]()) { (grouped, feed) =>
          val key = (provider, dateTrim(feed.createdAt))
          grouped + (key -> (grouped.getOrElse(key, List[Item]()) ++ List(feed.item)))
        }

      itemsByDate.map { group =>
        group._1 -> group._2.map(item => select(item)).toSet.toList.sortWith((x, y) => x.title > y.title)
      }.toList.sortWith((x, y) => x._1._2 > y._1._2)
    }

    /**
     * Resolves a book for `item` without consulting the item index.
     *
     * Searches Rakuten Books by cleaned title and author. With no Rakuten hit
     * it falls back to [[getBookFromGoogle]], and finally to a placeholder book
     * built only from the item's own data. With Rakuten hits, the best-fitting
     * result is used: an already stored book with the same ISBN gets the item
     * added, otherwise a new book is built. Nothing is saved here.
     */
    private def selectFromRakuten(item: Item): Book = {
      // Strip store-specific edition markers from the title before searching.
      val title = item.title
        .replaceAll("【立ち読み版】", "")
        .replaceAll("【電子特別版】", "")
      val author = normalizeAuthor(item.author)
      debug("title=%s,\tauthor=%s".format(title, author))

      val rbs = new RakutenBooks(config.rakuten.developerId)
      val results = rbs.search(title, author)
      if (results.isEmpty) {
        getBookFromGoogle(title, author) match {
          case Some(found) => buildBook(item, found)
          case None =>
            info("title:%s, author:%s is not found.".format(title, author))
            // Placeholder record: only the item's own data is known.
            Book(
              title = item.title,
              author = item.author,
              seriesName = "",
              publisherName = "",
              genre = "",
              salesDate = "",
              itemCaption = "",
              image = Image(item.image_url, item.image_url, item.image_url, item.image_url, item.image_url),
              isbn = "",
              items = Set(item))
        }
      } else {
        val best = selectBestFitBook(item, results)
        BookDao.find(MongoDBObject("isbn" -> best.isbn)).toList.headOption match {
          case Some(existing) =>
            debug("update book [%s].".format(title))
            existing.addItem(item)
          case None =>
            debug("create new book [%s].".format(title))
            buildBook(item, best)
        }
      }
    }

    /**
     * Reduces a store's author string to a single search token.
     *
     * Line breaks, role labels and everything after an illustrator / artist /
     * co-author marker are removed; the first non-empty space-separated token
     * of the remainder is returned, or `""` if nothing is left.
     */
    private def normalizeAuthor(rawAuthor: String): String = {
      rawAuthor
        .replaceAll("\r\n", "")
        .replaceAll("\n", "")
        .replaceAll("著者:", "")
        .replaceAll("イラスト.*", "")
        .replaceAll("漫画", "")
        .replaceAll("原作", "")
        .replaceAll("作画.*", "")
        .replaceAll("作者:", "")
        // Parentheses are grouping operators in a regex, so the pattern "(著)"
        // deleted every 著 and left the brackets behind. Inside a character class
        // they are literal; the fullwidth forms are accepted as well.
        .replaceAll("[(\\uFF08]著[)\\uFF09]", "")
        .replaceAll("/.*", "")
        .replaceAll("×.*", "")
        // NOTE(review): the original replaced " " with " ", which does nothing.
        // Assumed to have been an ideographic space (U+3000) - confirm.
        .replaceAll("[ \\u3000]", " ")
        .split(" ")
        // A leading space yields an empty first token and an all-space string
        // yields no tokens at all, so take the first token that has content.
        .find(_.nonEmpty)
        .getOrElse("")
    }

    /**
     * Picks the search result whose width-normalized title has the smallest
     * Levenshtein distance to the item's title. On ties the earliest result
     * wins. `results` must not be empty.
     */
    private def selectBestFitBook(item: Item, results: List[RakutenItem]): RakutenItem = {
      val wanted = trim(item.title)
      results
        .map(candidate => candidate -> LevenshteinDistance(wanted, trim(candidate.title)))
        .sortWith((x, y) => x._2 < y._2)
        .head
        ._1
    }

    /**
     * Applies ICU's "Fullwidth-Halfwidth" transliteration to `str`, so that
     * titles differing only in character width compare equal. Despite the
     * name, no whitespace is trimmed.
     */
    private def trim(str: String): String = {
      import com.ibm.icu.text.Transliterator
      val transliterator = Transliterator.getInstance("Fullwidth-Halfwidth")
      transliterator.transliterate(str)
    }

    /**
     * Builds a new book from a Rakuten search result, owning only `item`.
     *
     * @param item   the single item placed in the new book's item set
     * @param result Rakuten record supplying all bibliographic fields
     */
    private def buildBook(item: Item, result: RakutenItem): Book = {
      Book(
        title = result.title,
        author = result.author,
        seriesName = result.seriesName,
        publisherName = result.publisherName,
        // NOTE(review): genre is filled from the Rakuten `size` field - confirm this is intended.
        genre = result.size,
        salesDate = result.salesDate,
        itemCaption = result.itemCaption,
        image = Image(result.image.small, result.image.medium, result.image.large, result.image.veryLarge, result.image.original),
        isbn = result.isbn,
        items = Set(item))
    }

    /**
     * Looks a book up via Google when the Rakuten title/author search failed.
     *
     * Two Google queries are made (title only, and title plus author). Only
     * when both produce the same, non-empty ISBN is Rakuten searched by that
     * ISBN.
     *
     * @param title  cleaned book title
     * @param author cleaned author token
     * @return the first Rakuten result for the agreed ISBN, or `None` when the
     *         queries disagree, no ISBN was found, or Rakuten has no match
     */
    def getBookFromGoogle(title: String, author: String): Option[RakutenItem] = {
      debug("search from google [title=%s, author=%s].".format(title, author))
      val isbn1 = getISBN(title)
      val isbn2 = getISBN(title + " " + author)
      if (isbn1 != isbn2) {
        info("diff search result [title=%s, author=%s].".format(title, author))
        None
      } else if (isbn1 == "") {
        // Both lookups failed: two empty strings "agree", but an empty ISBN
        // identifies nothing and must not be sent to Rakuten.
        info("not found [isbn=%s].".format(isbn1))
        None
      } else {
        val rbs = new RakutenBooks(config.rakuten.developerId)
        val results = rbs.search(isbn1)
        if (results.isEmpty) {
          info("not found [isbn=%s].".format(isbn1))
          None
        } else {
          Some(results.head)
        }
      }
    }

    /**
     * Searches Google for `keyword` and derives an ISBN from the first Amazon
     * product link (`.../dp/<id>"`) found in the result page.
     *
     * @param keyword search terms; passed through `utf8` before being appended
     *                to the query URL
     * @return `ISBN.to13` of the captured id, or `""` when no such link is found
     */
    def getISBN(keyword: String): String = {
      import cn.orz.pascal.commons.utils.ISBN
      val google = "http://www.google.co.jp/search?q="
      // NOTE(review): `.*` is greedy and the capture runs to the next double
      // quote, so the captured text may be more than a bare product id - confirm
      // that ISBN.to13 copes with that.
      val asinPattern = """amazon.*/dp/(.*?)"""".r

      val agent = new Mechanize()
      agent.isJavaScriptEnabled_=(false)
      val html = agent.get(google + utf8(keyword)).asXml.toString

      asinPattern.findFirstMatchIn(html).map(_.group(1)) match {
        case Some(asin) => ISBN.to13(asin)
        case None => ""
      }
    }

    /**
     * Merges stored books that share the same non-empty ISBN.
     *
     * For every distinct ISBN, the items of all further books are added to the
     * first one, which is saved; the further books are then removed.
     */
    def cleanUp(): Unit = {
      // NOTE(review): database "test" and collection "books" are hard-coded here,
      // while all other access goes through BookDao - confirm both refer to the
      // same collection.
      val nativeBooks = MongoConnection()("test")("books")
      nativeBooks.distinct("isbn", "isbn" $ne "").foreach { (isbn) =>
        val books = BookDao.find(MongoDBObject("isbn" -> isbn)).toList
        info("isbn is %s, count %d".format(isbn, books.size))
        books match {
          case keeper :: duplicates =>
            val merged = duplicates.foldLeft(keeper) { (acc, duplicate) =>
              duplicate.items.foldLeft(acc) { (book, item) => book.addItem(item) }
            }
            BookDao.save(merged)
            duplicates.foreach(BookDao.remove(_))
          case Nil =>
            // The ISBN was reported by the raw collection but BookDao returned
            // nothing for it; there is nothing to merge.
            debug("no books found for isbn %s.".format(isbn))
        }
      }
    }
  }