PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/mstu/sem10/distributed_systems/project/cutephrog/CutePhrog/src/ru/cutephrog/crawler/Test.scala

https://gitlab.com/dvolosnykh/education
Scala | 90 lines | 67 code | 12 blank | 11 comment | 1 complexity | 2e16aa7761e9183e5107ddf40e60399c MD5 | raw file
  1. package ru.cutephrog
  2. package crawler
  3. import scala.io.Source
  4. import com.mongodb.casbah.Imports._
  5. import java.net.{URI, URL}
  6. /**
  7. * Created by IntelliJ IDEA.
  8. * User: dvolosnykh
  9. * Date: 10.09.11
  10. * Time: 14:38
  11. * To change this template use File | Settings | File Templates.
  12. */
  // Scratch entry point for manual experiments with the crawler's building blocks:
  // MongoDB access through Casbah (`testMongo`) and URL/URI decomposition plus page
  // parsing (`testURL`). Running the object executes `testMongo()` only; `testURL()`
  // is kept for manual invocation.
  object Test extends App {
    val urlAttr = "url"
    val visitedAttr = "visited"

    /**
     * Exercises upserts with `$addToSet`/`$each` and `$inc` against the `urls`
     * collection of the `CutePhrog` database, then inspects the runtime classes of
     * the stored url list.
     *
     * Requires a MongoDB server reachable with the default `MongoConnection()`
     * settings. The connection is closed when the method finishes, whether it
     * succeeds or fails.
     */
    def testMongo(): Unit = {
      val mongoConnection = MongoConnection()
      try {
        val mongoDB = mongoConnection("CutePhrog")
        val mongoCollection = mongoDB("urls")
        val query = MongoDBObject("crawler" -> "xxx")

        // Adds a batch of urls to the document's "urls" set and bumps "count" by the
        // batch size. Note that "count" grows by the full batch size even when some
        // of the urls were already present. The two trailing `true` arguments are
        // kept positional exactly as in the original calls (presumably upsert and
        // multi - confirm against the Casbah version in use).
        def addUrls(urls: List[String]): Unit = {
          mongoCollection.update(query, $addToSet("urls") $each urls, true, true)
          mongoCollection.update(query, $inc("count" -> urls.size), true, true)
        }

        addUrls(List("a", "b", "d"))
        addUrls(List("d", "e", "f"))

        // Fail with a descriptive message instead of a bare NoSuchElementException
        // when the document or one of its fields is unexpectedly missing.
        val obj = mongoCollection.findOne(query).getOrElse(
          sys.error("testMongo: no document matches " + query))
        val count = obj.getAs[Int]("count").getOrElse(
          sys.error("testMongo: document has no \"count\" field"))

        if (count > 2) {
          // Reset the stored document. `obj` is the snapshot fetched before the
          // reset, so it still carries the url list inspected below.
          mongoCollection.update(query, $unset("urls"))
          mongoCollection.update(query, $set("count" -> 0))

          val rawUrls = obj.getAs[BasicDBList]("urls").getOrElse(
            sys.error("testMongo: document has no \"urls\" field"))
          Console.println(rawUrls.getClass)

          val anyList = rawUrls.toList
          Console.println(anyList.getClass)

          // Pattern match instead of an unchecked asInstanceOf: entries that are not
          // strings are dropped rather than raising ClassCastException.
          val strList = anyList.collect { case s: String => s }
          Console.println(strList.getClass)
        }
        // mongoDB.dropDatabase()
      } finally {
        mongoConnection.close()
      }
    }

    /**
     * Prints the components of a sample URL, rebuilds it as a `java.net.URI` from
     * those components and prints the URI's components, then downloads the page and
     * prints the anchors and images returned by `PageParser.parse`.
     *
     * Performs a real HTTP request (3 second connect timeout). The response stream
     * is closed when the method finishes, whether it succeeds or fails.
     */
    def testURL(): Unit = {
      // val strUrl = "http://dvolosnykh@81.25.57.254:80/projects/ukk/wiki/TracIni?a=100&b=200#repositories-section repositories"
      // val strUrl = "http://127.0.0.1"
      val strUrl = "http://81.25.57.2/projects/ukk"
      // val strUrl = "http://gmail.com"
      val url = new URL(strUrl)

      Console.println("=== URL ===")
      Console.println("Protocol: " + url.getProtocol)
      Console.println("Authority: " + url.getAuthority)
      Console.println("Userinfo: " + url.getUserInfo)
      Console.println("Host: " + url.getHost)
      Console.println("Port: " + url.getPort)
      Console.println("File: " + url.getFile)
      Console.println("Path: " + url.getPath)
      Console.println("Query: " + url.getQuery)
      Console.println("Reference: " + url.getRef)
      Console.println(url)

      // Rebuild the address from its parts using the five-argument URI constructor.
      val uri = new URI(url.getProtocol, url.getAuthority, url.getPath, url.getQuery, url.getRef)

      Console.println("=== URI ===")
      Console.println("Scheme: " + uri.getScheme)
      Console.println("Authority: " + uri.getAuthority)
      Console.println("Userinfo: " + uri.getUserInfo)
      Console.println("Host: " + uri.getHost)
      Console.println("Port: " + uri.getPort)
      Console.println("Path: " + uri.getPath)
      Console.println("Query: " + uri.getQuery)
      Console.println("Fragment: " + uri.getFragment)
      Console.println(uri)

      val connection = url.openConnection()
      connection.setConnectTimeout(3000)

      val source = Source.fromInputStream(connection.getInputStream)
      try {
        // Results are printed inside the try block so that anything lazily backed
        // by `source` is consumed before the stream is closed.
        val (anchors, images) = PageParser.parse(url, source)
        Console.println("ANCHORS")
        anchors foreach Console.println
        Console.println("IMAGES")
        images foreach Console.println
      } finally {
        source.close()
      }
    }

    testMongo()
  }