/mstu/sem10/distributed_systems/project/cutephrog/CutePhrog/src/ru/cutephrog/crawler/Test.scala
Scala | 90 lines | 67 code | 12 blank | 11 comment | 1 complexity | 2e16aa7761e9183e5107ddf40e60399c MD5 | raw file
- package ru.cutephrog
- package crawler
- import scala.io.Source
- import com.mongodb.casbah.Imports._
- import java.net.{URI, URL}
- /**
- * Created by IntelliJ IDEA.
- * User: dvolosnykh
- * Date: 10.09.11
- * Time: 14:38
- * To change this template use File | Settings | File Templates.
- */
/**
 * Scratch entry point for manually exercising pieces the crawler relies on:
 * MongoDB updates issued through Casbah, and URL/URI decomposition followed
 * by a page fetch handed to `PageParser`.
 *
 * Only `testMongo()` is invoked on start-up; `testURL()` is kept so it can be
 * switched in by hand.
 */
object Test extends App {
  val urlAttr = "url"
  val visitedAttr = "visited"

  /**
   * Runs a sequence of updates against the `urls` collection of the
   * `CutePhrog` database and prints the runtime classes of the stored list.
   *
   * Two batches of URLs are added to the `urls` set of the document matching
   * `crawler -> "xxx"`, and `count` is incremented by each batch's size.
   * If the resulting count exceeds 2, the stored list and counter are reset
   * and the previously fetched snapshot is inspected.
   *
   * The connection is always closed, even if one of the steps throws.
   */
  def testMongo(): Unit = {
    val mongoConnection = MongoConnection()
    try {
      val mongoDB = mongoConnection("CutePhrog")
      val mongoCollection = mongoDB("urls")
      val query = MongoDBObject("crawler" -> "xxx")

      // The trailing `true, true` flags are presumably (upsert, multi) —
      // confirm against the Casbah version in use.
      val urls = List("a", "b", "d")
      mongoCollection.update(query, $addToSet("urls") $each urls, true, true)
      mongoCollection.update(query, $inc("count" -> urls.size), true, true)

      val urls2 = List("d", "e", "f")
      mongoCollection.update(query, $addToSet("urls") $each urls2, true, true)
      mongoCollection.update(query, $inc("count" -> urls2.size), true, true)

      // Snapshot taken before the reset below; `.get` fails fast if the
      // document or its counter is unexpectedly missing.
      val obj = mongoCollection.findOne(query).get
      val count = obj.getAs[Int]("count").get

      if (count > 2) {
        mongoCollection.update(query, $unset("urls"))
        mongoCollection.update(query, $set("count" -> 0))

        // `obj` is the snapshot fetched above, so it still carries "urls"
        // even though the stored document has just been reset.
        Console.println(obj.get("urls").getClass)
        val anyList = obj.getAs[BasicDBList]("urls").get.toList
        Console.println(anyList.getClass)
        val strList = anyList.map(_.asInstanceOf[String])
        Console.println(strList.getClass)
      }
      // mongoDB.dropDatabase()
    } finally {
      // Release the driver's sockets regardless of how the block exits.
      mongoConnection.close()
    }
  }

  /**
   * Prints the components of a hard-coded URL as seen by `java.net.URL` and
   * by a `java.net.URI` rebuilt from those components, then downloads the
   * page and prints the anchors and images returned by `PageParser.parse`.
   *
   * The underlying input stream is always closed, even if parsing throws.
   */
  def testURL(): Unit = {
    // val strUrl = "http://dvolosnykh@81.25.57.254:80/projects/ukk/wiki/TracIni?a=100&b=200#repositories-section repositories"
    // val strUrl = "http://127.0.0.1"
    val strUrl = "http://81.25.57.2/projects/ukk"
    // val strUrl = "http://gmail.com"
    val url = new URL(strUrl)

    Console.println("=== URL ===")
    Console.println("Protocol: " + url.getProtocol)
    Console.println("Authority: " + url.getAuthority)
    Console.println("Userinfo: " + url.getUserInfo)
    Console.println("Host: " + url.getHost)
    Console.println("Port: " + url.getPort)
    Console.println("File: " + url.getFile)
    Console.println("Path: " + url.getPath)
    Console.println("Query: " + url.getQuery)
    Console.println("Reference: " + url.getRef)
    Console.println(url)

    // Rebuild a URI from the URL's parts to compare how each class reports them.
    val uri = new URI(url.getProtocol, url.getAuthority, url.getPath, url.getQuery, url.getRef)

    Console.println("=== URI ===")
    Console.println("Scheme: " + uri.getScheme)
    Console.println("Authority: " + uri.getAuthority)
    Console.println("Userinfo: " + uri.getUserInfo)
    Console.println("Host: " + uri.getHost)
    Console.println("Port: " + uri.getPort)
    Console.println("Path: " + uri.getPath)
    Console.println("Query: " + uri.getQuery)
    Console.println("Fragment: " + uri.getFragment)
    Console.println(uri)

    val connection = url.openConnection()
    // NOTE(review): only the connect timeout is bounded; no read timeout is set.
    connection.setConnectTimeout(3000)

    val source = Source.fromInputStream(connection.getInputStream)
    try {
      // Printing stays inside the try in case the parser's results are
      // produced lazily from `source`.
      val (anchors, images) = PageParser.parse(url, source)
      Console.println("ANCHORS")
      anchors.foreach(Console.println)
      Console.println("IMAGES")
      images.foreach(Console.println)
    } finally {
      // Closing the Source also closes the connection's input stream.
      source.close()
    }
  }

  testMongo()
}