/ShowMeTheBestReply/test/unit/ArticleTests.groovy
http://showmethebestreply.googlecode.com/ · Groovy · 76 lines · 60 code · 14 blank · 2 comment · 10 complexity · 77c28d761fce41f9fc79e381aec2094e MD5 · raw file
- import grails.test.*
- import groovyx.net.http.HTTPBuilder
- import static groovyx.net.http.Method.GET
- import static groovyx.net.http.Method.POST
- import static groovyx.net.http.ContentType.TEXT
- import static groovyx.net.http.ContentType.HTML
/**
 * Tests that fetch a news article page and extract its fields: once with inline
 * scraping code (testParsingArticle) and once through Article.parseArticle
 * (testArticleClass).
 *
 * Both tests request a live page over HTTP, so they need network access and depend
 * on the page keeping the markup that the lookups below expect.
 */
class ArticleTests extends GrailsUnitTestCase {

    /** Article page requested by both tests. */
    private static final String ARTICLE_URL = 'http://news.nate.com/view/20091203n08304'

    /**
     * Layout of the timestamps read from the article info line.
     * The pattern has no AM/PM marker, so the hour uses the 24-hour field (HH);
     * with the 12-hour field (hh) an hour of 12 would be parsed as hour 0.
     * NOTE(review): assumes the page prints 24-hour times - confirm against the live page.
     */
    private static final String DATE_PATTERN = 'yyyy-MM-dd HH:mm'

    protected void setUp() {
        super.setUp()
    }

    protected void tearDown() {
        super.tearDown()
    }

    /**
     * Parses a timestamp using DATE_PATTERN.
     *
     * @param text timestamp text; may be empty when the element was not found
     * @return the parsed Date, or null when the text does not match the pattern
     */
    private static Date parseTimestamp(String text) {
        try {
            return Date.parse(DATE_PATTERN, text)
        } catch (java.text.ParseException ignored) {
            // Best effort: a missing or malformed timestamp becomes null, and the
            // caller asserts on the timestamps it actually requires.
            return null
        }
    }

    /**
     * Downloads the article page, parses it with TagSoup + XmlSlurper and checks that
     * title, content, article id, info line, media name and creation date are found.
     */
    void testParsingArticle() {
        String articleUrl = ARTICLE_URL
        def url = new URL(articleUrl)
        def articleReader = new HTTPBuilder(url.getProtocol() + '://' + url.getHost()).request(GET, TEXT) {
            uri.path = url.getPath()
        }
        assert articleReader instanceof Reader // response data is buffered in-memory

        // TagSoup lets XmlSlurper read HTML that is not well-formed XML.
        def slurper = new XmlSlurper(new org.ccil.cowan.tagsoup.Parser())

        def html = slurper.parse(articleReader)
        def article = html.body.'**'.find { it.name() == 'div' && it.@id == 'articleView' }
        // The depth-first find yields null when nothing matches; fail here with a clear
        // message instead of a NullPointerException on the next lookup.
        assert article != null : "no <div id='articleView'> found in $articleUrl"

        def info = article.p.find { it.@class == 'articleInfo' }
        def dateCreated = parseTimestamp(info.span.find { it.@class == 'firstDate' }.em.text())
        def dateModified = parseTimestamp(info.span.find { it.@class == 'lastDate' }.em.text())

        // NOTE(review): 'articleContetns' is kept exactly as the original test spelled it;
        // presumably it mirrors the id used by the page itself - confirm before "correcting" it.
        def contentNode = article.div.find { it.@id == 'articleContetns' }.table.tr.td
        // Rewrite element bodies before the text is extracted: <br> becomes a line break,
        // <img> becomes an inline tag carrying its src, and <script> bodies are emptied.
        contentNode.'**'.findAll { it.name() == 'br' }.each { br -> br.replaceBody('\r\n') }
        contentNode.'**'.findAll { it.name() == 'img' }.each { img -> img.replaceBody("<img src='${img.@src.text()}'>") }
        contentNode.'**'.findAll { it.name() == 'script' }.each { script -> script.replaceBody('') }

        String title = article.h3
        // Read src from the first matching <img>. The original path went through a
        // non-existent child named 'it', which always produced an empty link.
        String imageLink = contentNode.div.span.img[0].@src.text()
        String thumbnailLink = imageLink.replace('orgImg', 'thumb90')
        String content = contentNode.text().trim()
        String articleId = url.getPath().replace('/view/', '')
        String media = info.span.a[0]

        assert title
        assert content
        assert articleId
        assert info
        assert media
        assert dateCreated

        println "$title($articleId, $dateCreated, $dateModified, $media)"
        println "$imageLink, $thumbnailLink"
        println "$content"
    }

    /**
     * Parses the same article through Article.parseArticle and checks the fields that
     * testParsingArticle also requires, then prints the parsed values.
     */
    void testArticleClass() {
        String articleUrl = ARTICLE_URL
        def article = Article.parseArticle(articleUrl)

        // Without these checks the test could never fail on a bad parse.
        assert article != null : "Article.parseArticle returned null for $articleUrl"
        assert article.title
        assert article.content
        assert article.articleId
        assert article.media
        assert article.dateCreated

        println "$article.title($article.articleId, $article.url, $article.dateCreated, $article.dateModified, $article.media)"
        println "$article.imageLink, $article.thumbnailLink"
        println "$article.content"
    }
}