PageRenderTime 19ms CodeModel.GetById 12ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms

/ShowMeTheBestReply/test/unit/ArticleTests.groovy

http://showmethebestreply.googlecode.com/
Groovy | 76 lines | 60 code | 14 blank | 2 comment | 10 complexity | 77c28d761fce41f9fc79e381aec2094e MD5 | raw file
 1import grails.test.*
 2
 3import groovyx.net.http.HTTPBuilder
 4import static groovyx.net.http.Method.GET
 5import static groovyx.net.http.Method.POST
 6import static groovyx.net.http.ContentType.TEXT
 7import static groovyx.net.http.ContentType.HTML
 8
 9class ArticleTests extends GrailsUnitTestCase {
10    protected void setUp() {
11        super.setUp()
12    }
13
14    protected void tearDown() {
15        super.tearDown()
16    }
17
18    // ?? ?? ?? 
19    void testParsingArticle() {
20    	String articleUrl = 'http://news.nate.com/view/20091203n08304'
21		def url = new URL(articleUrl)
22    	def articleReader = new HTTPBuilder(url.getProtocol() + '://' + url.getHost()).request(GET,TEXT) { 
23    		uri.path = url.getPath()
24		}
25        assert articleReader instanceof Reader // response data is buffered in-memory
26        
27		def slurper = new XmlSlurper(new org.ccil.cowan.tagsoup.Parser())
28        
29    	def html = slurper.parse(articleReader)
30		def article = html.body.'**'.find{ it.name() == 'div' && it.@id == 'articleView' }
31    	
32		def info = article.p.find{ it.@class == 'articleInfo' }
33    	def dateCreated
34    	def dateModified
35		try {
36			dateCreated = Date.parse('yyyy-MM-dd hh:mm', info.span.find{it.@class == 'firstDate'}.em.text())
37		} catch(java.text.ParseException e) {}
38		try {
39			dateModified = Date.parse('yyyy-MM-dd hh:mm', info.span.find{it.@class == 'lastDate'}.em.text())
40		} catch(java.text.ParseException e) {}
41		
42		def contentNode = article.div.find{ it.@id == 'articleContetns' }.table.tr.td
43		contentNode.'**'.findAll{ it.name() == 'br' }.each{ br -> br.replaceBody('\r\n') }
44		contentNode.'**'.findAll{ it.name() == 'img' }.each{ img -> img.replaceBody("<img src='${img.@src.text()}'>") }
45		
46		contentNode.'**'.findAll{ it.name() == 'script' }.each { script -> script.replaceBody('') }
47		
48		String title = article.h3
49		String imageLink = contentNode.div.span.img.it.@src.text()
50		String thumbnailLink = imageLink.replace('orgImg', 'thumb90')
51		String content = contentNode.text().trim()
52		String articleId = url.getPath().replace('/view/', '')
53		String media = info.span.a[0]
54		
55		assert title
56		assert content
57		assert articleId
58		assert info
59		assert media
60		assert dateCreated
61		
62		println "$title($articleId, $dateCreated, $dateModified, $media)"
63		println "$imageLink, $thumbnailLink"
64		println "$content"
65    }
66    
67    void testArticleClass() {
68    	// ?? ??
69		String articleUrl = 'http://news.nate.com/view/20091203n08304'
70		def article = Article.parseArticle(articleUrl)
71    	
72		println "$article.title($article.articleId, $article.url, $article.dateCreated, $article.dateModified, $article.media)"
73		println "$article.imageLink, $article.thumbnailLink"
74		println "$article.content"
75    }
76}