/ShowMeTheBestReply/test/unit/ArticleTests.groovy

http://showmethebestreply.googlecode.com/ · Groovy · 76 lines · 60 code · 14 blank · 2 comment · 10 complexity · 77c28d761fce41f9fc79e381aec2094e MD5 · raw file

  1. import grails.test.*
  2. import groovyx.net.http.HTTPBuilder
  3. import static groovyx.net.http.Method.GET
  4. import static groovyx.net.http.Method.POST
  5. import static groovyx.net.http.ContentType.TEXT
  6. import static groovyx.net.http.ContentType.HTML
  7. class ArticleTests extends GrailsUnitTestCase {
  8. protected void setUp() {
  9. super.setUp()
  10. }
  11. protected void tearDown() {
  12. super.tearDown()
  13. }
  14. // ?? ?? ??
  15. void testParsingArticle() {
  16. String articleUrl = 'http://news.nate.com/view/20091203n08304'
  17. def url = new URL(articleUrl)
  18. def articleReader = new HTTPBuilder(url.getProtocol() + '://' + url.getHost()).request(GET,TEXT) {
  19. uri.path = url.getPath()
  20. }
  21. assert articleReader instanceof Reader // response data is buffered in-memory
  22. def slurper = new XmlSlurper(new org.ccil.cowan.tagsoup.Parser())
  23. def html = slurper.parse(articleReader)
  24. def article = html.body.'**'.find{ it.name() == 'div' && it.@id == 'articleView' }
  25. def info = article.p.find{ it.@class == 'articleInfo' }
  26. def dateCreated
  27. def dateModified
  28. try {
  29. dateCreated = Date.parse('yyyy-MM-dd hh:mm', info.span.find{it.@class == 'firstDate'}.em.text())
  30. } catch(java.text.ParseException e) {}
  31. try {
  32. dateModified = Date.parse('yyyy-MM-dd hh:mm', info.span.find{it.@class == 'lastDate'}.em.text())
  33. } catch(java.text.ParseException e) {}
  34. def contentNode = article.div.find{ it.@id == 'articleContetns' }.table.tr.td
  35. contentNode.'**'.findAll{ it.name() == 'br' }.each{ br -> br.replaceBody('\r\n') }
  36. contentNode.'**'.findAll{ it.name() == 'img' }.each{ img -> img.replaceBody("<img src='${img.@src.text()}'>") }
  37. contentNode.'**'.findAll{ it.name() == 'script' }.each { script -> script.replaceBody('') }
  38. String title = article.h3
  39. String imageLink = contentNode.div.span.img.it.@src.text()
  40. String thumbnailLink = imageLink.replace('orgImg', 'thumb90')
  41. String content = contentNode.text().trim()
  42. String articleId = url.getPath().replace('/view/', '')
  43. String media = info.span.a[0]
  44. assert title
  45. assert content
  46. assert articleId
  47. assert info
  48. assert media
  49. assert dateCreated
  50. println "$title($articleId, $dateCreated, $dateModified, $media)"
  51. println "$imageLink, $thumbnailLink"
  52. println "$content"
  53. }
  54. void testArticleClass() {
  55. // ?? ??
  56. String articleUrl = 'http://news.nate.com/view/20091203n08304'
  57. def article = Article.parseArticle(articleUrl)
  58. println "$article.title($article.articleId, $article.url, $article.dateCreated, $article.dateModified, $article.media)"
  59. println "$article.imageLink, $article.thumbnailLink"
  60. println "$article.content"
  61. }
  62. }