/test/yelp-benchmark.rb

http://github.com/fizx/parsley · Ruby · 53 lines · 46 code · 7 blank · 0 comment · 0 complexity · 72ca30d02e46bfc63393792dfe296f0b MD5 · raw file

  1. require "rubygems"
  2. require "nokogiri"
  3. require "hpricot"
  4. require "parsley"
  5. require "benchmark"
  6. require "pp"
  # Path to the HTML fixture, located in the same directory as this script.
  YELP_HTML = "#{File.dirname(__FILE__)}/yelp.html"
  # Parses the fixture at YELP_HTML via `Nokogiri.Hpricot` and runs `parse`
  # on the resulting document.
  #
  # The fixture is opened in block form so the file handle is closed as soon
  # as extraction finishes, instead of leaking one open handle per call.
  #
  # Returns whatever `parse` returns for the parsed document.
  def noko
    File.open(YELP_HTML) { |html| parse(Nokogiri.Hpricot(html)) }
  end
  # Parses the fixture at YELP_HTML with `Hpricot` and runs `parse` on the
  # resulting document.
  #
  # The fixture is opened in block form so the file handle is closed as soon
  # as extraction finishes, instead of leaking one open handle per call.
  #
  # Returns whatever `parse` returns for the parsed document.
  def hpri
    File.open(YELP_HTML) { |html| parse(Hpricot(html)) }
  end
  # Extracts the business details and its reviews from a parsed document.
  #
  # doc - an object that answers `/` with a CSS selector string and returns a
  #       collection responding to `first` and `map`, whose elements respond
  #       to `inner_text` and `/` (the callers in this file pass the documents
  #       produced by Hpricot and Nokogiri.Hpricot).
  #
  # Returns a Hash with the String keys "name", "phone", "address" and
  # "reviews"; "reviews" is an Array with one Hash per ".nonfavoriteReview"
  # match, keyed by "date", "user_name" and "comment".
  #
  # NOTE(review): a selector that matches nothing makes `first` yield nil
  # (assuming an Array-like result), so `inner_text` raises NoMethodError.
  def parse(doc)
    out = {}
    out["name"] = (doc / "h1").first.inner_text
    out["phone"] = (doc / "#bizPhone").first.inner_text
    out["address"] = (doc / "address").first.inner_text
    out["reviews"] = (doc / ".nonfavoriteReview").map do |node|
      {
        "date" => (node / ".ieSucks .smaller").first.inner_text,
        "user_name" => (node / ".reviewer_info a").first.inner_text,
        "comment" => (node / ".review_comment").first.inner_text
      }
    end
    # Return the whole result. Ending on the "reviews" assignment would make
    # the method's value the reviews Array alone, dropping name/phone/address.
    out
  end
  # Builds a Parsley parselet from a selector map and parses the file at
  # YELP_HTML with it.
  #
  # The map uses the same selectors as `parse`: "name", "phone" and "address"
  # at the top level, plus a "reviews(.nonfavoriteReview)" entry holding a
  # one-element Array with the per-review selectors.
  # NOTE(review): the "key(selector)" form presumably scopes the nested map to
  # each matching element - confirm against the Parsley documentation.
  #
  # Returns the value of `Parsley#parse`.
  def pars
    review_fields = {
      "date" => ".ieSucks .smaller",
      "user_name" => ".reviewer_info a",
      "comment" => ".review_comment"
    }
    structure = {
      "name" => "h1",
      "phone" => "#bizPhone",
      "address" => "address",
      "reviews(.nonfavoriteReview)" => [review_fields]
    }
    Parsley.new(structure).parse(:file => YELP_HTML)
  end
  # Time three consecutive runs of each extraction approach and print the
  # Benchmark.bm report, one labelled row per approach.
  Benchmark.bm do |bench|
    bench.report("nokogiri: ") do
      3.times { noko }
    end
    bench.report("hpricot: ") do
      3.times { hpri }
    end
    bench.report("parsley: ") do
      3.times { pars }
    end
  end