/spec/rewriter_spec.rb

https://github.com/marshluca/twitter-text-rb · Ruby · 558 lines · 442 code · 113 blank · 3 comment · 95 complexity · f0369c2f1890edfa5b78985965fcb567 MD5 · raw file

  1. # encoding: UTF-8
  2. require 'spec_helper'
  3. describe Twitter::Rewriter do
  # Default input text. Returns nil; individual contexts redefine this method
  # to supply the string under test.
  def original_text
    nil
  end

  # Default URL. Returns nil; the URL examples redefine this method to supply
  # the address under test.
  def url
    nil
  end
  # Recording stand-in for the block handed to the rewriter.
  #
  # Stores the arguments of every invocation in @block_args and always
  # returns the placeholder "[rewritten]".
  #
  # Shape of @block_args:
  # * after one call  -> the flat argument list, e.g. ["#", "ab"]
  # * after more calls -> a list of argument lists, e.g. [["#", "ab"], ["#", "cd"]]
  def block(*args)
    @block_args =
      if !@block_args.is_a?(Array)
        # First invocation: keep the arguments flat.
        args
      elsif @block_args.first.is_a?(Array)
        # Already a list of argument lists: append in place.
        @block_args << args
      else
        # Second invocation: promote the flat list to a list of lists.
        [@block_args, args]
      end

    "[rewritten]"
  end
  # Username examples. Each context defines `original_text`; the `before` hook
  # passes it to Twitter::Rewriter.rewrite_usernames_or_lists together with the
  # recording `block` helper. Examples then check the arguments the block
  # received (@block_args) and the text that came back (@rewritten_text).
  describe "rewrite usernames" do #{{{
    before do
      @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
    end

    context "username preceded by a space" do
      def original_text
        "hello @jacob"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "hello [rewritten]"
      end
    end

    context "username at beginning of line" do
      def original_text
        "@jacob you're cool"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "[rewritten] you're cool"
      end
    end

    context "username preceded by word character" do
      def original_text
        "meet@the beach"
      end

      it "should not be rewritten" do
        # The block must never have been invoked.
        @block_args.should be_nil
        @rewritten_text.should == "meet@the beach"
      end
    end

    context "username preceded by non-word character" do
      def original_text
        "great.@jacob"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "great.[rewritten]"
      end
    end

    context "username containing non-word characters" do
      def original_text
        "@jacob&^$%^"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "[rewritten]&^$%^"
      end
    end

    context "username over twenty characters" do
      # Builds "@" + a 20-character name + "1" and remembers the name so the
      # example can compare against it.
      def original_text
        @twenty_character_username = "zach" * 5
        "@#{@twenty_character_username}1"
      end

      it "should be rewritten" do
        # Only the first twenty characters are passed on; the trailing "1" stays.
        @block_args.should == ["@", @twenty_character_username, nil]
        @rewritten_text.should == "[rewritten]1"
      end
    end

    context "username followed by japanese" do
      def original_text
        "@jacobの"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "[rewritten]の"
      end
    end

    context "username preceded by japanese" do
      def original_text
        "あ@jacob"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "あ[rewritten]"
      end
    end

    context "username surrounded by japanese" do
      def original_text
        "あ@jacobの"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "あ[rewritten]の"
      end
    end

    context "username using full-width at-sign" do
      # U+FF20 (FULLWIDTH COMMERCIAL AT) followed by the name.
      def original_text
        [0xFF20].pack('U') + "jacob"
      end

      it "should be rewritten" do
        # NOTE(review): the input sigil is U+FF20 but the expectation below is
        # written with an ASCII "@" -- confirm which form the block receives.
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "[rewritten]"
      end
    end
  end #}}}
  # List examples. Each context defines `original_text`; the `before` hook
  # passes it to Twitter::Rewriter.rewrite_usernames_or_lists together with the
  # recording `block` helper. The third recorded argument is the "/slug" part
  # when a list was matched and nil when only a username was matched.
  describe "rewrite lists" do #{{{
    before do
      @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
    end

    context "slug preceded by a space" do
      def original_text
        "hello @jacob/my-list"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", "/my-list"]
        @rewritten_text.should == "hello [rewritten]"
      end
    end

    context "username followed by a slash but no list" do
      def original_text
        "hello @jacob/ my-list"
      end

      # The username is still rewritten; only the dangling "/ my-list" is left
      # alone, so the description says exactly that.
      it "should rewrite only the username" do
        @block_args.should == ["@", "jacob", nil]
        @rewritten_text.should == "hello [rewritten]/ my-list"
      end
    end

    context "empty username followed by a list" do
      def original_text
        "hello @/my-list"
      end

      it "should not be rewritten" do
        # The block must never have been invoked.
        @block_args.should be_nil
        @rewritten_text.should == "hello @/my-list"
      end
    end

    context "list slug at beginning of line" do
      def original_text
        "@jacob/my-list"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", "/my-list"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "username preceded by alpha-numeric character" do
      def original_text
        "meet@jacob/my-list"
      end

      it "should not be rewritten" do
        @block_args.should be_nil
        @rewritten_text.should == "meet@jacob/my-list"
      end
    end

    context "username preceded by non-word character" do
      def original_text
        "great.@jacob/my-list"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", "/my-list"]
        @rewritten_text.should == "great.[rewritten]"
      end
    end

    context "username containing non-word characters" do
      def original_text
        "@jacob/my-list&^$%^"
      end

      it "should be rewritten" do
        @block_args.should == ["@", "jacob", "/my-list"]
        @rewritten_text.should == "[rewritten]&^$%^"
      end
    end

    # This context exercises the length limit of the list slug, not of the
    # username, so it is named after the slug.
    context "list slug over twenty-five characters" do
      # Builds "@jacob/" + a 25-character slug + "12345" and remembers the slug
      # so the example can compare against it.
      def original_text
        @twentyfive_character_list = "a" * 25
        "@jacob/#{@twentyfive_character_list}12345"
      end

      it "should be rewritten" do
        # Only the first twenty-five slug characters are passed on; "12345" stays.
        @block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"]
        @rewritten_text.should == "[rewritten]12345"
      end
    end
  end #}}}
  # Hashtag examples. Each context defines `original_text`; the `before` hook
  # passes it to Twitter::Rewriter.rewrite_hashtags together with the recording
  # `block` helper. Examples check the (sigil, tag) pairs the block received
  # and the text that came back.
  describe "rewrite hashtags" do #{{{
    before do
      @rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block))
    end

    context "with an all numeric hashtag" do
      def original_text
        "#123"
      end

      it "should not be rewritten" do
        # The block must never have been invoked.
        @block_args.should be_nil
        @rewritten_text.should == "#123"
      end
    end

    context "with a hashtag with alphanumeric characters" do
      def original_text
        "#ab1d"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "ab1d"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with a hashtag with underscores" do
      def original_text
        "#a_b_c_d"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "a_b_c_d"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with a hashtag that is preceded by a word character" do
      def original_text
        "ab#cd"
      end

      it "should not be rewritten" do
        @block_args.should be_nil
        @rewritten_text.should == "ab#cd"
      end
    end

    context "with a page anchor in a url" do
      def original_text
        "Here's my url: http://foobar.com/#home"
      end

      it "should not link the hashtag" do
        @block_args.should be_nil
        @rewritten_text.should == "Here's my url: http://foobar.com/#home"
      end
    end

    context "with a hashtag that starts with a number but has word characters" do
      def original_text
        "#2ab"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "2ab"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with multiple valid hashtags" do
      def original_text
        "I'm frickin' awesome #ab #cd #ef"
      end

      it "rewrites each hashtag" do
        # Several invocations are recorded as a list of argument lists.
        @block_args.should == [["#", "ab"], ["#", "cd"], ["#", "ef"]]
        @rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]"
      end
    end

    context "with a hashtag preceded by a ." do
      def original_text
        "ok, great.#abc"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "abc"]
        @rewritten_text.should == "ok, great.[rewritten]"
      end
    end

    context "with a hashtag preceded by a &" do
      def original_text
        "&#nbsp;"
      end

      it "should not be rewritten" do
        @block_args.should be_nil
        @rewritten_text.should == "&#nbsp;"
      end
    end

    context "with a hashtag that ends in an !" do
      def original_text
        "#great!"
      end

      it "should be rewritten, but should not include the !" do
        @block_args.should == ["#", "great"]
        @rewritten_text.should == "[rewritten]!"
      end
    end

    context "with a hashtag followed by Japanese" do
      def original_text
        "#twj_devの"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "twj_devの"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with a hashtag preceded by a full-width space" do
      # U+3000 (IDEOGRAPHIC SPACE) followed by the hashtag.
      def original_text
        "#{[0x3000].pack('U')}#twj_dev"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "twj_dev"]
        # The U+3000 in front of the hashtag is not part of the rewritten span,
        # so the expectation is built from the same code point instead of a
        # literal that is easy to confuse with an ASCII space.
        @rewritten_text.should == "#{[0x3000].pack('U')}[rewritten]"
      end
    end

    context "with a hashtag followed by a full-width space" do
      # The hashtag followed by U+3000 (IDEOGRAPHIC SPACE).
      def original_text
        "#twj_dev#{[0x3000].pack('U')}"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "twj_dev"]
        # Same reasoning as above: the trailing U+3000 must survive verbatim.
        @rewritten_text.should == "[rewritten]#{[0x3000].pack('U')}"
      end
    end

    context "with a hashtag using full-width hash" do
      # U+FF03 (FULLWIDTH NUMBER SIGN) followed by the tag text.
      def original_text
        "#{[0xFF03].pack('U')}twj_dev"
      end

      it "should be rewritten" do
        # NOTE(review): the input sigil is U+FF03 but the expectation below is
        # written with an ASCII "#" -- confirm which form the block receives.
        @block_args.should == ["#", "twj_dev"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with a hashtag containing an accented latin character" do
      def original_text
        # the hashtag is #éhashtag
        "##{[0x00e9].pack('U')}hashtag"
      end

      it "should be rewritten" do
        @block_args.should == ["#", "éhashtag"]
        @rewritten_text.should == "[rewritten]"
      end
    end
  end #}}}
  # URL examples. Contexts define `original_text` (and sometimes override
  # `url`); the `before` hook passes the text to Twitter::Rewriter.rewrite_urls
  # together with the recording `block` helper. Examples check the URL(s) the
  # block received and the text that came back.
  describe "rewrite urls" do #{{{
    # URL interpolated into most inputs; nested contexts override it.
    def url
      "http://www.google.com"
    end

    before do
      @rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block))
    end

    context "when embedded in plain text" do
      def original_text
        "On my search engine #{url} I found good links."
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "On my search engine [rewritten] I found good links."
      end
    end

    context "when surrounded by Japanese;" do
      def original_text
        "いまなにしてる#{url}いまなにしてる"
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる"
      end
    end

    context "with a path surrounded by parentheses;" do
      def original_text
        "I found a neatness (#{url})"
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end

      context "when the URL ends with a slash;" do
        def url
          "http://www.google.com/"
        end

        it "should be rewritten" do
          @block_args.should == [url]
          @rewritten_text.should == "I found a neatness ([rewritten])"
        end
      end

      context "when the URL has a path;" do
        def url
          "http://www.google.com/fsdfasdf"
        end

        it "should be rewritten" do
          @block_args.should == [url]
          @rewritten_text.should == "I found a neatness ([rewritten])"
        end
      end
    end

    context "when path contains parens" do
      def original_text
        "I found a neatness (#{url})"
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end

      context "wikipedia" do
        def url
          "http://en.wikipedia.org/wiki/Madonna_(artist)"
        end

        it "should be rewritten" do
          @block_args.should == [url]
          @rewritten_text.should == "I found a neatness ([rewritten])"
        end
      end

      context "IIS session" do
        def url
          "http://msdn.com/S(deadbeef)/page.htm"
        end

        it "should be rewritten" do
          @block_args.should == [url]
          @rewritten_text.should == "I found a neatness ([rewritten])"
        end
      end

      context "unbalanced parens" do
        def url
          "http://example.com/i_has_a_("
        end

        it "should be rewritten" do
          # The unmatched "(" is not part of the URL handed to the block.
          @block_args.should == ["http://example.com/i_has_a_"]
          @rewritten_text.should == "I found a neatness ([rewritten]()"
        end
      end

      context "balanced parens with a double quote inside" do
        def url
          "http://foo.bar/foo_(\")_bar"
        end

        it "should be rewritten" do
          # The URL stops before the parenthesised double quote.
          @block_args.should == ["http://foo.bar/foo_"]
          @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
        end
      end

      context "balanced parens hiding XSS" do
        def url
          'http://x.xx/("style="color:red"onmouseover="alert(1)'
        end

        it "should be rewritten" do
          # The attribute-injection payload must stay outside the rewritten URL.
          @block_args.should == ["http://x.xx/"]
          @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
        end
      end
    end

    context "when preceded by a :" do
      def original_text
        "Check this out @hoverbird:#{url}"
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "Check this out @hoverbird:[rewritten]"
      end
    end

    context "with a URL ending in allowed punctuation" do
      it "does not consume ending punctuation" do
        # NOTE(review): inside %w a backslash escapes the following space, so
        # `\ '` produces the single element " '" rather than separate backslash
        # and quote entries -- confirm that is intended before changing it.
        %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
          Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |matched_url|
            # The parameter is deliberately not called `url`: that would shadow
            # the `url` helper and reduce this check to comparing a value with
            # itself.
            matched_url.should == url
            "[rewritten]"
          end.should == "[rewritten]#{char}"
        end
      end
    end

    context "with a URL preceded in forbidden characters" do
      it "should not be rewritten" do
        # See the %w escaping note in the previous context; the same applies here.
        %w| \ ' / ! = |.each do |char|
          Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |_url|
            "[rewritten]" # should not be called here.
          end.should == "#{char}#{url}"
        end
      end
    end

    context "when embedded in a link tag" do
      def original_text
        "<link rel='true'>#{url}</link>"
      end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "<link rel='true'>[rewritten]</link>"
      end
    end

    context "with multiple URLs" do
      def original_text
        "http://www.links.org link at start of page, link at end http://www.foo.org"
      end

      it "should autolink each one" do
        # Several invocations are recorded as a list of argument lists.
        @block_args.should == [["http://www.links.org"], ["http://www.foo.org"]]
        @rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]"
      end
    end

    context "with multiple URLs in different formats" do
      def original_text
        "http://foo.com https://bar.com http://mail.foobar.org"
      end

      it "should autolink each one, in the proper order" do
        @block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]
        @rewritten_text.should == "[rewritten] [rewritten] [rewritten]"
      end
    end

    context "with a URL having a long TLD" do
      def original_text
        "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"
      end

      it "should autolink it" do
        @block_args.should == ["http://golem.mobi/0912/71607.html"]
        @rewritten_text.should == "Yahoo integriert Facebook [rewritten]"
      end
    end

    context "with a url lacking the protocol" do
      def original_text
        "I like www.foobar.com dudes"
      end

      it "does not link at all" do
        # The block must never have been invoked.
        @block_args.should be_nil
        @rewritten_text.should == "I like www.foobar.com dudes"
      end
    end

    context "with a @ in a URL" do
      context "with XSS attack" do
        def original_text
          'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'
        end

        it "should not allow XSS following @" do
          # Everything from the "@" onward stays outside the rewritten URL.
          @block_args.should == ["http://x.xx/"]
          @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
        end
      end

      context "with a username not followed by a /" do
        def original_text
          "http://example.com/@foobar"
        end

        # rewrite_urls only hands the URL part to the block; "@foobar" is left
        # in the text as-is, so the description says exactly that.
        it "should rewrite the short url and leave the username untouched" do
          @block_args.should == ["http://example.com/"]
          @rewritten_text.should == "[rewritten]@foobar"
        end
      end

      context "with a username followed by a /" do
        def original_text
          "http://example.com/@foobar/"
        end

        it "should not link the username but link full url" do
          @block_args.should == ["http://example.com/@foobar/"]
          @rewritten_text.should == "[rewritten]"
        end
      end
    end
  end #}}}
  440. end
  441. # vim: foldmethod=marker