PageRenderTime 25ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/extras/unhtml.vim

http://txt2tags.googlecode.com/
Vim Script | 95 lines | 54 code | 7 blank | 34 comment | 0 complexity | 5aefa158d798e1927c831221845785bf MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  " unhtml.vim - by Aurelio Jargas
  " - Converts HTML tags into txt2tags marks
  " - Part of the txt2tags <http://txt2tags.org> software
  "
  " INSTRUCTIONS
  " 1. Open the HTML file in Vim and execute
  "      :so /path/to/unhtml.vim
  "
  " 2. A new <yourfile>.html.t2t will be saved.
  "
  " 3. Check the new .t2t file and correct by hand whatever is left.
  "
  " NOTES FOR MAINTAINERS
  " - Every :s below uses ',' as the delimiter so that '/' in closing tags
  "   needs no escaping.
  " - The 'g' flag replaces every match on a line; the 'e' flag keeps the
  "   script quiet when a pattern does not occur in the buffer.
  " - '\r' in a replacement breaks the line at that point.
  " - The order of the commands matters: links and named anchors must be
  "   converted before the generic 'trash' removal strips leftover <a> tags.

  """ [ preparing ]

  " ignore case, so <A HREF=...> and <a href=...> are both matched
  " (the option is left set after the script finishes)
  set ic

  " join multiline tags: a tag opener alone at end of line, or an <a ...>
  " whose text/closing tag continues on the next line
  g/<\s*\([ap]\|img\)\s*$/join
  g/<\s*a\s[^>]*>[^<]*$/join

  """ [ do it! ]

  " link: first the quoted href="..." form, then the unquoted href=... form
  %s,<\s*a\s[^>]*href="\(.\{-}\)"[^>]*>\(.\{-}\)<\/a>,[\2 \1],ge
  %s,<\s*a\s[^>]*href=\([^ >]\+\)[^>]*>\(.\{-}\)<\/a>,[\2 \1],ge

  " images: quoted src="..." first, then unquoted src=...
  %s,<\s*img\s[^>]*src="\(.\{-}\)"[^>]*>,[\1],ge
  %s,<\s*img\s[^>]*src=\([^ >]\+\)[^>]*>,[\1],ge

  " anchor: a named anchor at the start of a line becomes a level-2 title
  %s,^<\s*a\s\+name=.\{-}>\(.*\)<\/a>,== \1 ==,ge

  " comments
  " single-line comments become a '% ' comment line
  %s,\s*<!--\(.*\)-->,\% \1,ge
  " multi-line comments: prefix every line from each remaining '<!--' up to
  " the next '-->' with '% ' (run under :g so all of them are handled, and
  " so nothing happens when there is none)
  g/<!--/.,/-->/s,^,\% ,e

  " paragraph: an opening <p ...> becomes a line break
  %s,<\s*p\(\s[^>]*\)\=\s*>,\r,ge

  " bar
  %s,<\s*hr[^>]*>,-------------------------------------------------,ge

  " title
  %s,</\=\s*h1\s*>,=,ge
  %s,</\=\s*h2\s*>,==,ge
  %s,</\=\s*h3\s*>,===,ge
  %s,</\=\s*h4\s*>,====,ge
  %s,</\=\s*h5\s*>,=====,ge
  " h6 is mapped to the same five-'=' mark as h5 (presumably because
  " txt2tags titles stop at level five -- confirm against the txt2tags docs)
  %s,</\=\s*h6\s*>,=====,ge

  " beautifiers
  %s,</\=\s*code\s*>,``,ge
  %s,</\=\s*\(b\|strong\)\s*>,**,ge
  %s,</\=\s*\(i\|em\)\s*>,//,ge
  %s,</\=\s*u\s*>,__,ge
  %s,</\=\s*s\s*>,--,ge

  " pre: both <pre> and </pre> become a ``` line of their own
  %s,</\=\s*pre\s*>,\r```\r,ge

  " bullet/numbered list
  %s,<\s*li\s*>,- ,ge
  %s,</\s*li\s*>,,ge
  %s,<\s*[uo]l\s*>,,ge
  %s,</\s*[uo]l\s*>,\r,ge

  " definition list
  %s,<\s*dl\s*>,,ge
  %s,</\s*dl\s*>,\r,ge
  %s,<\s*dt\s*>,: ,ge
  %s,</\s*dt\s*>,\r,ge
  %s,</\=\s*dd\s*>,,ge

  " BR is turned into a plain line break
  %s,<\s*br\s*/*>,\r,ge

  " trash
  " opening <font ...> and closing </font> are both dropped
  %s,</\=\s*font\(\s[^>]*\)\=\s*>,,ge
  %s,</\s*p\s*>,,ge
  %s,</\s*a\s*>,,ge
  %s,</\=\s*blink\s*>,,ge
  %s,<\s*a\s\+name=[^>]*>,,ge
  %s,</\=\s*\(html\|body\|head\|title\)\(\s[^>]*\)\=\s*>,,ge

  " mmmmm, dangerous! it removes all remaining HTML tags
  "%s,<[^>]*>,,ge

  " clear just-blanks lines ('e': do not complain when there are none)
  %s,^\s*$,,e

  " special entities
  " &amp; must be decoded last, otherwise text such as '&amp;lt;' would be
  " decoded twice and end up as '<' instead of '&lt;'
  %s,&quot;,",ge
  %s,&gt;,>,ge
  %s,&lt;,<,ge
  %s,&nbsp;, ,ge
  %s,&amp;,\&,ge

  " save new .t2t file and turn on syntax
  saveas! %.t2t | set ft=txt2tags