PageRenderTime 55ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/wikipedia.tcl

http://cklib.googlecode.com/
TCL | 201 lines | 178 code | 12 blank | 11 comment | 23 complexity | 208d0839c97cc7b0e0209fc40bcf7764 MD5 | raw file
  1. encoding system utf-8
  2. ::ck::require cmd 0.7
  3. ::ck::require http 0.2
  4. ::ck::require cache 0.2
  5. namespace eval ::wikipedia {
  6. variable version 1.0
  7. variable author "Chpock <chpock@gmail.com>"
  8. namespace import -force ::ck::cmd::*
  9. namespace import -force ::ck::cache::cache
  10. namespace import -force ::ck::strings::html
  11. namespace import -force ::ck::http::http
  12. }
  13. proc ::wikipedia::init {} {
  14. cmd register wikipedia ::wikipedia::run -doc "wiki" -autousage \
  15. -bind "wiki|pedia" -bind "wp" -bind "????|?????" -flood 10:60
  16. cmd doc -link "wiki.page" "wiki" {~*!wiki* [-?????] [????] <??????>~ - ?????? ? ?????????. <?????> - ??????? ??????, <????> - ????????????? \
  17. ??????????? ????? &K(&n????????&K: &Ben&n - ??????????, &Bfr&n - ??????????&K).}
  18. cmd doc -link "wiki" "wiki.page" {~*!wiki* <??????>~~<?????>~ - ????? ????? ?????? ?? ??????. ?????? ??????? - ??. ??????? <wiki>.}
  19. config register -id "num.search" -type bool -default 0 \
  20. -desc "????????? ?? ????? ?????????? ??????." -access "m" -folder "wikipedia"
  21. config register -id "multi.count" -type int -default 1 \
  22. -desc "??????? ????? ???????? ??? ?????? ???????? ????." -access "m" -folder "wikipedia"
  23. config register -id "extmark" -type bool -default 0 \
  24. -desc "???????? ?? ?????? ?????? ?? ??????." -access "m" -folder "wikipedia"
  25. cache register -nobotnet -nobotnick -ttl 10d -maxrec 30
  26. msgreg {
  27. err.http &B?????? ????? ? ??????????&K:&R %s
  28. err.noarticle &B?????? ? ????????? ?? ???????.
  29. err.search &BWiki ?????&K: &R? ?????????, ?? ?????? ??????? ?? ???? ??????? ?????? ????????????.
  30. search &BWiki ?????%s:&n %s
  31. search.j "&n, "
  32. search.onum1 &K[&R%s-%s&K/&r%s&K]
  33. search.onum0 &K[&R%s-%s&K]
  34. search.num0 &U%2$s
  35. search.num1 %s.&U%s
  36. mark.page &B
  37. mark.new ""
  38. }
  39. }
  40. proc ::wikipedia::run { sid } {
  41. session import
  42. if { $Event == "CmdPass" } {
  43. set req [join [lrange $StdArgs 1 end] " "]
  44. if { [regexp -- {-(\d+)} $req - sindex] } {
  45. regfilter {\s*-\d+\s*} req
  46. set sindex [string trimleft $sindex 0]
  47. } {
  48. set sindex ""
  49. }
  50. if { [regexp {^-?(\w\w)\s+} $req - lang] } { regfilter {^-?\w\w\s+} req } { set lang "ru" }
  51. if { [regexp {^(.*?)~(\d+)$} $req - req WikiPage] } {
  52. if { [set WikiPage [string trimleft $WikiPage 0]] eq "" } { set WikiPage -1 }
  53. } {
  54. set WikiPage -1
  55. }
  56. regsub -all -- {\s} $req {_} req
  57. session export -grablist [list "lang" "req" "sindex" "WikiPage"]
  58. cache makeid $lang $req [set Mark "Search"]
  59. if { ![cache get HttpData] } {
  60. cache makeid $lang $req [set Mark ""]
  61. if { ![cache get HttpData] } {
  62. http run "http://${lang}.wikipedia.org/wiki/[string urlencode [encoding convertto utf-8 $req]]" -redirects 5 -return
  63. }
  64. }
  65. } elseif { $Event == "HttpResponse"} {
  66. if { $HttpStatus < 0 } {
  67. debug -err "::wikipedia:: http return code\(%s\): %s" $HttpStatus $HttpError
  68. reply -err http $HttpError
  69. }
  70. cache makeid $lang $req $Mark
  71. cache put $HttpData
  72. }
  73. if { [set_ [config get "multi.count"]] < 1 } { set_ 1 }
  74. session set CmdReplyParam [list "-multi" "-multi-max" $_]
  75. regfilter {^.+?<!-- start content -->[\s\r\n]*} HttpData
  76. regfilter {[\s\r\n]*<!-- end content -->.+$} HttpData
  77. # ?????? ?? ??? ????????? ??????
  78. if { $Mark eq "Search" && [regexp {<!--\squerying\s[^>]+\s-->(.+)$} $HttpData - HttpData] } {
  79. if { ![regexp {<strong>\s*\D+(\d+)\D+(\d+)\D+(\d+)\D*</strong>} $HttpData - r1 r2 r3] } {
  80. reply -err search
  81. }
  82. # ???????? ???? ?????????? ??????
  83. regfilter {^.+?<ul>} HttpData
  84. regfilter {</ul>.+$} HttpData
  85. set result [list]
  86. set resultraw [list]
  87. set i 0
  88. while { [regexp {<li\s+[^>]+>\s*<a\s+[^>]+>([^<]+)</a>(.*)$} $HttpData - _ HttpData] } {
  89. lappend resultraw $_
  90. lappend result [cformat search.num[config get num.search] [incr i] $_]
  91. }
  92. if { $sindex eq "" } {
  93. if { [set r4 $r2] > [llength $resultraw] } { set r4 [llength $resultraw] }
  94. if { $r2 == $r3 } {
  95. set resultnum [cformat search.onum0 $r1 $r4]
  96. } {
  97. set resultnum [cformat search.onum1 $r1 $r4 $r3]
  98. }
  99. reply -noperson -return -uniq search $resultnum [cjoin $result search.j]
  100. }
  101. if { $sindex > [llength $resultraw] } { set sindex [llength $resultraw] }
  102. set req [lindex $resultraw [incr sindex -1]]
  103. regsub -all -- {\s} $req {_} req
  104. session export -grab "req"
  105. cache makeid $lang $req [set Mark ""]
  106. if { ![cache get HttpData] } {
  107. http run "http://${lang}.wikipedia.org/wiki/[string urlencode [encoding convertto utf-8 $req]]" -redirects 5 -return
  108. }
  109. regfilter {^.+?<!-- start content -->[\s\r\n]*} HttpData
  110. regfilter {[\s\r\n]*<!-- end content -->.+$} HttpData
  111. }
  112. # ??????? html ????????
  113. regfilter -all {<!-- .*? -->} HttpData
  114. # ???????? ???????, ??? ????? ??????????
  115. regfilter -all {<table .+?</table>} HttpData
  116. # ???????? ?????????
  117. regfilter -all {<h2>.+?</h2>} HttpData
  118. regfilter -all {<h3>.+?</h3>} HttpData
  119. # ??????? ?????????
  120. regfilter -all {<div [^>]+ class="NavFrame">.+$} HttpData
  121. # ?????? ???? "???????"
  122. regfilter -all {<sup>\[.+?\]</sup>} HttpData
  123. # ?????? ???? ??????
  124. regfilter -all {<sup[^>]+><a[^>]+>\[\d+\]</a></sup>} HttpData
  125. # ??????? ?????? ????????
  126. if { [regexp {^\s*<ul>(.*?)</ul>\s*<p>(.*)$} $HttpData - _ HttpData] } {
  127. # ??? ????????? ????? ?????? ("????????")
  128. set HttpData "$_$HttpData"
  129. } {
  130. regfilter -- {.*?<p>} HttpData
  131. }
  132. # ????????? ?????????? ?? ??????
  133. if { [string first "=\"noarticletext" $HttpData] != -1 } {
  134. if { $Mark ne "Search" } {
  135. if { [regexp {<a href="(/wiki/[^/]+:Search/[^"]+)} $HttpData - newurl] } {
  136. http run "http://${lang}.wikipedia.org$newurl" -redirects 5 -return -mark "Search"
  137. }
  138. }
  139. reply -err noarticle
  140. }
  141. # ???????? ???????? div'?
  142. regfilter -all {<div class="notice noprint".*?</div>} HttpData
  143. regfilter -all {<div class="floatleft".*?</div>} HttpData
  144. regfilter -all {<div class="notice metadata".*?</div>} HttpData
  145. regsub -all -- {&#160;} $HttpData { } HttpData
  146. set extmark [config get "extmark"]
  147. set nowM [set nowB [set nowU 0]]
  148. set mark_page [rawformat mark.page]
  149. set mark_new [rawformat mark.new]
  150. html parse -stripspace -stripbadchar \
  151. -tag {
  152. if { $_tag eq "b" } {
  153. append _parsed {&L}
  154. set nowB $_tag_open
  155. } elseif { $_tag eq "i" } {
  156. append _parsed {&U}
  157. set nowU $_tag_open
  158. } elseif { $_tag eq "li" && $_tag_open } {
  159. append _parsed {* }
  160. } elseif { $extmark && $_tag eq "a" } {
  161. if { $nowM && !$_tag_open } {
  162. set nowM 0
  163. append _parsed {&n}
  164. if { $nowB } { append _parsed {&L} }
  165. if { $nowU } { append _parsed {&U} }
  166. } elseif { !$nowM && $_tag_open } {
  167. if { $mark_new ne "" && [lsearch -exact [split $_tag_param { }] {class="new"}] != -1 } {
  168. set nowM 1
  169. append _parsed $mark_new
  170. } elseif { $mark_page ne "" && [regexp {href="/wiki/[^:"]+""?} $_tag_param] } {
  171. set nowM 1
  172. append _parsed $mark_page
  173. }
  174. }
  175. }
  176. } \
  177. -text {
  178. append _parsed [cquote $_text]
  179. } \
  180. -spec {
  181. append _parsed [cquote $_replace]
  182. } $HttpData
  183. if { $WikiPage != -1 } {
  184. session set CmdReplyParam [list "-multi" "-multi-only" [incr WikiPage -1]]
  185. }
  186. reply -uniq -noperson [cmark $_parsed]
  187. }