PageRenderTime 62ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/dic.rb

https://github.com/yifili09/kk-irc-bot
Ruby | 1227 lines | 1096 code | 46 blank | 85 comment | 55 complexity | fcc2ccb94705d42f0f836a4e5fae04f8 MD5 | raw file
  1. #!/usr/bin/env ruby
  2. # -*- coding: utf-8 -*-
  3. # Sevkme@gmail.com
  4. $: << '.'
  5. $: << 'lib'
  6. $:.uniq!
  7. require 'filesize'
  8. load 'log.rb'
  9. load 'utf.rb'
  10. load 'irc_user.rb'
  11. load 'color.rb'
  12. load 'plugin.rb' rescue log
  13. require 'json'
  14. def nil.empty?
  15. true
  16. end
  17. class String
  18. def slice_u!(n)
  19. self.force_encoding('ascii-8bit')
  20. self.slice!(n) #Deletes the specified portion from str
  21. self.force_encoding('utf-8')
  22. end
  23. def uri_decode
  24. URI.decode self
  25. end
  26. def uri_encode
  27. URI.encode self
  28. end
  29. def decode64
  30. Base64.decode64 self
  31. end
  32. alias unbase64 decode64
  33. alias ub64 decode64
  34. def encode64
  35. Base64.encode64 self
  36. end
  37. alias base64 encode64
  38. def rot13
  39. self.tr "A-Za-z", "N-ZA-Mn-za-m"
  40. end
  41. #"\343\213\206"
  42. def ii(s=['☘',"\322\211"][rand(2)])
  43. self.split(//u).join(s)
  44. end
  45. def addTimCh
  46. self << Time.now.hm.to_s
  47. end
  48. def md5
  49. Digest::MD5.digest self
  50. end
  51. def md5hex
  52. Digest::MD5.hexdigest self
  53. end
  54. #整理html里的 &nbsp; 等转义串
  55. def unescapeHTML
  56. #CGI.unescapeHTML(self) rescue self
  57. HTMLEntities.new.decode(self) rescue self
  58. end
  59. alias dir public_methods
  60. end
  61. load 'ipwry.rb'
  62. begin
  63. #apt-get install rubygems
  64. require 'rubygems' # 以便引用相关的库, 兼容1.8的写法
  65. #gem install htmlentities
  66. require 'htmlentities'
  67. #gem install mechanize
  68. require 'mechanize'
  69. load 'color.rb'
  70. load 'showpic.rb'
  71. rescue LoadError
  72. puts $!.message
  73. s="载入库错误, 看 README \n"
  74. s = s.utf8_to_gb if win_platform?
  75. puts s
  76. end
  77. begin
  78. require 'charguess.so'
  79. rescue LoadError
  80. #p 'charguess.so not found'
  81. end
  82. require 'time'
  83. require 'timeout'
  84. require 'open-uri'
  85. require 'uri'
  86. require 'net/http'
  87. require 'rss'
  88. require 'base64'
  89. require 'digest'
  90. require 'resolv'
  91. require 'yaml'
  92. require 'pp'
  93. require 'mechanize'
  94. #require 'mathn'
  95. load 'do_as_rb19.rb'
  96. #load 'color.rb'
  97. #todo http://www.sharej.com/ 下载查询
  98. #todo http://netkiller.hikz.com/book/linux/ linux资料查询
# Value get_feed compares against $date to tell whether the newest forum post
# was already announced; survives a reload of this file.
$old_feed_date = nil unless defined?$old_feed_date
# Number of seconds the patched Time.now (defined further down) subtracts
# from the real clock.
$_time=0 if not defined?$_time
# Flood warning text (Chinese).
$kick_info = "请勿Flood,超过6行请贴至paste.ubuntu.com ."
Ver='v0.52' unless defined? Ver
# Help text listing the bot commands (Chinese).
Help = "我是 kk-irc-bot Ver:#{Ver} ㉿ s 新手资料 g google d define `new 取论坛新贴 `deb 包查询 tt 翻译 `t 词典 > s 计算s的值 > gg 公告 > b 服务器状态 `address 查某人地址 `host 查域名 `i 机器人源码. 末尾加入|重定向,如 g ubuntu | nick" unless defined? Help
# Returns the help text.
def help
  Help
end
# User-Agent header value; the Ubuntu release is read from `lsb_release -r`
# and left empty when that command fails.
UserAgent="kk-bot/#{Ver} (X11; U; Linux i686; en-US; rv:1.9.1.2) Gecko/20090810 Ubuntu/#{`lsb_release -r`.split(/\s/)[1] rescue ''} (ub) kk-bot/#{Ver}" unless defined? UserAgent
# Binary (/n) regex over UTF-8 byte sequences; callers in this file use it to
# detect Chinese text and match it against binary strings.
CN_re = /(?:\xe4[\xb8-\xbf][\x80-\xbf]|[\xe5-\xe8][\x80-\xbf][\x80-\xbf]|\xe9[\x80-\xbd][\x80-\xbf]|\xe9\xbe[\x80-\xa5])+/n unless defined? CN_re
$re_http=/(....s?)(:\/\/.+)\s?$/iu # matches things like http:// https:// ed2k://
# /....s?:\/\/\S*?[^\s<>\\\[\]\{\}\^\`\~\|#":]/i
$min_next_say = Time.now
$Lsay=Time.now; $Lping=Time.now
# saveu skips saving within 120 s of $last_save, so the first save becomes
# possible about 10 s after this file is loaded.
$last_save = Time.now - 110
$proxy_status_ok = false if not defined? $proxy_status_ok
ChFreePlay=/\-ot|arch|fire/i unless defined? ChFreePlay
$botlist=/fity|badgirl|pocoyo.?.?|iphone|\^?[Ou]_[ou]|MadGirl/i
$botlist_Code=/badgirl|\^?[Ou]_[ou]/i
$botlist_ub_feed=/crazyghost|\^?[Ou]_[ou]/i
# Keywords that gettitleA treats as on-topic, for both titles and URLs.
$urlList = $tiList = /ubunt|linux|unix|debi|kernel|redhat|suse|gentoo|fedora|java|c\+\+|python|ruby|perl|Haskell|lisp|flash|vim|emacs|github|gnome|kde|x11|gtk|qt|xorg|wine|sql|wikipedia|source|android|xterm|progra|google|devel|sed|awk|regex|solaris|\.org\/|编译/i
# NOTE(review): the leading `.|` alternative makes the next two patterns match
# any string with at least one non-newline character, so the host lists after
# it never decide the outcome — confirm this is intentional.
$urlProxy=/.|\.ubuntu\.(org|com)\.cn|\.archive\.org|linux\.org|ubuntuforums\.org|\.wikipedia\.org|\.twitter\.com|\.youtube\.com|\.haskell\.org/i
$urlNoMechanize=/.|google|\.cnbeta\.com|combatsim\.bbs\.net\/bbs|wikipedia\.org|wiki\.ubuntu/i
# "My source code: <url>" (Chinese).
$my_s= '我的源码: http://github.com/sevk/kk-irc-bot/ '
  123. #字符串编码集猜测
  124. def guess_charset(str)
  125. #s=str.force_encoding("ASCII-8BIT")
  126. #s=str.clone
  127. return if str.empty?
  128. s=str.gsub(/[\x0-\x7f]/,'') rescue str.clone
  129. return if s.bytesize < 6
  130. while s.bytesize < 25
  131. s << s
  132. end
  133. return guess(s) rescue nil
  134. end
#sudo gem install charguess
#require "charguess"
# Defines guess(s) on top of whichever charset-detection library is present.
if defined?CharGuess
  # Charset guess via CharGuess; GBK/GB2312 results are reported as GB18030.
  def guess(s)
    c=CharGuess::guess(s)
    if c =~ /gbk|gb2312/i
      return 'GB18030'
    end
    c
  end
else
  # Second charset-detection library (fallback).
  begin
    require 'rchardet' if RUBY_VERSION < '1.9'
    require 'rchardet19' if RUBY_VERSION > '1.9'
  rescue LoadError
    # The message (Chinese) tells the user how to install rubygems and
    # rchardet and warns that charset detection may not work without them.
    s="载入库错误,命令:\napt-get install rubygems; #安装ruby库管理器 \ngem install rchardet; #安装字符猜测库\n否则字符编码检测功能可能失效. \n\n"
    s = s.utf8_to_gb if win_platform?
    puts s
    puts $!.message + $@[0]
  end
  # Charset guess via CharDet; returns the upper-cased encoding name.
  def guess(s)
    CharDet.detect(s)['encoding'].upcase
  end
end
  160. def reload_all
  161. load 'dic.rb'
  162. loadDic
  163. Thread.list.each {|x| puts "#{x.inspect}: #{x[:name]}" }
  164. rescue Exception
  165. log
  166. rescue
  167. log
  168. end
  169. def loadDic
  170. $str1 = IO.read('U.txt') rescue ''
  171. puts 'Dic load [ok]'
  172. end
  173. #保存缓存的users
  174. def saveu
  175. return if Time.now - $last_save < 120 rescue nil
  176. $last_save = Time.now
  177. File.open("_#{ARGV[0]}.yaml","w") do |o|
  178. YAML.dump($u, o)
  179. end
  180. puts ' save u ok'.red
  181. end
  182. def safe_eval(str)
  183. str.force_encoding('utf-8')
  184. Thread.new {
  185. if str !~ $eval_black_list
  186. $SAFE=4
  187. end
  188. begin
  189. eval(str).to_s.gsub(/\s+/,' ')
  190. rescue Exception
  191. $!
  192. rescue
  193. $!
  194. end
  195. }.value
  196. end
  197. def safe(level)
  198. result = nil
  199. Thread.start {
  200. Thread.current[:name]= 'safe eval thread'
  201. $SAFE = level
  202. p $SAFE
  203. result = yield
  204. }.join
  205. return result
  206. rescue
  207. log
  208. end
# Fetches the ubuntu.org.cn forum feed and returns a one-line announcement of
# the newest post.
#
# url    - feed URL.
# not_re - when true, entries whose title matches /Re:/i are skipped.
#
# Returns the announcement text passed through +icolor+, nil when there is
# nothing to say, or the Timeout::Error when the fetch took over 11 seconds.
# Uses the globals $ub (message being built), $date (link of the chosen entry)
# and $old_feed_date (value of $date from the previous announcement).
def get_feed(url= 'http://forum.ubuntu.org.cn/feed.php',not_re = true)
  p 'in get_feed'
  begin
    feed = Timeout.timeout(11) {
      RSS::Parser.parse(url)
    }
  rescue Timeout::Error
    log ''
    return $!
  end
  $ub=nil
  p feed if feed.class != RSS::Atom::Feed
  #return if feed.empty?
  # Take the first entry that is not filtered out, then stop.
  feed.items.each { |i|
    link = i.link.href.gsub(/&p=\d+#p\d+$/i,'')
    des = i.content.to_s
    #date = i.updated.content
    $date = link
    ti = i.title.content.to_s
    next if ti =~ /Re:/i && not_re
    puts i.updated.content
    $ub = "新 #{ti} #{link} #{des}"
    #p $ub
    break
  }
  if $old_feed_date == $date and $ub
    #link = feed.items[0].link.href
    #ti = feed.items[0].title.content
    ##date = feed.items[0].updated.content
    #$date = link
    #des = feed.items[0].content
    #$ub = "新⇨ #{ti} #{link} #{des}"
    # Same entry as last time: say "nothing new" roughly one time in ten,
    # otherwise stay silent.
    $ub = ".. 逛了一下论坛,暂时无新贴.只有Re: ."
    $ub = '' if rand > 0.1
  else
    $old_feed_date = $date
  end
  # $ub may still be nil here; this relies on the nil.empty? patch.
  return if $ub.empty?
  $ub.gsub!(/\s+/,' ')
  # Strip tags, decode entities, and repeat once for doubly-escaped markup.
  n = $ub.gsub(/<.+?>/,' ').unescapeHTML.gsub(/<.+?>/,' ')
  .unescapeHTML
  if n.size < 5
    p $ub
    p n
    return
  end
  return n.icolor
end
  258. class String
  259. def alice_say
  260. return if self.empty?
  261. url = 'http://www.pandorabots.com/pandora/talk?botid=f5d922d97e345aa1&skin=custom_input'
  262. p 'alice say'
  263. #$uri = uri=URI.parse(url)
  264. #$uri.open(
  265. #'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  266. #'Accept'=>'text/html',
  267. #'Referer'=> URI.escape(url),
  268. #'Accept-Language'=>'zh-cn',
  269. #'User-Agent'=> UserAgent
  270. #)
  271. agent = Mechanize.new
  272. agent.user_agent_alias = 'Linux Mozilla'
  273. agent.max_history = 0
  274. agent.open_timeout = 12
  275. agent.read_timeout = 12
  276. agent.cookies
  277. page = agent.get(url)
  278. #form = page.form_with(:name => 'f')
  279. #form.input = 'how old are you ?'
  280. #page = agent.submit(form)
  281. page = agent.post(url,{"input"=> self } )
  282. #File.new('a.txt','wb').puts page.body
  283. page.body.match(/<em>.+:(.+)<input type/m)[1].gsub(/alice/i,' @ ')
  284. .gsub!(/<.*?>/i,'') rescue '休息一下..'
  285. end
  286. def en2zh
  287. #return self if self.force_encoding("ASCII-8BIT") =~ CN_re #有中文
  288. return self unless self.ascii_only?
  289. flg = 'auto%7czh-CN'
  290. g_tr(self,flg)
  291. end
  292. def zh2en
  293. #return self if self.force_encoding("ASCII-8BIT") !~ CN_re #无中文
  294. return self if self.ascii_only?
  295. flg = 'zh-CN%7cen'
  296. g_tr(self,flg)
  297. end
  298. end
  299. def getbody(url)
  300. p url
  301. agent = Mechanize.new
  302. agent.user_agent_alias = 'Linux Mozilla'
  303. #agent.user_agent_alias = 'Mac Safari'
  304. agent.max_history = 0
  305. agent.open_timeout = 12
  306. agent.cookies
  307. page = agent.get(url)
  308. #form = page.form_with(:name => 'f')
  309. #page = agent.post(url,{"input"=> self } )
  310. p ' get body ok '
  311. page.body
  312. end
  313. #google 全文翻译,参数可以是中文,也可以是英文.
  314. def g_tr(word,flg)
  315. word = URI.escape(word)
  316. url = "http://translate.google.com/translate_a/t?client=firefox-a&text=#{word}&langpair=#{flg}&ie=UTF-8&oe=UTF-8"
  317. uri = URI.parse(url)
  318. uri.open(
  319. 'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  320. 'Accept'=>'text/html',
  321. 'Referer'=> URI.escape(url)
  322. ){ |f|
  323. return f.read.match(/"trans":"(.*?)","/)[1]
  324. }
  325. end
  326. def getGoogle_tran(word)
  327. if word.force_encoding("ASCII-8BIT") =~ CN_re #有中文
  328. flg = 'zh-CN%7cen'
  329. #flg = '#auto|en|' + word ; puts '中文>英文'
  330. else
  331. flg = 'auto%7czh-CN'
  332. #flg = '#auto|zh-CN|' + word
  333. end
  334. word = URI.escape(word)
  335. #url = "http://66.249.89.100/translate_t?hl=zh-CN#{flg}"
  336. url = "http://translate.google.com/translate_a/t?client=firefox-a&text=#{word}&langpair=#{flg}&ie=UTF-8&oe=UTF-8"
  337. uri = URI.parse(url)
  338. uri.open(
  339. 'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  340. 'Accept'=>'text/html',
  341. 'Referer'=> URI.escape(url)
  342. #'Accept-Language'=>'zh-cn',
  343. #'Cookie' => cookie,
  344. #'Range' => 'bytes=0-8000',
  345. #'User-Agent'=> UserAgent
  346. ){
  347. |f|
  348. return f.read.match(/"trans":"(.*?)","/)[1]
  349. #re = f.read[0,5059].force_encoding('utf-8').gsub(/\s+/,' ').gb_to_utf8
  350. #re.gsub!(/<.*?>/i,'')
  351. #return re.unescapeHTML
  352. }
  353. #Net::HTTP.start('translate.google.com') {|http|
  354. #resp = http.get("/translate_a/t?client=firefox-a&text=#{word}&langpair=#{flg}&ie=UTF-8&oe=UTF-8", nil)
  355. #p resp.body
  356. #return resp.body
  357. #}
  358. end
# Looks +word+ up on dict.cn.
# Returns the cleaned-up definition text followed by ' << Dict.cn', or the
# error message when anything raises.
def dictcn(word)
  word = word.utf8_to_gb
  url = 'http://dict.cn/mini.php?q=' + word
  url = URI.escape(url)
  uri = URI.parse(url)
  res = nil
  # NOTE(review): 'Accept' is given twice; Ruby keeps only the last value.
  # The Referer escapes the already-escaped url a second time.
  uri.open(
  'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  'Accept'=>'text/html',
  'Referer'=> URI.escape(url),
  'Accept-Language'=>'zh-cn',
  #'Cookie' => cookie,
  'Range' => 'bytes=0-9000',
  'User-Agent'=> UserAgent
  ){ |f|
    # Only the first 5059 characters of the response are examined.
    re = f.read[0,5059].force_encoding('utf-8').gsub(/\s+/,' ').gb_to_utf8
    re.gsub!(/<script.*?<\/script>/i,'')
    re.gsub!(/<.*?>/i,'')
    re.gsub!(/.*?Define /i,'')
    re.gsub!(/加入生词本.*/,'')
    return re.unescapeHTML + ' << Dict.cn'
  }
rescue
  return $!.message
end
# Fetches the title of the page at +url+.
#
# url       - address to inspect.
# proxy     - when truthy the Mechanize path is always taken and, if
#             $proxy_status_ok is set, the second proxy is used.
# mechanize - 1 (default) lets the URL patterns decide; any other value
#             forces the Mechanize path.
#
# Returns the title string, a short description of the Content-* headers for
# non-HTML resources, an error message string, or nil.
def gettitle(url,proxy=true,mechanize=1)
  #p url
  timeout=6
  title = ''
  charset = ''
  flag = 0
  istxthtml = false
  # CN_re is a binary regex, so it is matched against a binary copy of url.
  if url.b =~ CN_re
    url = URI.encode(url)
  end
  if mechanize == 1
    mechanize = false if url =~ $urlNoMechanize
  else
    mechanize = true
  end
  mechanize = true if url =~ /www\.google\.com/i
  mechanize = true if url =~ $urlProxy
  mechanize = true if proxy
  print ' mechanize:' , mechanize , ' ' , url ,10.chr unless mechanize
  # Use the proxy to speed things up.
  if mechanize
    #if url =~ /%[A-F0-9]/
    #url = URI.decode(url)
    #end
    if url =~ /^https/i
      agent = Mechanize.new{|a| a.ssl_version, a.verify_mode= 'SSLv3', OpenSSL::SSL::VERIFY_NONE}
      #agent = Mechanize.new
    else
      agent = Mechanize.new
    end
    if proxy and $proxy_status_ok
      agent.set_proxy($proxy_addr2,$proxy_port2)
    else
      agent.set_proxy($proxy_addr,$proxy_port)
    end
    agent.user_agent_alias = 'Linux Mozilla'
    agent.max_history = 0
    agent.open_timeout = timeout
    agent.read_timeout = timeout
    #agent.cookies
    #agent.auth('^k^', 'password')
    # HEAD request first: images go to showpic, other non-HTML content is
    # described by its Content-* headers instead of being downloaded.
    begin
      page = agent.head(url)
      p 'head ok' if $DEBUG
      type = page.header['content-type']
      if type =~ /image\/./i
        showpic(url)
        return
      end
      if type and type !~ /^$|text\/html/i
        re = page.response.select{|x| x=~/^conten/i }.to_s
        .gsub(/content-/i,'')
        return if re =~ /"length"=>"0"/i
        return re.gsub(/("length"=>")(\d+)"/i){ "长度=>"+Filesize.from($2+'b').pretty }
      end
    # NOTE(review): `rescue Exception` catches everything, so the bare
    # `rescue` after it can never run.
    rescue Exception
      log ''
    rescue
      log ''
    end
    begin
      #p 'start agent.get' if $DEBUG
      page = agent.get(url)
      #File.new('/tmp/a.x','wb').puts page.title
      #File.new('/tmp/b.x','wb').puts Mechanize.new.get_file url
      p 'end agent.get' if $DEBUG
      if page.class != Mechanize::Page
        p 'no page'
        return
      end
      title = page.title
      puts title if $DEBUG
      # title may be nil; this relies on the nil.empty? patch.
      return if title.empty?
      charset= guess_charset(title)
      charset='GB18030' if charset =~ /^gb|IBM855|windows-1252/i
      if charset and charset !~ /#@charset/i
        title = title.code_a2b(charset,@charset) rescue title
      end
      title = title.unescapeHTML.uri_decode
      title.gsub!(/\s+/,' ')
      puts title if $DEBUG
      return title[0,1000]
    # A failure here falls through to the open-uri path below, unless the
    # message matches "connection refused".
    rescue Exception
      log ''
      return if $!.message =~ /connection refused/
    rescue
      log ''
    end
  end
  #puts URI.split url
  print 'no mechanize , ' , "\n"
  # Fallback path with error handling: fetch at most the first ~9 KB via
  # open-uri.
  tmp = begin
    Timeout.timeout(timeout) {
      $uri = URI.parse(url)
      $uri.open(
      'Accept'=>'text/html , application/*',
      'Range' => 'bytes=0-8999',
      #'Cookie' => cookie,
      'User-Agent'=> UserAgent
      ){ |f|
        p f.content_type
        istxthtml= f.content_type =~ /text\/html|application\//i
        istxthtml = false if f.content_type =~ /application\/octet-stream/i
        charset= f.charset # "iso-8859-1"
        f.read[0,8800].gsub(/\s+/,' ')
      }
    }
  rescue Timeout::Error
    #return 'time out . IN gettitle '
    return
  # NOTE(review): because `rescue Exception` comes first, the bare `rescue`
  # branch below (the "Connection reset by peer" handling) is unreachable.
  rescue Exception
    log ''
  rescue
    if $!.message =~ /Connection reset by peer/ && $proxy_status_ok
      p $!.message
      p ' need pass wall '
      return if proxy
    end
    #log ''
    return $!.message[0,100] + ' . IN gettitle'
  end
  return unless istxthtml
  title = tmp.match(/<title.*?>(.*?)<\/title>/i)[1] rescue nil
  #return if title.empty?
  # No <title>: follow a meta refresh redirect if the page has one.
  if title.empty?
    p tmp
    if tmp.match(/meta\shttp-equiv="refresh(.*?)url=(.*?)">/i)
      p 'refresh..'
      return Timeout.timeout(7){
        url = $2
        url = "http://#{$uri.host}/#{$2}" if url !~ /^http/i
        gettitle(url)
      }
    end
  end
  #return if title =~ /index of/i
  # A charset declared in a <meta> tag overrides the HTTP header charset.
  if tmp =~ /<meta.*?charset=(.+?)["']/i
    charset=$1 if $1
  end
  if charset != 'UTF-8'
    #charset='GB18030' if charset =~ /^gb|iso-8859-/i
    title = title.code_a2b(charset,'UTF-8') rescue title
  end
  title = title.unescapeHTML rescue title
  #p title
  title
end
  533. def gettitleA(url,from="_",proxy=true)
  534. $last_url = url
  535. ti=nil
  536. begin
  537. ti=Timeout.timeout(13){gettitle(url,proxy)}
  538. rescue Timeout::Error
  539. Thread.pass
  540. p 'get title Time out '
  541. return ['time out . IN gettitle ']
  542. end
  543. return if ti.empty?
  544. #检测是否有其它取标题机器人
  545. #
  546. return " ... ⇪ #{ti} " if ti !~ $tiList and url !~ $urlList
  547. #登录 • Ubuntu中文论坛
  548. if ti
  549. ti.gsub!(/登录 •/, '水区水贴? ')
  550. return " \x033⇪ t: #{ti}\x030" if proxy
  551. return " \x033⇪ ti: #{ti}\x030"
  552. end
  553. end
  554. def getPY(c)
  555. p 'getPY'
  556. c=' '+ c
  557. c.gsub!(/\sfirefox(.*?)\s/i,' huohuliulanqi ')
  558. c.gsub!(/\subuntu/i,' wu ban tu ')
  559. c.gsub!(/\sopen(.*?)\s/i,' ')
  560. c.gsub!(/\s(xubuntu|fedora)/i,' ')
  561. c.gsub!(/\s[A-Z](.*?)\s/,' ')
  562. if c =~ /\skubuntu/i
  563. needAddKub=true
  564. c.gsub!(/\skubuntu/i,' ')
  565. end
  566. #re = google_py(c)
  567. re = youdao_py(c)
  568. re = re + ' Kubuntu' if needAddKub==true
  569. re.gsub!(/还原/i,'换源')
  570. if re=~ CN_re#是中文才返回
  571. return re
  572. end
  573. end
  574. def encodeurl(url)
  575. URI.encode(url)
  576. end
  577. def google_py(word)
  578. p 'google_py'
  579. re=''
  580. url = 'http://www.google.com/search?hl=zh-CN&oe=UTF-8&q=' + word.strip
  581. url = encodeurl(url)
  582. url_mini = encodeurl('http://www.google.com/search?q=' + word.strip)
  583. open(url,
  584. 'Referer'=> url,
  585. 'Accept-Encoding'=>'deflate',
  586. 'User-Agent'=> UserAgent
  587. ){ |f|
  588. html=f.read.gsub(/\s+/,' ')
  589. html.match(/是不是要找.*<em>(.*?)<\/em>/i)
  590. return $1.to_s.unescapeHTML
  591. }
  592. end
  593. #拼音转中文
  594. def youdao_py(words)
  595. url = "http://www.youdao.com/search?q=#{words}&ue=utf8&keyfrom=web.index"
  596. geturl(url)
  597. end
  598. def geturl(url,type=1)
  599. agent = Mechanize.new
  600. agent.user_agent_alias = 'Linux Mozilla'
  601. agent.max_history = 1
  602. agent.open_timeout = 12
  603. #agent.cookies
  604. begin
  605. page = agent.get_file(url)
  606. rescue Exception => e
  607. return e.message[0,60] + ' . IN geturl.'
  608. end
  609. puts page
  610. s = page.force_encoding('utf-8').match(/您是不是要找.*?<strong>(.*?)<\/strong>/im)[1]
  611. s.gsub!(/\s+/,' ')
  612. #puts s
  613. s.gsub!(/<.*?>/,'')#.unescapeHTML.gb_to_utf8
  614. s
  615. end
# Searches google.com.hk for +word+ and returns a one-line summary: a special
# answer box (definition, weather/price info, calculator) when the page has
# one, otherwise the first ordinary search hit. Returns nil when nothing
# useful was extracted. The +flg+ parameter is accepted but not used.
#
# NOTE(review): `open(url)` relies on open-uri's Kernel#open hook, which no
# longer accepts URLs since Ruby 3.0.
def getGoogle(word,flg=0)
  print "word:"
  p word
  #url = 'http://www.google.com.hk/search?hl=zh-CN&oe=UTF-8&q=' + word.strip
  url = 'http://www.google.com.hk/search?q=' + word.strip
  #s=getbody(url)
  #puts s.size
  #File.new('/tmp/a.x','wb').puts s
  #p s.class
  #s = s.match(/<div id=resultStats>.+/i)[0]
  #File.open('tmp.html','wb').puts s
  #puts s.match(/.+?<div id=foot>/i)[0]
  #return
  #url = encodeurl(url)
  url = URI.encode(url)
  p url
  #url_mini = encodeurl('http://g.cn/search?q=' + word.strip)
  # Fixed label put in front of ordinary search results.
  url_mini = 'http g.cn'
  re=''
  open(url
  #'Accept'=>'*/*',
  #'Referer'=> url,
  #'Accept-Language'=>'zh-CN',
  #'Accept-Encoding'=>'deflate',
  #'User-Agent'=> UserAgent
  ){ |f|
    html=f.read.gsub(/\s+/,' ')
    html=html.code_a2b(guess_charset(html) ,'utf-8')
    #File.new('/tmp/a.html','wb').puts html.match(/<div id="resultStats">.*/im)[0].gsub(/></,">\n<")
    # `matched` records whether one of the special answer layouts was found.
    matched = true
    case html
    when /<div class=f .*?><h3 class="r"><nobr>.*?<\/nobr>(.*?)<!--n--><!--m-->.*?<li class="g"><div class="vsc" sig="U2O">/
      re = "#$1 #$2"
    when /相关词句:(.*?)网络上查询(.*?)(https?:\/\/\S+[^\s*])">/i # define
      tmp = $2.to_s + " > " + $3.to_s.gsub(/&amp;.*/i,'')
      tmp += ' ⋙ SEE ALSO ' + $1.to_s if rand(10)>5 and $1.to_s.size > 2
    when /专业气象台|比价仅作信息参考/
      tmp = html.match(/resultStats.*?\/nobr>(.*?)(class=hd>搜索结果|Google\s+主页)/i)[1]
    when /calc_img\.gif(.*?)Google 计算器详情/i # calculator result
      tmp = "<#{$1} Google 计算器" #(.*?)<li>
    else
      matched = false
    end
    #puts html.match(/搜索用时(.*?)搜索结果<\/h2>(.*?)网页快照/i)[0]
    if matched or html =~ /搜索用时(.*?)搜索结果<\/h2>(.*?)网页快照/i
      if !matched
        tmp =$2.gsub(/<cite>.+<\/cite>/,' ' + url_mini)
        tmp1=$1
      end
      # NOTE(review): in the first `when` branch above tmp is never assigned,
      # so the gsub! calls below would be called on nil — verify.
      tmp.gsub!(/(.+?)您的广告/,'')
      if tmp=~/赞助商链接/
        tmp.gsub!(/赞助商链接.+?<ol.+?<\/ol>/,' ')
      end
      tmp.gsub!(/更多有关货币兑换的信息。/,"")
      tmp.gsub!(/<br>/i," ")
      #puts tmp + "\n"
      # NOTE(review): as written this pattern has an unmatched ')'; the
      # original presumably used full-width parentheses — confirm against the
      # real file.
      tmp.gsub!(/(.*秒))|\s+/i,' ')
      if tmp.bytesize > 30 || word =~ /^.?13.{9}$/ || tmp =~ /小提示/ then
        re=tmp
      else
        #puts "tmp.bytesize=#{tmp.bytesize} => ordinary search"
        do1=true
      end
    else
      do1=true
    end
    # Ordinary search: take the first hit.
    if do1
      puts '+普通搜索+'
      if html.match(/<div class=f .*?><h3 class="r"><nobr>.*?<\/nobr>(.*?)<!--n--><!--m-->.*?<li class="g"><div class="vsc" sig="U2O">/)
        re = "#$1 #$2"
      else
        html.match(/<div id="search"><div id="ires"(.*?)(<a href="\/url\?q=https?:\/\/[^\s]*?)">?(.*?)<span class="st">(.*?)<\/span>/i)
        #~ puts "$1=#{$1}\n$2=#{$2}\n$3=#{$3}\n$4=#{$4}\n$5=#{$5}"
        #url= $2.to_s
        re = $4.to_s + $5.to_s #+ $3.to_s.sub(/.*?>/i,'')
      end
      #if url =~ /https?:\/\/(.*?)(https?:\/\/.+?)/i
      #puts 'strip the second http'
      #url=$2.to_s
      #end
      return if re.bytesize < 3
      re = url_mini + ' ' + re
    end
    re.gsub!(/<.*?>/i,'')
    re.gsub!(/\[\s翻译此页\s\]/,'')
    re= re.unescapeHTML
  }
  return unless re
  return if re.bytesize < url_mini.bytesize + 3
  return re
end
  707. class Dic
  708. #ed2k
  709. def geted2kinfo(url)
  710. p url
  711. url.match(/^:\/\/\|(.+?)\|(\S+?)\|(.+?)\|.*$/)
  712. name=$2.to_s;size=$3.to_f
  713. return if $1 == 'server'
  714. return if not $3
  715. if url =~ /%..%../ #解析%DA之类的
  716. $ti = "#{URLDecode(name)} , #{'%.2f' % (size / 1024**3)} GB"
  717. else
  718. $ti = " #{ '%.2f' % (size / 1024**3)} GB"
  719. end
  720. $ti.gsub!(/.*\]\./,'')
  721. "⇪ #{$ti.unescapeHTML}"
  722. end
  723. end
  724. def getBaidu(word)
  725. url= 'http://www.baidu.com/s?cl=3&ie=UTF-8&wd='+word
  726. if url =~ /[\u4E00-\u9FA5]/
  727. url = URI.encode(url)
  728. end
  729. p url
  730. open(url,
  731. 'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  732. 'Referer'=> url,
  733. 'Accept-Language'=>'zh-cn',
  734. 'Accept-Encoding'=>'deflate',
  735. 'User-Agent'=> UserAgent,
  736. 'Host'=>'www.baidu.com',
  737. 'Connection'=>'close'
  738. ) {|f|
  739. html=f.read().gsub!(/\s/,' ')
  740. re = html.match(/ScriptDiv(.*?)(http:\/\/\S+[^\s*])(.*?)size=-1>(.*?)<br><font color=#008000>(.*?)<a\ href(.*?)(http:\/\/\S+[^\s*])/i).to_s
  741. re = $4 ; a2=$2[0,120]
  742. re= re.unescapeHTML.gsub(/<.*?>/i,'')[0,330]
  743. $re = a2 + ' ' + re
  744. $re = $re.code_a2b('gbk','UTF-8')[0,980]
  745. }
  746. $re
  747. end
  748. def getBaidu_tran(word,en=true)
  749. url= 'http://www.baidu.com/s?cl=3&ie=UTF-8&wd='+word+'&ct=1048576'
  750. if url =~ /[\u4E00-\u9FA5]/
  751. url = URI.encode(url)
  752. end
  753. open(url,
  754. 'Accept'=>'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*',
  755. 'Referer'=> url,
  756. 'Accept-Language'=>'zh-cn',
  757. 'Accept-Encoding'=>'deflate',
  758. 'User-Agent'=> UserAgent,
  759. 'Host'=>'www.baidu.com',
  760. 'Connection'=>'close',
  761. 'Cookie'=>'BAIDUID=EBBDCF1D3F9B11071169B4971122829A:FG=1; BDSTAT=172f338baaeb951db319ebc4b74543a98226cffc1f178a82b9014a90f703d697'
  762. ) {|f|
  763. html = f.read()
  764. html = html.gb_to_utf8.gsub(/\s+/,' ')
  765. re = ' <' + html.match(/class="wd"(.+?)<script>pronu/i)[1].to_s + ' '
  766. re += html.match(/class="explain">(.+?)<script/i)[1]
  767. re.gsub!(/<script\s?.+?>.+?<\/script>/i,'')
  768. re = re[0,600]
  769. re.gsub!(/&nbsp/,' ')
  770. re = re.unescapeHTML
  771. re.gsub!(/<.*?>/,'')
  772. $re = re.gsub(/>pronu.+?中文翻译/i,' ')
  773. $re.gsub!(/以下结果由.*?提供词典解释/,' ')
  774. $re.gsub!(/部首笔画部首.+?基本字义/,' 基本字义: ')
  775. if en
  776. $re.gsub!(/基本字义.*?英文翻译/,': ')
  777. end
  778. }
  779. $re
  780. end
  781. #为Time类加入hm方法,返回格式化后的时和分
  782. class Time
  783. def hm
  784. Time.now.strftime(' %H:%M')
  785. end
  786. #ch,小时字符. '㍘' = 0x3358
  787. def ch
  788. " \xE3\x8D".force_encoding('ascii-8bit') + (Time.now.hour + 0x98).chr
  789. end
  790. end
  791. #取IP地址的具体位置,参数是IP
  792. #
  793. class String
  794. def getaddr_fromip
  795. hostA(self,true)
  796. end
  797. end
  798. def getaddr_fromip(ip)
  799. hostA(ip,true)
  800. end
  801. #域名转化为IP
  802. def host(domain)
  803. return 'IPV6' if domain =~ /^([\da-f]{1,4}(:|::)){1,6}[\da-f]{1,4}$/i
  804. domain.gsub!(/\/.*/i,'')
  805. return domain if not domain.include?('.')
  806. return Resolv.getaddress(domain) rescue domain
  807. end
  808. def getProvince(domain)#取省
  809. hostA(domain).gsub(/^.*(\s|省)/,'').match(/\s?(.*?)市/)[1]
  810. end
  811. #取IP或域名的地理位置
  812. #hostA('www.g.cn',true)
  813. def hostA(domain,hideip=true)#处理IP 或域名
  814. return nil if !domain
  815. if domain=~ /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/
  816. tmp = $1
  817. else
  818. tmp = host(domain)
  819. end
  820. if hideip
  821. tmp = IpLocationSeeker.new.seek(tmp) rescue tmp
  822. else
  823. tmp = tmp + '-' + IpLocationSeeker.new.seek(tmp) rescue tmp
  824. end
  825. tmp.gsub!(/CZ88\.NET/i,'')
  826. tmp.gsub!(/IANA/i,'不在宇宙')
  827. tmp.gsub(/\s+/,'').to_s + ' '
  828. end
  829. alias _print print if not defined?_print
  830. def print(* s)
  831. _print s.join rescue nil
  832. s.join
  833. end
  834. #eval
  835. def evaluate(s)
  836. begin
  837. r=Timeout.timeout(2){
  838. safe_eval(s)
  839. #return safe_eval(s)
  840. #return safe(l){eval(s).to_s[0,290]}
  841. #return safely(s,l)[0,300]
  842. }.inspect
  843. #p r
  844. return r
  845. rescue Timeout::Error
  846. return 'Timeout'
  847. rescue Exception
  848. return $!.message[0,38] # + $@[1..2].join(' ')
  849. rescue
  850. return $!.message[0,28]#+ $@[1..2].join(' ')
  851. end
  852. end
  853. def onemin
  854. 60
  855. end
  856. def onehour
  857. 3600
  858. end
  859. Oneday = 86400 unless defined? Oneday
# Redefine Time.now so that it returns the real clock minus $_time seconds.
# The original implementation stays reachable as Time._now. The whole patch is
# skipped when Time._now already exists, so loading this file again does not
# stack the offset.
unless defined?Time._now
  p 'redefine Time.now'
  class Time
    class << self
      alias _now now if not defined?_now
      def now
        _now - $_time
      end
    end
  end
end
  872. #返回roll
  873. def roll
  874. "掷出了: #{rand(101)} "
  875. end
  876. #返回uptime
  877. def b
  878. `uptime`
  879. end
# English sentence of the day.
# Currently disabled: the first statement always returns '', so the code after
# it is never executed.
def osod
  return '' if true
  agent = Mechanize.new
  agent.user_agent_alias = 'Linux Mozilla'
  agent.max_history = 0
  agent.open_timeout = 12
  #agent.cookies
  #url = 'http://ppcbook.hongen.com/eng/daily/sentence/0425sent.htm'
  t=Time.now
  # The page name is built from (seconds % 10 + 3) and the day of the month.
  m="%02d" % (t.sec%10+3)
  d="%02d" % t.day
  url = "http://ppcbook.hongen.com/eng/daily/sentence/#{m}#{d}sent.htm"
  begin
    page = agent.get_file(url)
  rescue Exception => e
    return e.message[0,60] + ' . IN osod'
  end
  s = page.match(/span class="e2">(.*?)<select name='selectmonth'>/mi)[1]
  # NOTE(review): gsub! returns nil when nothing is replaced, so the next two
  # lines would raise in that case if this code were ever re-enabled.
  s = s.gsub!(/\s+/,' ')
  s.gsub!(/<.*?>/,'').unescapeHTML.gb_to_utf8
end
  902. #`apt-cache show #{c}`.gsub(/\n/,'~').match(/Version:(.*?)~.{4,16}:(.*?)Description[:\-](.*?)~.{4,16}:/i)
  903. #re="#$3".gsub(/~/,'')
  904. # gsub(/xxx/){$&.upcase; gsub(/xxx/,'\2,\1')}
  905. #get deb info
  906. def ge name
  907. agent = Mechanize.new
  908. agent.user_agent_alias = 'Linux Mozilla'
  909. agent.max_history = 0
  910. agent.open_timeout = 12
  911. agent.cookies
  912. begin
  913. url = 'http://packages.ubuntu.com/search?&searchon=names&suite=all&section=all&keywords=' + name.strip
  914. #url = 'http://packages.debian.org/search?suite=all&arch=any&searchon=names&keywords=' + name.strip
  915. #p url
  916. #page = agent.get(url)
  917. page = agent.get_file(url)
  918. #return nil if page.class != Mechanize::Page
  919. rescue Exception => e
  920. #p e.message
  921. return e.message[0,60] + ' . IN getdeb'
  922. end
  923. s = page.split(/<\/h2>/im)[1]
  924. s = s.match(/.*resultlink".+?:(.+?)<br>(.+?): .*<h2>/mi)[1..2].join ','
  925. s = s.gsub!(/\s+/,' ')
  926. s.gsub!(/<.*?>/,'')
  927. s.unescapeHTML
  928. end
  929. alias get_deb_info ge
# Announcement: returns a multi-line (Chinese) notice containing the address
# of today's #ubuntu-cn channel log on irclogs.ubuntu.com and the current
# time.
def gg
  t=Time.now
  #http://logs.ubuntu-eu.org/free/#{t.strftime('%Y/%m/%d')}/%23ubuntu-cn.html
  #https://groups.google.com/group/ircubuntu-cn/topics
  "频道 #ubuntu-cn当前log地址是 :
http://irclogs.ubuntu.com/#{t.strftime('%Y/%m/%d')}/%23ubuntu-cn.html
有需要请浏览
. #{t.strftime('%H:%M:%S')} "
end
  940. #alias say_公告 say_gg
  941. #简单检测代理是否可用
  942. def check_proxy_status
  943. Thread.new do
  944. Thread.current[:name]= 'check proxy stat'
  945. begin
  946. Timeout.timeout(8){
  947. a=TCPSocket.open($proxy_addr2,$proxy_port2)
  948. a.send('get',0)
  949. a.close
  950. }
  951. rescue Timeout::Error
  952. print $proxy_addr2,':',$proxy_port2,' ',false,"\n"
  953. $proxy_status_ok = false
  954. break
  955. end
  956. #print $proxy_addr2,':',$proxy_port2,' ',true,"\n"
  957. $proxy_status_ok = true
  958. end
  959. true
  960. end
  961. def addTimCh
  962. Time.now.hm
  963. end
  964. def chr_hour
  965. Time.now.hm
  966. #Time.now.ch
  967. end
  968. #随机事件
  969. def rand_do
  970. case rand(1000)
  971. when 0..130
  972. $my_s
  973. when 131..180
  974. get_feed
  975. when 200..400
  976. "..休息一下.. #$my_s"
  977. else
  978. ""
  979. end
  980. end
  981. def hello_replay(sSay)
  982. tmp = Time.parse('2013-02-10')-Time.now
  983. #不用显示倒计时
  984. if tmp < 0 or tmp > Oneday*30 or rand(9) < 2
  985. return if sSay =~ /\s$/
  986. return "#{sSay} \0039 #{chr_hour} \017"
  987. end
  988. case tmp
  989. when 61..3600
  990. tmp="#{tmp/60}分钟"
  991. when 3601..86400
  992. tmp="#{tmp/60/60}小时"
  993. when 0..60
  994. tmp="#{tmp}秒"
  995. else
  996. tmp="#{tmp/60/60/24}天"
  997. end
  998. tmp.sub!(/([\.?\d]+)/){ "%.2f" % $1}
  999. "#{sSay} #{chr_hour} \0039新年快乐 : #{tmp}\017"
  1000. end
  1001. def gettitle_https(url)
  1002. require 'net/http'
  1003. require 'net/https'
  1004. url = URI.parse(url)
  1005. http = Net::HTTP.new(url.host, url.port)
  1006. http.use_ssl = true if url.scheme == 'https'
  1007. request = Net::HTTP::Get.new(url.path)
  1008. s= http.request(request)
  1009. #puts s.head[0,9999]
  1010. pp s.body
  1011. #puts s.body[0,9999]
  1012. end
# Placeholder for fetching a title through an HTTP proxy. Only the
# Net::HTTP.version_1_2 call is executed; the proxy code is commented out.
def gettitle_proxy(url)
  # The class method Net::HTTP.Proxy normally creates a new class that
  # connects through the proxy. That class inherits from Net::HTTP, so it can
  # be used exactly like Net::HTTP.
  require 'net/http'
  Net::HTTP.version_1_2 # sets how the objects behave
  #Net::HTTP::Proxy($proxy_addr, $proxy_port).start( 'some.www.server' ) {|http|
  ## always connect to your.proxy.addr:8080
  #:
  #}
  # When the first argument of Net::HTTP.Proxy is nil it returns Net::HTTP
  # itself, so the code above also works without a proxy.
end
  1023. def update_proxy_rule
  1024. File.open('gfwlist.txt','w'){ |x|
  1025. url = "nUE0pQbiY2S1qT9jpz94rF1aMaqfnKA0Yzqio2qfMJAiMTHhL29gY3A2ov90\npaIhnl9aMaqfnKA0YaE4qN==\n".rot13.ub64
  1026. x.write Mechanize.new.get(url).body
  1027. }
  1028. end
  1029. def read_proxy_rule
  1030. $proxy_rule = File.read('gfwlist.txt').unbase64.split(/\n/)
  1031. end
  1032. #调用 alice
  1033. def botsay(s)
  1034. return if s.empty?
  1035. s.zh2en.alice_say.en2zh rescue ( '.. 休息一下 ..')
  1036. end
# Prints an IRC protocol line to the console with colours and, for user
# messages, appends it to the chat log.
#
# Lines of the form ":nick!name@host COMMAND [target :]text" are split into
# their parts and formatted per command; anything else is printed in red.
def pr_highlighted(s)
  #s=s.force_encoding("utf-8")
  s=s.gb_to_utf8 if @charset !~ /UTF-8/i # convert to UTF-8 when the channel charset is not UTF-8
  need_savelog = false
  case s
  when /^:(.+?)!(.+?)@(.+?)\s(.+?)\s((.+?)\s:)?(.+)$/i
    from=$1;name=$2;ip=$3;mt=$4;to=$6;sy=$7
    # Commands listed in $ignore_action are not shown at all.
    return if $ignore_action =~ /#{Regexp::escape mt}/i
    case mt
    when /privmsg/i
      need_savelog = true
      mt.replace ' '
      # Messages addressed to the channel itself do not repeat the target.
      if to =~ /#{Regexp::escape @channel}/i
        to.clear
      end
      # Messages addressed to the bot's own nick are shown in yellow.
      sy= sy.yellow if to =~ /#{Regexp::escape @nick}/i
    when /join|part|quit|nick|notice|kick/i
      mt = ' ' << mt[0,4].red_on_white << ' '
      # Append the location looked up for the sender's host.
      from << ' ' << ip.getaddr_fromip.underline
      if to =~ /#{Regexp::escape @channel}/i
        to.clear
      end
      need_savelog = true
    else
      #pp s.match(/^:(.+?)!(.+?)@(.+?)\s(.+?)\s((.+)\s:)?(.+)$/i)
      re= s.pink
      mt= ' ' + mt[0,4].blue + ' '
      sy=sy.green
      need_savelog = true
    end
    t = Time.now.strftime('%H%M%S')
    sy.force_encoding('utf-8')
    # The nick column is coloured via c_rand, seeded with the sum of the user
    # name.
    re= "#{t}#{ (('<'+from+'>').rjust(14)).c_rand(name.sum)}#{mt}#{to} #{sy}"
  else
    re= s.red
  end
  re = "\r" << re
  if $local_charset !~ /UTF-8/i
    puts re.code_a2b('utf-8',$local_charset)
  else
    puts re
  end
  savelog re if need_savelog
end
  1082. #写入聊天记录
  1083. def savelog(s)
  1084. return if $not_savelog
  1085. s.gsub!(/\e\[\d\d?m/i,'') #去掉ANSI颜色代码
  1086. #gem install ansi2html
  1087. #m = Time.now.min
  1088. #m = "%02d" % (m - (m % 30))
  1089. fn=Time.now.strftime("%y%m%d.txt")
  1090. #mkdir_p "irclogs/#{@channel[1..-1]}"
  1091. File.open("irclogs/#{@channel[1..-1]}/" + fn,'ab'){|x|
  1092. x.puts s
  1093. }
  1094. end
  1095. #记录自己说话的时间
  1096. def isaid(second=0)
  1097. $min_next_say=Time.now + $minsaytime + second
  1098. end
  1099. #记录频道说话的频率
  1100. def auto_set_ch_baud(ch)
  1101. @ch_baud ||= Hash.new
  1102. @ch_baud.default = Hash.new
  1103. #最后1次发言时间
  1104. @ch_baud[ch]['last']=Time.now
  1105. end
  1106. begin
  1107. require 'bfrb'
  1108. rescue LoadError
  1109. end
  1110. def bf(s='.')
  1111. $last_bf=''
  1112. BfRb::Interpreter.new.run s
  1113. $last_bf
  1114. end
  1115. #.rvm/gems/ruby-1.9.2-p180/gems/bfrb-0.1.5/lib/bfrb/interpreter.rb
  1116. # print value in memory$
  1117. #when "."$
  1118. #@output_stream.print current_memory.chr
  1119. #$last_bf << current_memory.chr rescue nil
  1120. if __FILE__ == $0
  1121. p rand_do
  1122. end