PageRenderTime 67ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/rex/text.rb

https://bitbucket.org/technopunk2099/metasploit-framework
Ruby | 1430 lines | 958 code | 168 blank | 304 comment | 107 complexity | ce9a8e839d6546f38362db41b70b3eb4 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, LGPL-2.1, GPL-2.0, MIT
  1. # -*- coding: binary -*-
  2. require 'digest/md5'
  3. require 'digest/sha1'
  4. require 'stringio'
  5. require 'cgi'
  # Best-effort loading of optional libraries with warnings silenced.
  # Each library gets its own rescue so that a missing iconv (it is not
  # bundled with newer Rubies) can no longer prevent zlib from loading.
  begin
    old_verbose = $VERBOSE
    $VERBOSE = nil
    %w[iconv zlib].each do |optional_lib|
      begin
        require optional_lib
      rescue ::LoadError
        # Optional dependency: callers must cope with it being absent.
      end
    end
  ensure
    $VERBOSE = old_verbose
  end
  15. module Rex
  16. ###
  17. #
  18. # This class formats text in various fashions and also provides
  19. # a mechanism for wrapping text at a given column.
  20. #
  21. ###
  22. module Text
  # Module-level cache read by to_unicode's 'uhwtfms' modes; starts out nil.
  # NOTE(review): presumably populated by load_codepage, which is defined
  # outside this excerpt -- confirm.
  @@codepage_map_cache = nil
  24. ##
  25. #
  26. # Constants
  27. #
  28. ##
  # Two-letter postal abbreviations of the 50 US states.
  States = %w[
    AK AL AR AZ CA CO CT DE FL GA HI
    IA ID IL IN KS KY LA MA MD ME MI MN
    MO MS MT NC ND NE NH NJ NM NV NY OH
    OK OR PA RI SC SD TN TX UT VA VT WA
    WI WV WY
  ]
  # Character classes used by the generators and encoders.
  UpperAlpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  LowerAlpha = "abcdefghijklmnopqrstuvwxyz"
  Numerals = "0123456789"
  Base32 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  Alpha = UpperAlpha + LowerAlpha
  AlphaNumeric = Alpha + Numerals
  # Raw byte ranges, packed into binary strings.
  HighAscii = (0x80 .. 0xff).to_a.pack("C*")
  LowAscii = (0x00 .. 0x1f).to_a.pack("C*")
  # Default column at which generated buffers and comments are wrapped.
  DefaultWrap = 60
  AllChars = (0x00 .. 0xff).to_a.pack("C*")
  # Printable ASCII punctuation (note: 0x40 '@' is not part of the set).
  Punctuation = [0x21 .. 0x2f, 0x3a .. 0x3F, 0x5b .. 0x60, 0x7b .. 0x7e].map { |range| range.to_a }.flatten.pack("C*")
  # Character sets used when no explicit sets are given to the pattern generators.
  DefaultPatternSets = [ UpperAlpha, LowerAlpha, Numerals ]
  46. # In case Iconv isn't loaded
  47. Iconv_EBCDIC = ["\x00", "\x01", "\x02", "\x03", "7", "-", ".", "/", "\x16", "\x05", "%", "\v", "\f", "\r", "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "<", "=", "2", "&", "\x18", "\x19", "?", "'", "\x1C", "\x1D", "\x1E", "\x1F", "@", "Z", "\x7F", "{", "[", "l", "P", "}", "M", "]", "\\", "N", "k", "`", "K", "a", "\xF0", "\xF1", "\xF2", "\xF3", "\xF4", "\xF5", "\xF6", "\xF7", "\xF8", "\xF9", "z", "^", "L", "~", "n", "o", "|", "\xC1", "\xC2", "\xC3", "\xC4", "\xC5", "\xC6", "\xC7", "\xC8", "\xC9", "\xD1", "\xD2", "\xD3", "\xD4", "\xD5", "\xD6", "\xD7", "\xD8", "\xD9", "\xE2", "\xE3", "\xE4", "\xE5", "\xE6", "\xE7", "\xE8", "\xE9", nil, "\xE0", nil, nil, "m", "y", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87", "\x88", "\x89", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97", "\x98", "\x99", "\xA2", "\xA3", "\xA4", "\xA5", "\xA6", "\xA7", "\xA8", "\xA9", "\xC0", "O", "\xD0", "\xA1", "\a", nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil]
  48. Iconv_ASCII = ["\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\a", "\b", "\t", "\n", "\v", "\f", "\r", "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17", "\x18", "\x19", "\x1A", "\e", "\x1C", "\x1D", "\x1E", "\x1F", " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", nil, "\\", nil, nil, "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "\x7F", nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil]
  49. ##
  50. #
  51. # Serialization
  52. #
  53. ##
  54. #
  55. # Converts a raw string into a ruby buffer
  56. #
  # Delegates to hexify: every line is wrapped in double quotes, continued
  # lines end in '" +', and the buffer is introduced by "<name> = ".
  # +wrap+ is the column limit handed to hexify.
  def self.to_ruby(str, wrap = DefaultWrap, name = "buf")
    hexify(str, wrap, '"', '" +', "#{name} = \n", '"')
  end
  60. #
  61. # Creates a ruby-style comment
  62. #
  # Word-wraps +str+ at +wrap+ columns via wordwrap, prefixing each line
  # with '# '.
  def self.to_ruby_comment(str, wrap = DefaultWrap)
    wordwrap(str, 0, wrap, '', '# ')
  end
  66. #
  67. # Converts a raw string into a C buffer
  68. #
  # Delegates to hexify: every line is a double-quoted C string literal, the
  # buffer is introduced by "unsigned char <name>[] = " and closed with '";'.
  def self.to_c(str, wrap = DefaultWrap, name = "buf")
    hexify(str, wrap, '"', '"', "unsigned char #{name}[] = \n", '";')
  end
  72. #
  73. # Creates a c-style comment
  74. #
  # Word-wraps +str+ via wordwrap with ' * ' in front of every line and
  # encloses the result between "/*" and " */" lines.
  def self.to_c_comment(str, wrap = DefaultWrap)
    body = wordwrap(str, 0, wrap, '', ' * ')
    "/*\n" + body + " */\n"
  end
  78. #
  79. # Creates a javascript-style comment
  80. #
  # Word-wraps +str+ at +wrap+ columns via wordwrap, prefixing each line
  # with '// '.
  def self.to_js_comment(str, wrap = DefaultWrap)
    wordwrap(str, 0, wrap, '', '// ')
  end
  84. #
  85. # Converts a raw string into a perl buffer
  86. #
  # Delegates to hexify: lines are double-quoted and joined with the Perl
  # concatenation operator ('" .'); the buffer is introduced by
  # "my $<name> = " and closed with '";'.
  def self.to_perl(str, wrap = DefaultWrap, name = "buf")
    hexify(str, wrap, '"', '" .', "my $#{name} = \n", '";')
  end
  90. #
  91. # Converts a raw string into a Bash buffer
  92. #
  # Delegates to hexify: each line is a $'...' quoted string continued with a
  # trailing backslash; the buffer is introduced by "export <name>=\".
  def self.to_bash(str, wrap = DefaultWrap, name = "buf")
    hexify(str, wrap, '$\'', '\'\\', "export #{name}=\\\n", '\'')
  end
  96. #
  97. # Converts a raw string into a java byte array
  98. #
  # Renders the bytes of +str+ as a Java byte array initializer named +name+.
  # Bytes are emitted eight per row, each row starting with a tab; rows are
  # separated by ",\n". Returns the generated source as a String.
  #
  # Rows are built from the byte array itself, so the row separator no longer
  # depends on comparing a byte counter against the character length of
  # +str+ (which dropped a separator for multibyte input).
  def self.to_java(str, name = "shell")
    rows = str.unpack('C*').each_slice(8).map do |row|
      "\t" + row.map { |c| sprintf('(byte) 0x%.2x', c) }.join(", ")
    end
    "byte #{name}[] = new byte[]\n{\n" + rows.join(",\n") + "\n};\n"
  end
  117. #
  118. # Creates a perl-style comment
  119. #
  # Word-wraps +str+ at +wrap+ columns via wordwrap, prefixing each line
  # with '# '.
  def self.to_perl_comment(str, wrap = DefaultWrap)
    wordwrap(str, 0, wrap, '', '# ')
  end
  123. #
  124. # Creates a Bash-style comment
  125. #
  # Word-wraps +str+ at +wrap+ columns via wordwrap, prefixing each line
  # with '# '.
  def self.to_bash_comment(str, wrap = DefaultWrap)
    wordwrap(str, 0, wrap, '', '# ')
  end
  129. #
  130. # Returns the raw string
  131. #
  # Identity formatter: hands back the very object it was given.
  def self.to_raw(str)
    str
  end
  135. #
  136. # Converts ISO-8859-1 to UTF-8
  137. #
  # Treats +str+ as ISO-8859-1 and returns its UTF-8 representation.
  #
  # Iconv is used when it is loaded (unchanged behaviour). When it is not,
  # the conversion falls back to String#encode instead of failing outright.
  # Raises ::RuntimeError when neither mechanism is available or when the
  # Iconv conversion itself fails.
  def self.to_utf8(str)
    if defined?(::Iconv)
      begin
        return Iconv.iconv("utf-8","iso-8859-1", str).join(" ")
      rescue
        raise ::RuntimeError, "Your installation does not support iconv (needed for utf8 conversion)"
      end
    end
    unless str.respond_to?(:encode) && str.respond_to?(:force_encoding)
      raise ::RuntimeError, "Your installation does not support iconv (needed for utf8 conversion)"
    end
    # dup so that re-tagging the encoding never touches the caller's string
    str.dup.force_encoding('ISO-8859-1').encode('UTF-8')
  end
  145. #
  146. # Converts ASCII to EBCDIC
  147. #
  # Raised by to_ebcdic_rex / from_ebcdic_rex for a byte that has no entry
  # in the conversion tables.
  class IllegalSequence < ArgumentError
  end
  149. # A native implementation of the ASCII->EBCDIC table, used to fall back from using
  150. # Iconv
  # Translates +str+ byte by byte: each byte is located in Iconv_ASCII and
  # replaced by the entry at the same position in Iconv_EBCDIC.
  # Raises Rex::Text::IllegalSequence (message "\x<hex>") for a byte that is
  # not present in Iconv_ASCII.
  def self.to_ebcdic_rex(str)
    translated = str.each_byte.map do |byte|
      position = Iconv_ASCII.index(byte.chr)
      raise Rex::Text::IllegalSequence, ("\\x%x" % byte) unless position
      Iconv_EBCDIC[position]
    end
    translated.join
  end
  162. # A native implementation of the EBCDIC->ASCII table, used to fall back from using
  163. # Iconv
  # Translates +str+ byte by byte: each byte is located in Iconv_EBCDIC and
  # replaced by the entry at the same position in Iconv_ASCII.
  # Raises Rex::Text::IllegalSequence (message "\x<hex>") for a byte that is
  # not present in Iconv_EBCDIC.
  def self.from_ebcdic_rex(str)
    translated = str.each_byte.map do |byte|
      position = Iconv_EBCDIC.index(byte.chr)
      raise Rex::Text::IllegalSequence, ("\\x%x" % byte) unless position
      Iconv_ASCII[position]
    end
    translated.join
  end
  # Converts ASCII to EBCDIC, preferring Iconv and falling back to the
  # table-driven to_ebcdic_rex.
  #
  # When Iconv is not loaded the fallback is taken up front. Previously the
  # `rescue ::Iconv::IllegalSequence` clause itself raised NameError in that
  # situation, so the fallback was never reached.
  # An Iconv::IllegalSequence raised by Iconv is propagated unchanged; any
  # other Iconv failure falls back to to_ebcdic_rex.
  def self.to_ebcdic(str)
    return to_ebcdic_rex(str) unless defined?(::Iconv)
    begin
      Iconv.iconv("EBCDIC-US", "ASCII", str).first
    rescue ::Iconv::IllegalSequence
      raise
    rescue
      to_ebcdic_rex(str)
    end
  end
  184. #
  185. # Converts EBCDIC to ASCII
  186. #
  # Converts EBCDIC to ASCII, preferring Iconv and falling back to the
  # table-driven from_ebcdic_rex.
  #
  # When Iconv is not loaded the fallback is taken up front. Previously the
  # `rescue ::Iconv::IllegalSequence` clause itself raised NameError in that
  # situation, so the fallback was never reached.
  # An Iconv::IllegalSequence raised by Iconv is propagated unchanged; any
  # other Iconv failure falls back to from_ebcdic_rex.
  def self.from_ebcdic(str)
    return from_ebcdic_rex(str) unless defined?(::Iconv)
    begin
      Iconv.iconv("ASCII", "EBCDIC-US", str).first
    rescue ::Iconv::IllegalSequence
      raise
    rescue
      from_ebcdic_rex(str)
    end
  end
  196. #
  197. # Returns the words in +str+ as an Array.
  198. #
  199. # strict - include *only* words, no boundary characters (like spaces, etc.)
  200. #
  # Splits +str+ on word boundaries. With +strict+ set, only the pieces that
  # contain at least one word character are returned.
  def self.to_words( str, strict = false )
    pieces = str.split( /\b/ )
    return pieces unless strict
    pieces.select { |piece| piece =~ /\w/ }
  end
  206. #
  207. # Removes noise from 2 Strings and return a refined String version.
  208. #
  # Returns +str1+ untouched when both strings are equal. Otherwise keeps,
  # in their original order, the pieces of +str1+ (as produced by to_words)
  # that also occur among the pieces of +str2+, and joins them.
  def self.refine( str1, str2 )
    return str1 if str1 == str2
    other_pieces = to_words( str2 )
    to_words( str1 ).select { |piece| other_pieces.include?( piece ) }.join
  end
  218. #
  219. # Returns a unicode escaped string for Javascript
  220. #
  # Encodes the bytes of +data+ as a sequence of %uXXXX escapes, two bytes
  # per escape. With ENDIAN_LITTLE (the default) the second byte of each pair
  # is printed first; any other +endian+ value prints them in input order.
  # Input with an odd number of bytes is padded with "\x41".
  #
  # The padding is applied to a copy, so the caller's string is no longer
  # modified, and the pairing works on bytes throughout.
  def self.to_unescape(data, endian=ENDIAN_LITTLE)
    bytes = data.unpack("C*")
    bytes << 0x41 if bytes.length.odd?
    bytes.each_slice(2).map { |c1, c2|
      if (endian == ENDIAN_LITTLE)
        sprintf('%%u%.2x%.2x', c2, c1)
      else
        sprintf('%%u%.2x%.2x', c1, c2)
      end
    }.join
  end
  # Returns every byte of +str+ as +prefix+ followed by the byte's octal
  # digits (no zero padding).
  def self.to_octal(str, prefix = "\\")
    str.unpack("C*").inject("") { |encoded, byte| encoded << "#{prefix}#{byte.to_s(8)}" }
  end
  245. #
  246. # Returns the hex version of the supplied string
  247. #
  # Returns +str+ as hex digits with +prefix+ inserted in front of every
  # +count+ bytes. Raises ::RuntimeError when the length of +str+ is not a
  # multiple of +count+.
  def self.to_hex(str, prefix = "\\x", count = 1)
    raise ::RuntimeError, "unable to chunk into #{count} byte chunks" if ((str.length % count) > 0)
    # An interpolated literal without /o is rebuilt on every call, so +count+
    # is always honoured. The three-argument Regexp.new form used previously
    # is rejected by current Ruby releases.
    str.unpack('H*')[0].gsub(/.{#{count * 2}}/n) { |s| prefix + s }
  end
  256. #
  257. # Returns the string with nonprintable characters converted to hex escapes. Similar to to_hex,
  258. # but printable ASCII is left untranslated when count is 1.
  259. #
  # Like to_hex, but a chunk whose numeric value lies in 0x20..0x7e is
  # emitted as that character instead of as hex; every other chunk becomes
  # +prefix+ + hex digits + +suffix+. Raises ::RuntimeError when the length
  # of +str+ is not a multiple of +count+.
  def self.to_hex_ascii(str, prefix = "\\x", count = 1, suffix=nil)
    raise ::RuntimeError, "unable to chunk into #{count} byte chunks" if ((str.length % count) > 0)
    # Interpolated literal instead of the three-argument Regexp.new form,
    # which current Ruby releases reject.
    str.unpack('H*')[0].gsub(/.{#{count * 2}}/n) { |s|
      value = s.to_i(16)
      (0x20..0x7e) === value ? value.chr : prefix + s + suffix.to_s
    }
  end
  266. #
  267. # Converts standard ASCII text to a unicode string.
  268. #
  269. # Supported unicode types include: utf-16le, utf-16be, utf-32le, utf-32be, utf-7, and utf-8
  270. #
  271. # Providing 'mode' provides hints to the actual encoder as to how it should encode the string. Only UTF-7 and UTF-8 use "mode".
  272. #
  273. # utf-7 by default does not encode alphanumeric and a few other characters. By specifying the mode of "all", then all of the characters are encoded, not just the non-alphanumeric set.
  274. # to_unicode(str, 'utf-7', 'all')
  275. #
  276. # utf-8 specifies that alphanumeric characters are used directly, eg "a" is just "a". However, there exist 6 different overlong encodings of "a" that are technically not valid, but parse just fine in most utf-8 parsers. (0xC1A1, 0xE081A1, 0xF08081A1, 0xF8808081A1, 0xFC80808081A1, 0xFE8080808081A1). How many bytes to use for the overlong enocding is specified providing 'size'.
  277. # to_unicode(str, 'utf-8', 'overlong', 2)
  278. #
  279. # Many utf-8 parsers also allow invalid overlong encodings, where bits that are unused when encoding a single byte are modified. Many parsers will ignore these bits, rendering simple string matching to be ineffective for dealing with UTF-8 strings. There are many more invalid overlong encodings possible for "a". For example, three encodings are available for an invalid 2 byte encoding of "a". (0xC1E1 0xC161 0xC121). By specifying "invalid", a random invalid encoding is chosen for the given byte size.
  280. # to_unicode(str, 'utf-8', 'invalid', 2)
  281. #
  282. # utf-7 defaults to 'normal' utf-7 encoding
  283. # utf-8 defaults to 2 byte 'normal' encoding
  284. #
  # Encodes the bytes of +str+ according to +type+:
  #
  # * 'utf-16le' / 'utf-16be' / 'utf-32le' / 'utf-32be' - every byte is
  #   widened to a 16/32-bit unit of the given byte order.
  # * 'utf-7' - matching characters become '+' base64(utf-16be) '-'; with
  #   mode 'all' every character matched by /./ is encoded.
  # * 'utf-8' - bytes are expanded to +size+-byte sequences (2..7, default 2)
  #   when a +mode+ ('overlong' or 'invalid') is given or the byte is outside
  #   the pass-through range; 'invalid' additionally flips random bits.
  # * 'uhwtfms' / 'uhwtfms-half' - +mode+ is reused as the codepage number
  #   (default 1252) and a random entry from the codepage map is picked.
  #
  # Returns '' for a nil/false +str+. Raises TypeError for an unknown type,
  # utf-8 mode or size, or an unsupported codepage/byte.
  def self.to_unicode(str='', type = 'utf-16le', mode = '', size = '')
    return '' if not str
    case type
    when 'utf-16le'
      return str.unpack('C*').pack('v*')
    when 'utf-16be'
      return str.unpack('C*').pack('n*')
    when 'utf-32le'
      return str.unpack('C*').pack('V*')
    when 'utf-32be'
      return str.unpack('C*').pack('N*')
    when 'utf-7'
      case mode
      when 'all'
        return str.gsub(/./){ |a|
          out = ''
          # A literal '+' is written as the empty shift sequence "+-".
          # (This used to compare the literal 'a' with '+', which is always
          # true, so the special case never applied.)
          if a != '+'
            out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
          end
          '+' + out + '-'
        }
      else
        return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/){ |a|
          out = ''
          if a != '+'
            out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
          end
          '+' + out + '-'
        }
      end
    when 'utf-8'
      if size == ''
        size = 2
      end
      if size >= 2 and size <= 7
        string = ''
        str.each_byte { |a|
          # NOTE(review): the lower bound is decimal 21, not 0x21 -- confirm
          # that this is the intended pass-through threshold.
          if (a < 21 || a > 0x7f) || mode != ''
            # the byte as an array of eight 0/1 integers, most significant first
            bin = [a].pack('C').unpack('B8')[0].split(//)
            bin.collect!{|a_| a_.to_i}
            # bit template for a size-byte sequence: the lead byte starts
            # with +size+ ones, every following byte starts with a one
            out = Array.new(8 * size, 0)
            0.upto(size - 1) { |i|
              out[i] = 1
              out[i * 8] = 1
            }
            i = 0
            byte = 0
            # fill in the payload from the least significant bit, six bits
            # per output byte, starting at the last byte
            bin.reverse.each { |bit|
              if i < 6
                mod = (((size * 8) - 1) - byte * 8) - i
                out[mod] = bit
              else
                # current output byte is full: move one byte towards the
                # front and process the same bit again
                byte = byte + 1
                i = 0
                redo
              end
              i = i + 1
            }
            if mode != ''
              case mode
              when 'overlong'
                # nothing to do: the expansion above already is the overlong form
              when 'invalid'
                done = 0
                # keep randomising the candidate bits until at least one
                # of them actually changed
                while done == 0
                  bits = [7, 8, 15, 16, 23, 24, 31, 32, 41]
                  bits.each { |bit|
                    bit = (size * 8) - bit
                    if bit > 1
                      set = rand(2)
                      if out[bit] != set
                        out[bit] = set
                        done = 1
                      end
                    end
                  }
                end
              else
                raise TypeError, 'Invalid mode. Only "overlong" and "invalid" are acceptable modes for utf-8'
              end
            end
            string << [out.join('')].pack('B*')
          else
            string << [a].pack('C')
          end
        }
        return string
      else
        raise TypeError, 'invalid utf-8 size'
      end
    when 'uhwtfms' # suggested name from HD :P
      load_codepage()
      string = ''
      # overloading mode as codepage
      if mode == ''
        mode = 1252 # ANSI - Latan 1, default for US installs of MS products
      else
        mode = mode.to_i
      end
      if @@codepage_map_cache[mode].nil?
        raise TypeError, "Invalid codepage #{mode}"
      end
      str.each_byte {|byte|
        char = [byte].pack('C*')
        possible = @@codepage_map_cache[mode]['data'][char]
        if possible.nil?
          raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
        end
        string << possible[ rand(possible.length) ]
      }
      return string
    when 'uhwtfms-half' # suggested name from HD :P
      load_codepage()
      string = ''
      # overloading mode as codepage
      if mode == ''
        mode = 1252 # ANSI - Latan 1, default for US installs of MS products
      else
        mode = mode.to_i
      end
      if mode != 1252
        raise TypeError, "Invalid codepage #{mode}, only 1252 supported for uhwtfms_half"
      end
      str.each_byte {|byte|
        if ((byte >= 33 && byte <= 63) || (byte >= 96 && byte <= 126))
          string << "\xFF" + [byte ^ 32].pack('C')
        elsif (byte >= 64 && byte <= 95)
          string << "\xFF" + [byte ^ 96].pack('C')
        else
          char = [byte].pack('C')
          possible = @@codepage_map_cache[mode]['data'][char]
          if possible.nil?
            raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
          end
          string << possible[ rand(possible.length) ]
        end
      }
      return string
    else
      raise TypeError, 'invalid utf type'
    end
  end
  430. #
  431. # Converts a unicode string to standard ASCII text.
  432. #
  # Narrows fixed-width unicode back to single bytes: +str+ is unpacked as
  # 16/32-bit units of the byte order named by +type+ and each unit is
  # re-packed as one byte. Returns '' for a nil/false +str+.
  # Raises TypeError for the types that are recognised but not implemented
  # ('utf-7', 'utf-8', 'uhwtfms', 'uhwtfms-half') and for unknown types.
  # +mode+ and +size+ are accepted but not used.
  def self.to_ascii(str='', type = 'utf-16le', mode = '', size = '')
    return '' unless str
    unit_format =
      case type
      when 'utf-16le' then 'v*'
      when 'utf-16be' then 'n*'
      when 'utf-32le' then 'V*'
      when 'utf-32be' then 'N*'
      when 'utf-7', 'utf-8', 'uhwtfms', 'uhwtfms-half'
        raise TypeError, 'invalid utf type, not yet implemented'
      else
        raise TypeError, 'invalid utf type'
      end
    str.unpack(unit_format).pack('C*')
  end
  456. #
  457. # Encode a string in a manner useful for HTTP URIs and URI Parameters.
  458. #
  # Percent-encodes +str+ according to +mode+:
  #
  # * 'none'       - returned as is
  # * 'hex-normal' - %XX for everything except a-z A-Z 0-9 / \ . -
  # * 'hex-all'    - %XX for everything except / and \
  # * 'hex-random' - per byte, randomly one of the two rules above
  # * 'u-normal' / 'u-all' / 'u-random' - same selection, but each run is
  #   passed through to_unicode(..., 'uhwtfms') and emitted as %uXXXX
  # * 'u-half'     - like 'u-all' but via 'uhwtfms-half'
  #
  # Returns "" for nil; raises TypeError for any other mode.
  def self.uri_encode(str, mode = 'hex-normal')
    return "" if str == nil
    return str if mode == 'none' # fast track no encoding

    all = /[^\/\\]+/
    normal = /[^a-zA-Z0-9\/\\\.\-]+/
    hex = lambda { |s| Rex::Text.to_hex(s, '%') }
    wide = lambda { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms'), '%u', 2) }

    case mode
    when 'hex-normal'
      str.gsub(normal, &hex)
    when 'hex-all'
      str.gsub(all, &hex)
    when 'hex-random'
      str.each_byte.inject('') { |res, c| res << c.chr.gsub(rand(2) == 0 ? all : normal, &hex) }
    when 'u-normal'
      str.gsub(normal, &wide)
    when 'u-all'
      str.gsub(all, &wide)
    when 'u-random'
      str.each_byte.inject('') { |res, c| res << c.chr.gsub(rand(2) == 0 ? all : normal, &wide) }
    when 'u-half'
      str.gsub(all) { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms-half'), '%u', 2) }
    else
      raise TypeError, 'invalid mode'
    end
  end
  498. #
  499. # Encode a string as HTML numeric character references (hex, decimal, or zero-padded decimal).
  500. #
  # Turns every byte of +str+ into an HTML numeric character reference:
  # 'hex' gives &#xNN;, 'int' gives &#N; and 'int-wide' gives the decimal
  # value zero-padded to seven digits. Raises TypeError for any other mode.
  def self.html_encode(str, mode = 'hex')
    template =
      case mode
      when 'hex' then "&#x%.2x;"
      when 'int' then "&#%d;"
      when 'int-wide' then "&#%07d;"
      else raise TypeError, 'invalid mode'
      end
    str.unpack('C*').map { |byte| template % byte }.join
  end
  513. #
  514. # Decode a string that's html encoded
  515. #
  # Returns +str+ with its HTML entities unescaped by CGI.unescapeHTML.
  def self.html_decode(str)
    CGI.unescapeHTML(str)
  end
  520. #
  521. # Encode an ASCII string so it's safe for XML. It's a wrapper for to_hex_ascii.
  522. #
  # Calls to_hex_ascii with the prefix "&#x", a chunk size of one byte and
  # the suffix ";", i.e. bytes outside printable ASCII become &#xNN;
  # references.
  def self.xml_char_encode(str)
    to_hex_ascii(str, "&#x", 1, ";")
  end
  526. #
  527. # Decode a URI encoded string
  528. #
  # Replaces every %XX escape in +str+ (XX being two hexadecimal digits, in
  # either case) with the byte it denotes and returns the result.
  #
  # Only genuine hex digits are accepted; sequences such as "%zz" are left
  # untouched instead of being decoded into an arbitrary byte.
  def self.uri_decode(str)
    str.gsub(/(%[a-f0-9]{2})/i){ |c| [c[1,2]].pack("H*") }
  end
  532. #
  533. # Converts a string to random case
  534. #
  # Returns a copy of +str+ in which every character is independently
  # upper- or lower-cased at random. +str+ itself is not modified.
  #
  # The loop visits exactly the characters of +str+. It used to run one
  # index past the end, which was a no-op but consumed an extra random
  # number.
  def self.to_rand_case(str)
    buf = str.dup
    str.length.times do |i|
      ch = str[i,1]
      buf[i,1] = rand(2) == 0 ? ch.upcase : ch.downcase
    end
    buf
  end
  542. #
  543. # Takes a string, and returns an array of all mixed case versions.
  544. #
  545. # Example:
  546. #
  547. # >> Rex::Text.to_mixed_case_array "abc1"
  548. # => ["abc1", "abC1", "aBc1", "aBC1", "Abc1", "AbC1", "ABc1", "ABC1"]
  549. #
  # Returns every upper/lower-case combination of +str+, ordered as a binary
  # count over the characters (lower case = 0, upper case = 1, first
  # character most significant), without duplicates.
  #
  # The empty string yields [""], and characters that /./ does not match
  # (newlines) are handled; both used to raise NoMethodError. Characters
  # without a distinct upper/lower form contribute a single choice, so they
  # no longer double the amount of intermediate work.
  def self.to_mixed_case_array(str)
    return [""] if str.empty?
    choices = str.each_char.map { |l| [l.downcase, l.upcase].uniq }
    head, *tail = choices
    head.product(*tail).map { |combo| combo.join }.uniq
  end
  564. #
  565. # Converts a string to a nicely formatted hex dump
  566. #
  # Builds a hex dump of +str+, +width+ bytes per row: space-separated hex
  # pairs, a gap, then the bytes as text with everything outside 0x20..0x7e
  # shown as ".". The gap after the first row is four spaces; later rows are
  # padded relative to the first row's hex width. A blank line ends the dump.
  def self.to_hex_dump(str, width=16)
    dump = ''
    first_hex_len = 0
    offset = 0
    until offset >= str.length
      piece = str[offset, width]
      hex = piece.unpack("H*")[0].scan(/../).join(" ")
      dump << hex
      if first_hex_len == 0
        first_hex_len = hex.length
        gap = 4
      else
        gap = ((first_hex_len - hex.length) + 4).abs
      end
      dump << " " * gap
      dump << piece.unpack("C*").map { |c| (c > 0x1f && c < 0x7f) ? c.chr : "." }.join
      dump << "\n"
      offset += width
    end
    dump << "\n"
  end
  595. #
  596. # Converts a string to a nicely formatted and addressed hex dump
  597. #
  # Same layout as to_hex_dump, but every row is preceded by an eight-digit
  # hex address and four spaces. The address starts at +start_addr+ and
  # advances by +width+ per row.
  def self.to_addr_hex_dump(str, start_addr=0, width=16)
    dump = ''
    first_hex_len = 0
    offset = 0
    address = start_addr
    until offset >= str.length
      dump << "%08x" % address
      dump << " " * 4
      piece = str[offset, width]
      hex = piece.unpack("H*")[0].scan(/../).join(" ")
      dump << hex
      if first_hex_len == 0
        first_hex_len = hex.length
        gap = 4
      else
        gap = ((first_hex_len - hex.length) + 4).abs
      end
      dump << " " * gap
      dump << piece.unpack("C*").map { |c| (c > 0x1f && c < 0x7f) ? c.chr : "." }.join
      dump << "\n"
      offset += width
      address += width
    end
    dump << "\n"
  end
  630. #
  631. # Converts a hex string to a raw string
  632. #
  # Lower-cases +str+, drops single quotes, strips the "x" / "\x" in front of
  # each pair of hex digits and packs the remaining digits into raw bytes.
  def self.hex_to_raw(str)
    digits = str.downcase.delete("'").gsub(/\\?x([a-f0-9][a-f0-9])/, '\1')
    [ digits ].pack("H*")
  end
  636. #
  637. # Turn non-printable chars into hex representations, leaving others alone
  638. #
  639. # If +whitespace+ is true, converts whitespace (0x20, 0x09, etc) to hex as
  640. # well.
  641. #
  # Replaces control bytes and bytes >= 0x80 in +str+ with "\xNN" escapes.
  # By default tab, newline, carriage return and space are left alone; with
  # +whitespace+ set, everything up to and including 0x20 is escaped too.
  #
  # Both patterns now carry the /n flag. Previously only the default branch
  # did, so the two branches could treat the same input differently.
  def self.ascii_safe_hex(str, whitespace=false)
    pattern =
      if whitespace
        /([\x00-\x20\x80-\xFF])/n
      else
        /([\x00-\x08\x0b\x0c\x0e-\x1f\x80-\xFF])/n
      end
    str.gsub(pattern){ |x| "\\x%.2x" % x.unpack("C*")[0] }
  end
  649. #
  650. # Wraps text at a given column using a supplied indention
  651. #
  # Cuts +str+ into runs of at most (+col+ - +indent+) characters that end
  # at whitespace or at the end of the string. Every run is emitted as
  # +indent+ spaces + +prepend+ + run + +append+ and terminated by exactly
  # one newline.
  def self.wordwrap(str, indent = 0, col = DefaultWrap, append = '', prepend = '')
    run_pattern = /.{1,#{col - indent}}(?:\s|\Z)/
    margin = " " * indent
    str.gsub(run_pattern) do |run|
      # 0x05 marks the end of the line: it disappears when the run already
      # ended in a newline and becomes the newline otherwise.
      marked = margin + prepend + run + append + 5.chr
      marked.gsub(/\n\005/,"\n").gsub(/\005/,"\n")
    end
  end
  656. #
  657. # Converts a string to a hex version with wrapping support
  658. #
  # Renders every byte of +str+ as "\xNN" and lays the result out in lines:
  #
  # * +buf_start+ is emitted once in front of everything
  # * each line starts with +line_start+
  # * a line is closed as soon as its length plus +line_end+ (or +buf_end+)
  #   reaches +col+; it is closed with +line_end+, or with +buf_end+ when it
  #   holds the last byte
  # * a line still open at the end is closed with +buf_end+
  #
  # Returns the assembled String. The result is built in a copy of
  # +buf_start+, so the caller's argument is no longer modified, and the
  # "last byte" test counts bytes rather than characters.
  def self.hexify(str, col = DefaultWrap, line_start = '', line_end = '', buf_start = '', buf_end = '')
    output = buf_start.dup
    total = str.bytesize
    cur = 0
    count = 0
    new_line = true
    str.each_byte { |byte|
      count += 1
      append = ''
      # a fresh line opens with the line start text
      if new_line
        append << line_start
        new_line = false
      end
      append << sprintf("\\x%.2x", byte)
      cur += append.length
      # at or past the column limit: finish this line
      if ((cur + line_end.length >= col) or (cur + buf_end.length >= col))
        new_line = true
        cur = 0
        append << (count == total ? buf_end : line_end) << "\n"
      end
      output << append
    }
    # a line that is still open gets the buffer terminator
    output << buf_end + "\n" unless new_line
    output
  end
  698. ##
  699. #
  700. # Transforms
  701. #
  702. ##
  703. #
  704. # Base32 code
  705. #
  706. # Based on --> https://github.com/stesla/base32
  707. # Copyright (c) 2007-2011 Samuel Tesla
  708. # Permission is hereby granted, free of charge, to any person obtaining a copy
  709. # of this software and associated documentation files (the "Software"), to deal
  710. # in the Software without restriction, including without limitation the rights
  711. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  712. # copies of the Software, and to permit persons to whom the Software is
  713. # furnished to do so, subject to the following conditions:
  714. # The above copyright notice and this permission notice shall be included in
  715. # all copies or substantial portions of the Software.
  716. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  717. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  718. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  719. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  720. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  721. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  722. # THE SOFTWARE.
  723. #
  724. # Base32 encoder
  725. #
  # Encodes one group of byte values (+bytes_in+, an Array of Integers) and
  # returns a two-element Array: the list of Base32 characters and the "="
  # padding string that fills the group up to eight characters.
  def self.b32encode(bytes_in)
    bit_count = bytes_in.length * 8
    n = (bit_count / 5.0).ceil
    # a short group is shifted left so its bits fill whole 5-bit symbols
    shift = n < 8 ? 5 - bit_count % 5 : 0
    value = bytes_in.inject(0) { |acc, byte| (acc << 8) + byte } << shift
    symbols = (n - 1).downto(0).map { |i| Base32[(value >> (i * 5)) & 0x1f].chr }
    [symbols, "=" * (8 - n)]
  end
  # Returns the Base32 encoding of +str+: the bytes are taken five at a time
  # and each group is encoded (and padded) by b32encode.
  #
  # This is a single pass over the groups. The former outer `while` loop and
  # the per-group `drop` produced the same output while re-copying the
  # remaining bytes for every group.
  def self.encode_base32(str)
    result = ''
    str.unpack("C*").each_slice(5) do |group|
      result << b32encode(group).flatten.join
    end
    result
  end
  746. #
  747. # Base32 decoder
  748. #
  # Decodes one group of Base32 character codes (+bytes_in+, an Array of
  # Integers). Everything from the first "=" (61) on is ignored. Returns the
  # decoded bytes as an Array of one-character Strings.
  def self.b32decode(bytes_in)
    symbols = bytes_in.take_while { |code| code != 61 }
    n = (symbols.length * 5.0 / 8.0).floor
    # a short group carries filler bits at the low end; shift them out
    surplus = symbols.length < 8 ? 5 - (n * 8) % 5 : 0
    value = symbols.inject(0) { |acc, code| (acc << 5) + Base32.index(code.chr) } >> surplus
    (n - 1).downto(0).map { |i| ((value >> (i * 8)) & 0xff).chr }
  end
  # Returns the bytes encoded by the Base32 string +str+: the characters are
  # taken eight at a time and each group is decoded by b32decode.
  #
  # This is a single pass over the groups. The former outer `while` loop and
  # the per-group `drop` produced the same output while re-copying the
  # remaining bytes for every group.
  def self.decode_base32(str)
    result = ''
    str.unpack("C*").each_slice(8) do |group|
      result << b32decode(group).flatten.join
    end
    result
  end
  769. #
  770. # Base64 encoder
  771. #
  # Base64-encodes str.to_s with pack("m") and replaces every run of
  # whitespace in the output (the line breaks pack inserts, including the
  # trailing one) with +delim+.
  def self.encode_base64(str, delim='')
    packed = [str.to_s].pack("m")
    packed.gsub(/\s+/, delim)
  end
  775. #
  776. # Base64 decoder
  777. #
  # Decodes the Base64 text in str.to_s with unpack("m") and returns the
  # decoded bytes.
  def self.decode_base64(str)
    str.to_s.unpack("m").first
  end
  781. #
  782. # Raw MD5 digest of the supplied string
  783. #
  # Returns the 16 raw digest bytes of the MD5 hash of +str+.
  def self.md5_raw(str)
    Digest::MD5.new.update(str).digest
  end
  787. #
  788. # Hexadecimal MD5 digest of the supplied string
  789. #
  # Returns the MD5 hash of +str+ as 32 lower-case hex digits.
  def self.md5(str)
    Digest::MD5.new.update(str).hexdigest
  end
  793. #
  794. # Raw SHA1 digest of the supplied string
  795. #
  # Returns the 20 raw digest bytes of the SHA1 hash of +str+.
  def self.sha1_raw(str)
    Digest::SHA1.new.update(str).digest
  end
  799. #
  800. # Hexadecimal SHA1 digest of the supplied string
  801. #
  # Returns the SHA1 hash of +str+ as 40 lower-case hex digits.
  def self.sha1(str)
    Digest::SHA1.new.update(str).hexdigest
  end
  805. #
  806. # Convert hex-encoded characters to literals.
  807. # Example: "AA\\x42CC" becomes "AABCC"
  808. #
  # Returns a copy of +str+ in which every "\xNN" escape (backslash, x, two
  # hex digits, any case) is replaced by the character with that code.
  # +str+ itself is returned when it contains no such escape, or when it
  # does not respond to both #match and #gsub.
  def self.dehex(str)
    return str unless str.respond_to?(:match) && str.respond_to?(:gsub)
    escape = /\x5cx[0-9a-f]{2}/mi
    return str unless str.match(escape)
    str.gsub(escape) { |hit| hit[2,2].to_i(16).chr }
  end
  819. #
  820. # Convert and replace hex-encoded characters to literals.
  821. #
  # In-place variant: replaces every "\xNN" escape inside +str+ using
  # String#gsub! and returns its result, i.e. +str+ when something was
  # replaced and nil when nothing matched. +str+ is returned untouched when
  # it does not respond to both #match and #gsub.
  def self.dehex!(str)
    return str unless str.respond_to?(:match) && str.respond_to?(:gsub)
    str.gsub!(/\x5cx[0-9a-f]{2}/mi) { |hit| hit[2,2].to_i(16).chr }
  end
  828. ##
  829. #
  830. # Generators
  831. #
  832. ##
  833. # Generates a random character.
  # Asks rand_text for a single character drawn from +chars+, passing +bad+
  # through as the set of excluded characters.
  def self.rand_char(bad, chars = AllChars)
    rand_text(1, bad, chars)
  end
  837. # Base text generator method
  # Returns +len+ random bytes. The candidate set is the distinct bytes of
  # all +foo+ arguments joined together, minus the bytes of bad.to_s.
  # Returns "" when no candidate is left.
  def self.rand_base(len, bad, *foo)
    allowed = (foo.join.unpack("C*") - bad.to_s.unpack("C*")).uniq
    return "" if allowed.length == 0
    len.times.map { allowed[rand(allowed.length)] }.pack("C*")
  end
  845. # Generate random bytes of data
  # Splits +chars+ into single characters and hands them to rand_base
  # together with +len+ and +bad+.
  def self.rand_text(len, bad='', chars = AllChars)
    rand_base(len, bad, *chars.split(''))
  end
  850. # Generate random bytes of alpha data
  # Calls rand_base with the candidates 'A'..'Z' followed by 'a'..'z'.
  def self.rand_text_alpha(len, bad='')
    letters = [*('A' .. 'Z'), *('a' .. 'z')]
    rand_base(len, bad, *letters)
  end
  857. # Generate random bytes of lowercase alpha data
  # Calls rand_base with the candidates 'a'..'z'.
  def self.rand_text_alpha_lower(len, bad='')
    lower = ('a' .. 'z').to_a
    rand_base(len, bad, *lower)
  end
  861. # Generate random bytes of uppercase alpha data
  # Calls rand_base with the candidates 'A'..'Z'.
  def self.rand_text_alpha_upper(len, bad='')
    upper = ('A' .. 'Z').to_a
    rand_base(len, bad, *upper)
  end
  865. # Generate random bytes of alphanumeric data
  # Calls rand_base with the candidates 'A'..'Z', 'a'..'z' and '0'..'9',
  # in that order.
  def self.rand_text_alphanumeric(len, bad='')
    candidates = [*('A' .. 'Z'), *('a' .. 'z'), *('0' .. '9')]
    rand_base(len, bad, *candidates)
  end
  873. # Generate random bytes of alphanumeric hex.
  # Calls rand_base with the candidates '0'..'9' followed by 'a'..'f'
  # (lower-case hex digits).
  def self.rand_text_hex(len, bad='')
    digits = [*('0' .. '9'), *('a' .. 'f')]
    rand_base(len, bad, *digits)
  end
  880. # Generate random bytes of numeric data
  # Calls rand_base with the candidates '0'..'9'.
  def self.rand_text_numeric(len, bad='')
    rand_base(len, bad, *('0' .. '9').to_a)
  end
  885. # Generate random bytes of english-like data
  # Calls rand_base with the characters 0x21..0x7e as candidates.
  def self.rand_text_english(len, bad='')
    printable = (0x21 .. 0x7e).map { |code| code.chr }
    rand_base(len, bad, *printable)
  end
  # Generates +len+ random bytes from the range 0x80..0xff (high ASCII),
  # avoiding the bytes in +bad+.
  def self.rand_text_highascii(len, bad='')
    high_bytes = (0x80 .. 0xff).map(&:chr)
    rand_base(len, bad, *high_bytes)
  end
  # Generates a random GUID of the form {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx},
  # where every x is a random hex digit produced by rand_text_hex.
  def self.rand_guid
    groups = [8, 4, 4, 4, 12].map { |width| rand_text_hex(width) }
    "{#{groups.join("-")}}"
  end
  #
  # Creates a pattern that can be used for offset calculation purposes. This
  # routine is capable of generating patterns using a supplied set and a
  # supplied number of identifiable characters (slots). The supplied sets
  # should not contain any duplicate characters or the logic will fail.
  #
  # length - number of characters wanted; coerced with to_i, anything below
  #          one yields an empty string.
  # sets   - array of character-set strings, one per slot. Defaults to
  #          [ UpperAlpha, LowerAlpha, Numerals ], also when nil is passed.
  #
  # Returns a string of exactly +length+ characters. Once every combination
  # of the sets has been emitted the sequence starts over, so long patterns
  # repeat.
  #
  def self.pattern_create(length, sets = nil)
    # Make sure there's something in sets even if we were given an explicit nil
    sets ||= [ UpperAlpha, LowerAlpha, Numerals ]

    # Normalize once so the comparisons and slices below always see an Integer
    length = length.to_i
    return "" if length < 1

    # A single one-character set can only ever repeat that character
    return sets[0][0].chr * length if sets.size == 1 and sets[0].size == 1

    offsets = Array.new(sets.length, 0)
    buf = ''
    # converge_sets emits one character per set and advances the offsets; it
    # wraps around by itself, so looping until the buffer is long enough is
    # all that is needed.
    buf << converge_sets(sets, 0, offsets, length) until buf.length >= length

    buf[0, length]
  end
  # Step through an arbitrary number of sets of bytes to build up a findable pattern.
  # This is mostly useful for experimentally determining offset lengths into memory
  # structures. Note that the supplied sets should never contain duplicate bytes, or
  # else it can become impossible to measure the offset accurately.
  #
  # len  - requested pattern length (coerced with to_i; nil counts as 0).
  # sets - array of character-set strings; defaults to
  #        [ UpperAlpha, LowerAlpha, Numerals ].
  #
  # FIXME: the generation loop below never appends anything to the buffer, so
  # this method currently always returns an empty string. The scaffolding is
  # kept as found; use pattern_create for a working generator.
  def self.patt2(len, sets = nil)
    buf = ""
    counter = []
    sets ||= [ UpperAlpha, LowerAlpha, Numerals ]
    # The original `len ||= len.to_i` only ever acted on nil; coerce always.
    len = len.to_i
    return "" if len.zero?

    sets = sets.map { |a| a.split(//) }
    sets.size.times { counter << 0 }
    0.upto(len - 1) do |i|
      # Index of the set the i-th character would come from (unused so far).
      setnum = i % sets.size
    end

    buf
  end
  #
  # Calculate the offset to a pattern
  #
  # pattern - the string to search in.
  # value   - either a String to look for, or an Integer which is packed as
  #           a 32-bit little-endian value ('V') before searching.
  # start   - position at which the search begins (default 0).
  #
  # Returns the index of the first match at or after +start+, or nil when
  # there is none. Raises ::ArgumentError for any other class of +value+.
  #
  def self.pattern_offset(pattern, value, start=0)
    case value
    when String
      pattern.index(value, start)
    when Integer
      # Integer covers what used to be Fixnum and Bignum; those constants no
      # longer exist on current Ruby versions.
      pattern.index([ value ].pack('V'), start)
    else
      raise ::ArgumentError, "Invalid class for value: #{value.class}"
    end
  end
  #
  # Compresses a string: every run of whitespace (line breaks included) is
  # collapsed into a single space, and whitespace at the very beginning and
  # end of the result is dropped.
  #
  def self.compress(str)
    # Newlines are whitespace too, so one pass folds lines and runs alike.
    squeezed = str.gsub(/\s+/, ' ')
    # After squeezing, at most one space can remain on either edge.
    squeezed.sub(/\A /, '').sub(/ \z/, '')
  end
  #
  # Randomize the whitespace in a string
  #
  # Every run of whitespace in +str+ is replaced by a run of 2 to 51
  # characters picked at random from tab, space, CR and LF. Everything else
  # is left untouched.
  #
  def self.randomize_space(str)
    whitespace = "\x09\x20\x0d\x0a"
    str.gsub(/\s+/) do
      target = rand(50) + 2
      filler = ''
      filler << whitespace[rand(whitespace.length), 1] until filler.length >= target
      filler
    end
  end
  # Returns true if zlib can be used, i.e. if the Zlib constant resolves,
  # and false when referencing it raises.
  def self.zlib_present?
    _probe = Zlib
    true
  rescue
    false
  end
  # Backwards-compatible alias kept for older callers; answers exactly what
  # zlib_present? answers.
  def self.gzip_present?
    zlib_present?
  end
  #
  # Compresses a string using zlib
  #
  # str   - data to compress.
  # level - zlib compression level (defaults to Zlib::BEST_COMPRESSION).
  #
  # Returns the deflated data. Raises RuntimeError when zlib is unavailable.
  #
  def self.zlib_deflate(str, level = Zlib::BEST_COMPRESSION)
    raise RuntimeError, "Gzip support is not present." unless self.zlib_present?

    z = Zlib::Deflate.new(level)
    begin
      z.deflate(str, Zlib::FINISH)
    ensure
      # Release the stream even when deflate raises.
      z.close
    end
  end
  #
  # Uncompresses a string using zlib
  #
  # str - zlib-deflated data.
  #
  # Returns the inflated data. Raises RuntimeError when zlib is unavailable;
  # errors raised by zlib itself (e.g. for corrupt input) propagate.
  #
  def self.zlib_inflate(str)
    raise RuntimeError, "Gzip support is not present." unless self.zlib_present?

    zstream = Zlib::Inflate.new
    begin
      buf = zstream.inflate(str)
      zstream.finish
      buf
    ensure
      # Release the stream even when the input turns out to be invalid.
      zstream.close
    end
  end
  #
  # Compresses a string using gzip
  #
  # str   - data to compress.
  # level - compression level from 1 to 9 (default 9).
  #
  # Returns the gzip stream as a string (forced to ASCII-8BIT where strings
  # carry an encoding). Raises RuntimeError when zlib is unavailable or the
  # level is out of range.
  #
  def self.gzip(str, level = 9)
    raise RuntimeError, "Gzip support is not present." unless zlib_present?
    raise RuntimeError, "Invalid gzip compression level" if level < 1 || level > 9

    packed = ""
    packed.force_encoding('ASCII-8BIT') if packed.respond_to?(:encoding)
    writer = Zlib::GzipWriter.new(StringIO.new(packed, 'wb'), level)
    writer << str
    writer.close
    packed
  end
  #
  # Uncompresses a string using gzip
  #
  # str - a gzip stream.
  #
  # Returns the decompressed data (in an ASCII-8BIT buffer where strings
  # carry an encoding). Raises RuntimeError when zlib is unavailable.
  #
  def self.ungzip(str)
    raise RuntimeError, "Gzip support is not present." unless zlib_present?

    plain = ""
    plain.force_encoding('ASCII-8BIT') if plain.respond_to?(:encoding)
    reader = Zlib::GzipReader.new(StringIO.new(str, 'rb'))
    plain << reader.read
    reader.close
    plain
  end
  #
  # Return the index of the first badchar in data, otherwise return
  # nil if there wasn't any badchar occurrences.
  #
  # data     - the string to inspect.
  # badchars - string whose bytes are the characters to look for.
  #
  # The result is the lowest position in +data+ at which any of the bad
  # characters occurs, regardless of the order in which they are listed.
  #
  def self.badchar_index(data, badchars = '')
    # Look every bad character up once and keep the earliest hit; an empty
    # hit list makes min return nil.
    badchars.unpack("C*").map { |badchar| data.index(badchar.chr) }.compact.min
  end
  #
  # This method removes bad characters from a string.
  #
  # data     - the string to clean.
  # badchars - every character in this string is removed from +data+.
  #
  # Returns a new string; +data+ itself is not modified.
  #
  def self.remove_badchars(data, badchars = '')
    # String#delete reads "^", "-" and "\" as selector syntax (negation,
    # ranges, escapes). Escape them so each bad character is taken literally.
    literal_set = badchars.gsub(/[\\^-]/) { |special| "\\" + special }
    data.delete(literal_set)
  end
  #
  # This method returns all chars but the supplied set
  #
  # keepers - characters to leave out of the result.
  #
  # Returns a string holding every byte value 0..255, in ascending order,
  # except the ones listed in +keepers+.
  #
  def self.charset_exclude(keepers)
    # String#delete reads "^", "-" and "\" as selector syntax (negation,
    # ranges, escapes). Escape them so each listed character is taken
    # literally.
    literal_set = keepers.gsub(/[\\^-]/) { |special| "\\" + special }
    [*(0..255)].pack('C*').delete(literal_set)
  end
  #
  # Shuffles a byte stream
  #
  # The string is split into its bytes, the byte array is handed to
  # shuffle_a, and the result is packed back into a string.
  #
  def self.shuffle_s(str)
    bytes = str.unpack("C*")
    shuffle_a(bytes).pack("C*")
  end
  #
  # Performs a Fisher-Yates shuffle on an array
  #
  # The array is shuffled in place and also returned. For each position, a
  # partner position at or before it is drawn with Kernel#rand and the two
  # elements are swapped.
  #
  def self.shuffle_a(arr)
    arr.length.times do |pos|
      partner = rand(pos + 1)
      next if partner == pos
      arr[pos], arr[partner] = arr[partner], arr[pos]
    end
    arr
  end
  # Permute the case of a word
  #
  # word - the string to permute.
  # idx  - position from which permutation starts (used by the recursion;
  #        defaults to 0).
  #
  # Returns an array with every upper/lower-case combination of the letters
  # at positions +idx+ and later. Characters not found in UpperAlpha or
  # LowerAlpha are kept as they are. For each letter the upper-case variants
  # are listed before the lower-case ones.
  def self.permute_case(word, idx=0)
    # Past the last character there is nothing left to vary.
    return [word] if idx == word.length

    current = word[idx, 1]
    # Non-letters have a single form; move on to the next position.
    return permute_case(word, idx + 1).flatten unless (UpperAlpha+LowerAlpha).index(current)

    upper_variant = word.dup
    upper_variant[idx, 1] = current.upcase
    lower_variant = word.dup
    lower_variant[idx, 1] = current.downcase

    permute_case(upper_variant, idx + 1) + permute_case(lower_variant, idx + 1)
  end
  # Generate a random hostname
  #
  # The name consists of one to five random alphanumeric labels of one to
  # ten characters each, followed by one of com/net/org/gov, all joined with
  # dots and lower-cased.
  def self.rand_hostname
    labels = []
    (rand(5) + 1).times do
      labels << Rex::Text.rand_text_alphanumeric(rand(10) + 1)
    end
    tlds = ['com', 'net', 'org', 'gov']
    labels << tlds[rand(tlds.size)]
    labels.join('.').downcase
  end
  # Returns a randomly chosen entry of the States constant.
  def self.rand_state()
    pick = rand(States.size)
    States[pick]
  end
  #
  # Calculate the ROR13 hash of a given string
  #
  # For every byte of +name+ the running hash is rotated right by 13 bits
  # (see ror) and the byte value is added. The running value is kept within
  # 32 bits, so the result is always in 0..0xffffffff.
  #
  def self.ror13_hash(name)
    hash = 0
    name.unpack("C*").each do |c|
      # Mask after adding: the sum may carry past bit 31, and without the
      # mask a carry on the last byte would leak into the result.
      hash = (ror(hash, 13) + c) & 0xffffffff
    end
    hash
  end
  #
  # Rotate a 32-bit value to the right by cnt bits
  #
  # The value is rendered as a 32-character bit string (most significant bit
  # first); each step moves the last bit to the front. A +cnt+ below one
  # performs no rotation.
  #
  def self.ror(val, cnt)
    bitstring = [val].pack("N").unpack("B32")[0]
    1.upto(cnt) { bitstring = bitstring[-1, 1] + bitstring[0, 31] }
    [bitstring].pack("B32").unpack("N")[0]
  end
  #
  # Rotate a 32-bit value to the left by cnt bits
  #
  # The value is rendered as a 32-character bit string (most significant bit
  # first); each step moves the first bit to the end. A +cnt+ below one
  # performs no rotation.
  #
  def self.rol(val, cnt)
    bitstring = [val].pack("N").unpack("B32")[0]
    1.upto(cnt) { bitstring = bitstring[1, 31] + bitstring[0, 1] }
    [bitstring].pack("B32").unpack("N")[0]
  end
  #
  # Split a string into an array of pieces of n characters each
  #
  # str - the string to split (left unmodified).
  # n   - piece length.
  #
  # Returns an array of consecutive pieces; the last one may be shorter. An
  # empty string yields an empty array. When +n+ is not positive the string
  # itself is returned unsplit, as before.
  #
  def self.split_to_a(str, n)
    return str unless n > 0

    pieces = []
    rest = str.dup
    pieces.push(rest.slice!(0, n)) until rest.empty?
    pieces
  end
  #
  # Pack a value as 64-bit little endian, built from two 32-bit
  # little-endian halves (low half first).
  #
  def self.pack_int64le(val)
    low_half = val & 0x00000000ffffffff
    high_half = val >> 32
    [low_half, high_half].pack("V2")
  end
  #
  # A custom unicode filter for dealing with multi-byte strings on a 8-bit console
  # Punycode would have been more "standard", but it requires valid Unicode chars
  #
  # A string made up only of bytes 0x20..0x7e is returned unchanged. Anything
  # else becomes "$U$<printable part>-0x<hex of the whole string>", where the
  # printable part is the input's bytes in 0x20..0x7e with "-" left out.
  #
  def self.unicode_filter_encode(str)
    unsafe_bytes = ( LowAscii + HighAscii + "\x7f" ).unpack("C*")
    return str if (str.to_s.unpack("C*") & unsafe_bytes).empty?

    printable = str.unpack("C*").select { |c| c < 0x7f and c > 0x1f and c != 0x2d }.pack("C*")
    hex_form = str.unpack("H*")[0]
    "$U$" + printable + "-0x" + hex_form
  end
  # Reverses unicode_filter_encode: every "$U$<printable>-0x<hex>" token in
  # the string is replaced by the bytes described by its hex part. Text
  # outside such tokens is left alone.
  def self.unicode_filter_decode(str)
    token = /\$U\$([\x20-\x2c\x2e-\x7E]*)\-0x([A-Fa-f0-9]+)/
    str.to_s.gsub(token) { [Regexp.last_match(2)].pack("H*") }
  end
  1185. protected
  # Builds one chunk of a pattern: the current character of set +idx+
  # followed by the current characters of all later sets. Once the last set
  # has been reached, +offsets+ is advanced in place, starting at the last
  # set and carrying into earlier sets whenever one wraps back to zero.
  #
  # sets    - array of character sets.
  # idx     - index of the set this call starts at.
  # offsets - array with the current position in every set (mutated).
  # length  - passed along to the recursive call; not otherwise used here.
  def self.converge_sets(sets, idx, offsets, length) # :nodoc:
    piece = sets[idx][offsets[idx]].chr

    # If there are more sets after this one, converge with them.
    return piece << converge_sets(sets, idx + 1, offsets, length) if sets[idx + 1]

    # Last set: bump its offset, and keep carrying into the previous set for
    # as long as the one just bumped wrapped around to zero. Running off the
    # front means every permutation has been produced and all offsets are
    # back at zero.
    cursor = idx
    while cursor >= 0
      offsets[cursor] = (offsets[cursor] + 1) % sets[cursor].length
      break unless offsets[cursor] == 0
      cursor -= 1
    end

    piece
  end
  # Loads 'codepage.map' from the directory of this file into
  # @@codepage_map_cache. Does nothing when the cache is already populated.
  #
  # As parsed below, the file consists of:
  # * lines starting with '#' and blank lines, which are skipped;
  # * header lines whose second field starts with '(': the first field is
  #   the code page number, the rest is its name wrapped in parentheses;
  # * data lines holding whitespace-separated "wide:char" pairs of hex
  #   strings, which belong to the most recent header.
  #
  # The cache maps page number => { 'name' => String, 'data' => Hash }, where
  # 'data' maps each decoded char to the array of decoded wide forms seen
  # for it.
  def self.load_codepage()
    return if (!@@codepage_map_cache.nil?)
    file = File.join(File.dirname(__FILE__),'codepage.map')
    page = ''
    map = {}
    # File.foreach closes the file once iteration ends; the previous
    # File.open(file).each left the handle open.
    File.foreach(file) do |line|
      next if line =~ /^#/
      next if line =~ /^\s*$/
      data = line.split
      if data[1] =~ /^\(/
        page = data.shift.to_i
        name = data.join(' ').sub(/^\(/,'').sub(/\)$/,'')
        map[page] = { 'name' => name, 'data' => {} }
      else
        data.each do |entry|
          wide, char = entry.split(':')
          char = [char].pack('H*')
          wide = [wide].pack('H*')
          (map[page]['data'][char] ||= []).push(wide)
        end
      end
    end
    @@codepage_map_cache = map
  end
  # Returns the sum of all bytes of +str+ modulo 0x100 (0 for an empty
  # string).
  def self.checksum8(str)
    # The explicit 0 seed keeps empty input from producing nil.
    str.unpack("C*").inject(0, :+) % 0x100
  end
  # Returns the sum of the 16-bit little-endian words of +str+ modulo
  # 0x10000 (0 when the string holds no complete word).
  def self.checksum16_le(str)
    # The explicit 0 seed keeps empty input from producing nil.
    str.unpack("v*").inject(0, :+) % 0x10000
  end
  # Returns the sum of the 16-bit big-endian words of +str+ modulo 0x10000
  # (0 when the string holds no complete word).
  def self.checksum16_be(str)
    # The explicit 0 seed keeps empty input from producing nil.
    str.unpack("n*").inject(0, :+) % 0x10000
  end
  # Returns the sum of the 32-bit little-endian words of +str+ modulo
  # 0x100000000 (0 when the string holds no complete word).
  def self.checksum32_le(str)
    # The explicit 0 seed keeps empty input from producing nil.
    str.unpack("V*").inject(0, :+) % 0x100000000
  end
  # Returns the sum of the 32-bit big-endian words of +str+ modulo
  # 0x100000000 (0 when the string holds no complete word).
  def self.checksum32_be(str)
    # The explicit 0 seed keeps empty input from producing nil.
    str.unpack("N*").inject(0, :+) % 0x100000000
  end
  1251. end
  1252. end