PageRenderTime 71ms CodeModel.GetById 45ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/Ruby/lib/ruby/1.8/jcode.rb

http://github.com/agross/netopenspace
Ruby | 220 lines | 216 code | 3 blank | 1 comment | 7 complexity | 807e53d9f319bc2aa93b94b1bd4c0105 MD5 | raw file
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS/UTF-8) strings
  2
  3if $VERBOSE && $KCODE == "NONE"
  4  warn "Warning: $KCODE is NONE."
  5end
  6
  7$vsave, $VERBOSE = $VERBOSE, false
  8class String
  9  warn "feel free for some warnings:\n" if $VERBOSE
 10
 11  def _regex_quote(str)
 12    str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
 13      $1 || $2 || '\\' + $3
 14    end
 15  end
 16  private :_regex_quote
 17
 18  PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
 19  PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
 20  PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
 21
 22  RE_SJIS = Regexp.new(PATTERN_SJIS, 0, 'n')
 23  RE_EUC = Regexp.new(PATTERN_EUC, 0, 'n')
 24  RE_UTF8 = Regexp.new(PATTERN_UTF8, 0, 'n')
 25
 26  SUCC = {}
 27  SUCC['s'] = Hash.new(1)
 28  for i in 0 .. 0x3f
 29    SUCC['s'][i.chr] = 0x40 - i
 30  end
 31  SUCC['s']["\x7e"] = 0x80 - 0x7e
 32  SUCC['s']["\xfd"] = 0x100 - 0xfd
 33  SUCC['s']["\xfe"] = 0x100 - 0xfe
 34  SUCC['s']["\xff"] = 0x100 - 0xff
 35  SUCC['e'] = Hash.new(1)
 36  for i in 0 .. 0xa0
 37    SUCC['e'][i.chr] = 0xa1 - i
 38  end
 39  SUCC['e']["\xfe"] = 2
 40  SUCC['u'] = Hash.new(1)
 41  for i in 0 .. 0x7f
 42    SUCC['u'][i.chr] = 0x80 - i
 43  end
 44  SUCC['u']["\xbf"] = 0x100 - 0xbf
 45
 46  def mbchar?
 47    case $KCODE[0]
 48    when ?s, ?S
 49      self =~ RE_SJIS
 50    when ?e, ?E
 51      self =~ RE_EUC
 52    when ?u, ?U
 53      self =~ RE_UTF8
 54    else
 55      nil
 56    end
 57  end
 58
 59  def end_regexp
 60    case $KCODE[0]
 61    when ?s, ?S
 62      /#{PATTERN_SJIS}$/on
 63    when ?e, ?E
 64      /#{PATTERN_EUC}$/on
 65    when ?u, ?U
 66      /#{PATTERN_UTF8}$/on
 67    else
 68      /.$/on
 69    end
 70  end
 71
 72  alias original_succ! succ!
 73  private :original_succ!
 74
 75  alias original_succ succ
 76  private :original_succ
 77
 78  def succ!
 79    reg = end_regexp
 80    if  $KCODE != 'NONE' && self =~ reg
 81      succ_table = SUCC[$KCODE[0,1].downcase]
 82      begin
 83	self[-1] += succ_table[self[-1]]
 84	self[-2] += 1 if self[-1] == 0
 85      end while self !~ reg
 86      self
 87    else
 88      original_succ!
 89    end
 90  end
 91
 92  def succ
 93    str = self.dup
 94    str.succ! or str
 95  end
 96
 97  private
 98
 99  def _expand_ch str
100    a = []
101    str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
102      from = $1 || $2
103      to = $3 || $4
104      one = $5 || $6
105      if one
106	a.push one
107      elsif from.length != to.length
108	next
109      elsif from.length == 1
110	from[0].upto(to[0]) { |c| a.push c.chr }
111      else
112	from.upto(to) { |c| a.push c }
113      end
114    end
115    a
116  end
117
118  def expand_ch_hash from, to
119    h = {}
120    afrom = _expand_ch(from)
121    ato = _expand_ch(to)
122    afrom.each_with_index do |x,i| h[x] = ato[i] || ato[-1] end
123    h
124  end
125
126  HashCache = {}
127  TrPatternCache = {}
128  DeletePatternCache = {}
129  SqueezePatternCache = {}
130
131  public
132
133  def tr!(from, to)
134    return nil if from == ""
135    return self.delete!(from) if to == ""
136
137    pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
138    if from[0] == ?^
139      last = /.$/.match(to)[0]
140      self.gsub!(pattern, last)
141    else
142      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
143      self.gsub!(pattern) do |c| h[c] end
144    end
145  end
146
147  def tr(from, to)
148    (str = self.dup).tr!(from, to) or str
149  end
150
151  def delete!(del)
152    return nil if del == ""
153    self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
154  end
155
156  def delete(del)
157    (str = self.dup).delete!(del) or str
158  end
159
160  def squeeze!(del=nil)
161    return nil if del == ""
162    pattern =
163      if del
164	SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
165      else
166	/(.|\n)\1+/
167      end
168    self.gsub!(pattern, '\1')
169  end
170
171  def squeeze(del=nil)
172    (str = self.dup).squeeze!(del) or str
173  end
174
175  def tr_s!(from, to)
176    return self.delete!(from) if to.length == 0
177
178    pattern = SqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
179    if from[0] == ?^
180      last = /.$/.match(to)[0]
181      self.gsub!(pattern, last)
182    else
183      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
184      self.gsub!(pattern) do h[$1] end
185    end
186  end
187
188  def tr_s(from, to)
189    (str = self.dup).tr_s!(from,to) or str
190  end
191
192  def chop!
193    self.gsub!(/(?:.|\r?\n)\z/, '')
194  end
195
196  def chop
197    (str = self.dup).chop! or str
198  end
199
200  def jlength
201    self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
202  end
203  alias jsize jlength
204
205  def jcount(str)
206    self.delete("^#{str}").jlength
207  end
208
209  def each_char
210    if block_given?
211      scan(/./m) do |x|
212        yield x
213      end
214    else
215      scan(/./m)
216    end
217  end
218
219end
220$VERBOSE = $vsave