PageRenderTime 113ms CodeModel.GetById 36ms app.highlight 68ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/Ruby/lib/ruby/1.8/csv.rb

http://github.com/agross/netopenspace
Ruby | 992 lines | 662 code | 77 blank | 253 comment | 134 complexity | c1c16c40f610947607ac030347bad4e5 MD5 | raw file
  1# CSV -- module for generating/parsing CSV data.
  2# Copyright (C) 2000-2004  NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
  3  
  4# $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
  5  
  6# This program is copyrighted free software by NAKAMURA, Hiroshi.  You can
  7# redistribute it and/or modify it under the same terms of Ruby's license;
  8# either the dual license version in 2003, or any later version.
  9  
 10  
 11class CSV
  # Raised when CSV input is malformed, e.g. a stray or unterminated double
  # quote is met while a cell is being parsed.
  class IllegalFormatError < RuntimeError; end
 13
 14  # deprecated
 15  class Cell < String
 16    def initialize(data = "", is_null = false)
 17      super(is_null ? "" : data)
 18    end
 19
 20    def data
 21      to_s
 22    end
 23  end
 24
 25  # deprecated
 26  class Row < Array
 27  end
 28
 29  # Open a CSV formatted file for reading or writing.
 30  #
 31  # For reading.
 32  #
 33  # EXAMPLE 1
 34  #   CSV.open('csvfile.csv', 'r') do |row|
 35  #     p row
 36  #   end
 37  #
 38  # EXAMPLE 2
 39  #   reader = CSV.open('csvfile.csv', 'r')
 40  #   row1 = reader.shift
 41  #   row2 = reader.shift
 42  #   if row2.empty?
  #     p 'row2 not found.'
 44  #   end
 45  #   reader.close
 46  #
 47  # ARGS
 48  #   filename: filename to parse.
 49  #   col_sep: Column separator.  ?, by default.  If you want to separate
 50  #     fields with semicolon, give ?; here.
 51  #   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
 52  #     want to separate records with \r, give ?\r here.
 53  #
 54  # RETURNS
 55  #   reader instance.  To get parse result, see CSV::Reader#each.
 56  #
 57  #
 58  # For writing.
 59  #
 60  # EXAMPLE 1
 61  #   CSV.open('csvfile.csv', 'w') do |writer|
 62  #     writer << ['r1c1', 'r1c2']
 63  #     writer << ['r2c1', 'r2c2']
 64  #     writer << [nil, nil]
 65  #   end
 66  #
 67  # EXAMPLE 2
 68  #   writer = CSV.open('csvfile.csv', 'w')
 69  #   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
 70  #   writer.close
 71  #
 72  # ARGS
 73  #   filename: filename to generate.
 74  #   col_sep: Column separator.  ?, by default.  If you want to separate
 75  #     fields with semicolon, give ?; here.
 76  #   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
 77  #     want to separate records with \r, give ?\r here.
 78  #
 79  # RETURNS
 80  #   writer instance.  See CSV::Writer#<< and CSV::Writer#add_row to know how
 81  #   to generate CSV string.
 82  #
 83  def CSV.open(path, mode, fs = nil, rs = nil, &block)
 84    if mode == 'r' or mode == 'rb'
 85      open_reader(path, mode, fs, rs, &block)
 86    elsif mode == 'w' or mode == 'wb'
 87      open_writer(path, mode, fs, rs, &block)
 88    else
 89      raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
 90    end
 91  end
 92
 93  def CSV.foreach(path, rs = nil, &block)
 94    open_reader(path, 'r', ',', rs, &block)
 95  end
 96
 97  def CSV.read(path, length = nil, offset = nil)
 98    CSV.parse(IO.read(path, length, offset))
 99  end
100  
101  def CSV.readlines(path, rs = nil)
102    reader = open_reader(path, 'r', ',', rs)
103    begin
104      reader.collect { |row| row }
105    ensure
106      reader.close
107    end
108  end
109
110  def CSV.generate(path, fs = nil, rs = nil, &block)
111    open_writer(path, 'w', fs, rs, &block)
112  end
113
114  # Parse lines from given string or stream.  Return rows as an Array of Arrays.
115  def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
116    if File.exist?(str_or_readable)
117      STDERR.puts("CSV.parse(filename) is deprecated." +
118        "  Use CSV.open(filename, 'r') instead.")
119      return open_reader(str_or_readable, 'r', fs, rs, &block)
120    end
121    if block
122      CSV::Reader.parse(str_or_readable, fs, rs) do |row|
123        yield(row)
124      end
125      nil
126    else
127      CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
128    end
129  end
130
131  # Parse a line from given string.  Bear in mind it parses ONE LINE.  Rest of
132  # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
133  # second line 'c,d' is ignored.
134  #
135  # If you don't know whether a target string to parse is exactly 1 line or
136  # not, use CSV.parse_row instead of this method.
137  def CSV.parse_line(src, fs = nil, rs = nil)
138    fs ||= ','
139    if fs.is_a?(Fixnum)
140      fs = fs.chr
141    end
142    if !rs.nil? and rs.is_a?(Fixnum)
143      rs = rs.chr
144    end
145    idx = 0
146    res_type = :DT_COLSEP
147    row = []
148    begin
149      while res_type == :DT_COLSEP
150        res_type, idx, cell = parse_body(src, idx, fs, rs)
151        row << cell
152      end
153    rescue IllegalFormatError
154      return []
155    end
156    row
157  end
158
159  # Create a line from cells.  each cell is stringified by to_s.
160  def CSV.generate_line(row, fs = nil, rs = nil)
161    if row.size == 0
162      return ''
163    end
164    fs ||= ','
165    if fs.is_a?(Fixnum)
166      fs = fs.chr
167    end
168    if !rs.nil? and rs.is_a?(Fixnum)
169      rs = rs.chr
170    end
171    res_type = :DT_COLSEP
172    result_str = ''
173    idx = 0
174    while true
175      generate_body(row[idx], result_str, fs, rs)
176      idx += 1
177      if (idx == row.size)
178        break
179      end
180      generate_separator(:DT_COLSEP, result_str, fs, rs)
181    end
182    result_str
183  end
184  
185  # Parse a line from string.  Consider using CSV.parse_line instead.
186  # To parse lines in CSV string, see EXAMPLE below.
187  #
188  # EXAMPLE
189  #   src = "a,b\r\nc,d\r\ne,f"
190  #   idx = 0
191  #   begin
192  #     parsed = []
193  #     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
194  #     puts "Parsed #{ parsed_cells } cells."
195  #     p parsed
196  #   end while parsed_cells > 0
197  #
198  # ARGS
199  #   src: a CSV data to be parsed.  Must respond '[](idx)'.
200  #     src[](idx) must return a char. (Not a string such as 'a', but 97).
201  #     src[](idx_out_of_bounds) must return nil.  A String satisfies this
202  #     requirement.
203  #   idx: index of parsing location of 'src'.  0 origin.
204  #   out_dev: buffer for parsed cells.  Must respond '<<(aString)'.
205  #   col_sep: Column separator.  ?, by default.  If you want to separate
206  #     fields with semicolon, give ?; here.
207  #   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
208  #     want to separate records with \r, give ?\r here.
209  #
210  # RETURNS
211  #   parsed_cells: num of parsed cells.
212  #   idx: index of next parsing location of 'src'.
213  #
214  def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
215    fs ||= ','
216    if fs.is_a?(Fixnum)
217      fs = fs.chr
218    end
219    if !rs.nil? and rs.is_a?(Fixnum)
220      rs = rs.chr
221    end
222    idx_backup = idx
223    parsed_cells = 0
224    res_type = :DT_COLSEP
225    begin
226      while res_type != :DT_ROWSEP
227        res_type, idx, cell = parse_body(src, idx, fs, rs)
228        if res_type == :DT_EOS
229          if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
230            return 0, 0
231          end
232          res_type = :DT_ROWSEP
233        end
234        parsed_cells += 1
235        out_dev << cell
236      end
237    rescue IllegalFormatError
238      return 0, 0
239    end
240    return parsed_cells, idx
241  end
242  
243  # Convert a line from cells data to string.  Consider using CSV.generate_line
244  # instead.  To generate multi-row CSV string, see EXAMPLE below.
245  #
246  # EXAMPLE
247  #   row1 = ['a', 'b']
248  #   row2 = ['c', 'd']
249  #   row3 = ['e', 'f']
250  #   src = [row1, row2, row3]
251  #   buf = ''
252  #   src.each do |row|
253  #     parsed_cells = CSV.generate_row(row, 2, buf)
254  #     puts "Created #{ parsed_cells } cells."
255  #   end
256  #   p buf
257  #
258  # ARGS
259  #   src: an Array of String to be converted to CSV string.  Must respond to
260  #     'size' and '[](idx)'.  src[idx] must return String.
261  #   cells: num of cells in a line.
262  #   out_dev: buffer for generated CSV string.  Must respond to '<<(string)'.
263  #   col_sep: Column separator.  ?, by default.  If you want to separate
264  #     fields with semicolon, give ?; here.
265  #   row_sep: Row separator.  nil by default.  nil means "\r\n or \n".  If you
266  #     want to separate records with \r, give ?\r here.
267  #
268  # RETURNS
269  #   parsed_cells: num of converted cells.
270  #
271  def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
272    fs ||= ','
273    if fs.is_a?(Fixnum)
274      fs = fs.chr
275    end
276    if !rs.nil? and rs.is_a?(Fixnum)
277      rs = rs.chr
278    end
279    src_size = src.size
280    if (src_size == 0)
281      if cells == 0
282        generate_separator(:DT_ROWSEP, out_dev, fs, rs)
283      end
284      return 0
285    end
286    res_type = :DT_COLSEP
287    parsed_cells = 0
288    generate_body(src[parsed_cells], out_dev, fs, rs)
289    parsed_cells += 1
290    while ((parsed_cells < cells) and (parsed_cells != src_size))
291      generate_separator(:DT_COLSEP, out_dev, fs, rs)
292      generate_body(src[parsed_cells], out_dev, fs, rs)
293      parsed_cells += 1
294    end
295    if (parsed_cells == cells)
296      generate_separator(:DT_ROWSEP, out_dev, fs, rs)
297    else
298      generate_separator(:DT_COLSEP, out_dev, fs, rs)
299    end
300    parsed_cells
301  end
302  
303  # Private class methods.
304  class << self
305  private
306
307    def open_reader(path, mode, fs, rs, &block)
308      file = File.open(path, mode)
309      if block
310        begin
311          CSV::Reader.parse(file, fs, rs) do |row|
312            yield(row)
313          end
314        ensure
315          file.close
316        end
317        nil
318      else
319        reader = CSV::Reader.create(file, fs, rs)
320        reader.close_on_terminate
321        reader
322      end
323    end
324
325    def open_writer(path, mode, fs, rs, &block)
326      file = File.open(path, mode)
327      if block
328        begin
329          CSV::Writer.generate(file, fs, rs) do |writer|
330            yield(writer)
331          end
332        ensure
333          file.close
334        end
335        nil
336      else
337        writer = CSV::Writer.create(file, fs, rs) 
338        writer.close_on_terminate
339        writer
340      end
341    end
342
343    def parse_body(src, idx, fs, rs)
344      fs_str = fs
345      fs_size = fs_str.size
346      rs_str = rs || "\n"
347      rs_size = rs_str.size
348      fs_idx = rs_idx = 0
349      cell = Cell.new
350      state = :ST_START
351      quoted = cr = false
352      c = nil
353      last_idx = idx
354      while c = src[idx]
355        unless quoted
356          fschar = (c == fs_str[fs_idx])
357          rschar = (c == rs_str[rs_idx])
358          # simple 1 char backtrack
359          if !fschar and c == fs_str[0]
360            fs_idx = 0
361            fschar = true
362            if state == :ST_START
363              state = :ST_DATA
364            elsif state == :ST_QUOTE
365              raise IllegalFormatError
366            end
367          end
368          if !rschar and c == rs_str[0]
369            rs_idx = 0
370            rschar = true
371            if state == :ST_START
372              state = :ST_DATA
373            elsif state == :ST_QUOTE
374              raise IllegalFormatError
375            end
376          end
377        end
378        if c == ?"
379          fs_idx = rs_idx = 0
380          if cr
381            raise IllegalFormatError
382          end
383          cell << src[last_idx, (idx - last_idx)]
384          last_idx = idx
385          if state == :ST_DATA
386            if quoted
387              last_idx += 1
388              quoted = false
389              state = :ST_QUOTE
390            else
391              raise IllegalFormatError
392            end
393          elsif state == :ST_QUOTE
394            cell << c.chr
395            last_idx += 1
396            quoted = true
397            state = :ST_DATA
398          else  # :ST_START
399            quoted = true
400            last_idx += 1
401            state = :ST_DATA
402          end
403        elsif fschar or rschar
404          if fschar
405            fs_idx += 1
406          end
407          if rschar
408            rs_idx += 1
409          end
410          sep = nil
411          if fs_idx == fs_size
412            if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
413              state = :ST_DATA
414            end
415            cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
416            last_idx = idx
417            fs_idx = rs_idx = 0
418            if cr
419              raise IllegalFormatError
420            end
421            sep = :DT_COLSEP
422          elsif rs_idx == rs_size
423            if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
424              state = :ST_DATA
425            end
426            if !(rs.nil? and cr)
427              cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
428              last_idx = idx
429            end
430            fs_idx = rs_idx = 0
431            sep = :DT_ROWSEP
432          end
433          if sep
434            if state == :ST_DATA
435              return sep, idx + 1, cell;
436            elsif state == :ST_QUOTE
437              return sep, idx + 1, cell;
438            else  # :ST_START
439              return sep, idx + 1, nil
440            end
441          end
442        elsif rs.nil? and c == ?\r
443          # special \r treatment for backward compatibility
444          fs_idx = rs_idx = 0
445          if cr
446            raise IllegalFormatError
447          end
448          cell << src[last_idx, (idx - last_idx)]
449          last_idx = idx
450          if quoted
451            state = :ST_DATA
452          else
453            cr = true
454          end
455        else
456          fs_idx = rs_idx = 0
457          if state == :ST_DATA or state == :ST_START
458            if cr
459              raise IllegalFormatError
460            end
461            state = :ST_DATA
462          else  # :ST_QUOTE
463            raise IllegalFormatError
464          end
465        end
466        idx += 1
467      end
468      if state == :ST_START
469        if fs_idx > 0 or rs_idx > 0
470          state = :ST_DATA
471        else
472          return :DT_EOS, idx, nil
473        end
474      elsif quoted
475        raise IllegalFormatError
476      elsif cr
477        raise IllegalFormatError
478      end
479      cell << src[last_idx, (idx - last_idx)]
480      last_idx = idx
481      return :DT_EOS, idx, cell
482    end
483  
484    def generate_body(cell, out_dev, fs, rs)
485      if cell.nil?
486        # empty
487      else
488        cell = cell.to_s
489        row_data = cell.dup
490        if (row_data.gsub!('"', '""') or
491            row_data.index(fs) or
492            (rs and row_data.index(rs)) or
493            (/[\r\n]/ =~ row_data) or
494            (cell.empty?))
495          out_dev << '"' << row_data << '"'
496        else
497          out_dev << row_data
498        end
499      end
500    end
501    
502    def generate_separator(type, out_dev, fs, rs)
503      case type
504      when :DT_COLSEP
505        out_dev << fs
506      when :DT_ROWSEP
507        out_dev << (rs || "\n")
508      end
509    end
510  end
511
512
513  # CSV formatted string/stream reader.
514  #
515  # EXAMPLE
  #   read CSV lines until the first column is 'stop'.
517  #
518  #   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
519  #     p row
  #     break if row[0] == 'stop'
521  #   end
522  #
523  class Reader
524    include Enumerable
525
526    # Parse CSV data and get lines.  Given block is called for each parsed row.
527    # Block value is always nil.  Rows are not cached for performance reason.
528    def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
529      reader = Reader.create(str_or_readable, fs, rs)
530      if block
531        reader.each do |row|
532          yield(row)
533        end
534        reader.close
535        nil
536      else
537        reader
538      end
539    end
540
541    # Returns reader instance.
542    def Reader.create(str_or_readable, fs = ',', rs = nil)
543      case str_or_readable
544      when IO
545        IOReader.new(str_or_readable, fs, rs)
546      when String
547        StringReader.new(str_or_readable, fs, rs)
548      else
549        IOReader.new(str_or_readable, fs, rs)
550      end
551    end
552
553    def each
554      while true
555        row = []
556        parsed_cells = get_row(row)
557        if parsed_cells == 0
558          break
559        end
560        yield(row)
561      end
562      nil
563    end
564
565    def shift
566      row = []
567      parsed_cells = get_row(row)
568      row
569    end
570
571    def close
572      terminate
573    end
574
575  private
576
577    def initialize(dev)
578      raise RuntimeError.new('Do not instanciate this class directly.')
579    end
580
581    def get_row(row)
582      raise NotImplementedError.new('Method get_row must be defined in a derived class.')
583    end
584
585    def terminate
586      # Define if needed.
587    end
588  end
589  
590
591  class StringReader < Reader
592    def initialize(string, fs = ',', rs = nil)
593      @fs = fs
594      @rs = rs
595      @dev = string
596      @idx = 0
597      if @dev[0, 3] == "\xef\xbb\xbf"
598        @idx += 3
599      end
600    end
601
602  private
603
604    def get_row(row)
605      parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
606      if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size
607        raise IllegalFormatError.new
608      end
609      @idx = next_idx
610      parsed_cells
611    end
612  end
613
614
615  class IOReader < Reader
616    def initialize(io, fs = ',', rs = nil)
617      @io = io
618      @fs = fs
619      @rs = rs
620      @dev = CSV::IOBuf.new(@io)
621      @idx = 0
622      if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
623        @idx += 3
624      end
625      @close_on_terminate = false
626    end
627
628    # Tell this reader to close the IO when terminated (Triggered by invoking
629    # CSV::IOReader#close).
630    def close_on_terminate
631      @close_on_terminate = true
632    end
633
634  private
635
636    def get_row(row)
637      parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
638      if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
639        raise IllegalFormatError.new
640      end
641      dropped = @dev.drop(next_idx)
642      @idx = next_idx - dropped
643      parsed_cells
644    end
645
646    def terminate
647      if @close_on_terminate
648        @io.close
649      end
650
651      if @dev
652        @dev.close
653      end
654    end
655  end
656
657
658  # CSV formatted string/stream writer.
659  #
660  # EXAMPLE
661  #   Write rows to 'csvout' file.
662  #
663  #   outfile = File.open('csvout', 'wb')
664  #   CSV::Writer.generate(outfile) do |csv|
665  #     csv << ['c1', nil, '', '"', "\r\n", 'c2']
666  #     ...
667  #   end
668  #
669  #   outfile.close
670  #
671  class Writer
672    # Given block is called with the writer instance.  str_or_writable must
673    # handle '<<(string)'.
674    def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
675      writer = Writer.create(str_or_writable, fs, rs)
676      if block
677        yield(writer)
678        writer.close
679        nil
680      else
681        writer
682      end
683    end
684
685    # str_or_writable must handle '<<(string)'.
686    def Writer.create(str_or_writable, fs = ',', rs = nil)
687      BasicWriter.new(str_or_writable, fs, rs)
688    end
689
690    # dump CSV stream to the device.  argument must be an Array of String.
691    def <<(row)
692      CSV.generate_row(row, row.size, @dev, @fs, @rs)
693      self
694    end
695    alias add_row <<
696
697    def close
698      terminate
699    end
700
701  private
702
703    def initialize(dev)
704      raise RuntimeError.new('Do not instanciate this class directly.')
705    end
706
707    def terminate
708      # Define if needed.
709    end
710  end
711
712
713  class BasicWriter < Writer
714    def initialize(str_or_writable, fs = ',', rs = nil)
715      @fs = fs
716      @rs = rs
717      @dev = str_or_writable
718      @close_on_terminate = false
719    end
720
721    # Tell this writer to close the IO when terminated (Triggered by invoking
722    # CSV::BasicWriter#close).
723    def close_on_terminate
724      @close_on_terminate = true
725    end
726
727  private
728
729    def terminate
730      if @close_on_terminate
731        @dev.close
732      end
733    end
734  end
735
736private
737
738  # Buffered stream.
739  #
740  # EXAMPLE 1 -- an IO.
741  #   class MyBuf < StreamBuf
742  #     # Do initialize myself before a super class.  Super class might call my
743  #     # method 'read'. (Could be awful for C++ user. :-)
744  #     def initialize(s)
745  #       @s = s
746  #       super()
747  #     end
748  #
749  #     # define my own 'read' method.
  #     # CAUTION: Returning nil means EndOfStream.
751  #     def read(size)
752  #       @s.read(size)
753  #     end
754  #
755  #     # release buffers. in Ruby which has GC, you do not have to call this...
756  #     def terminate
757  #       @s = nil
758  #       super()
759  #     end
760  #   end
761  #
762  #   buf = MyBuf.new(STDIN)
763  #   my_str = ''
764  #   p buf[0, 0]               # => '' (null string)
765  #   p buf[0]                  # => 97 (char code of 'a')
766  #   p buf[0, 1]               # => 'a'
767  #   my_str = buf[0, 5]
768  #   p my_str                  # => 'abcde' (5 chars)
769  #   p buf[0, 6]               # => "abcde\n" (6 chars)
770  #   p buf[0, 7]               # => "abcde\n" (6 chars)
771  #   p buf.drop(3)             # => 3 (dropped chars)
772  #   p buf.get(0, 2)           # => 'de' (2 chars)
773  #   p buf.is_eos?             # => false (is not EOS here)
774  #   p buf.drop(5)             # => 3 (dropped chars)
775  #   p buf.is_eos?             # => true (is EOS here)
776  #   p buf[0]                  # => nil (is EOS here)
777  #
778  # EXAMPLE 2 -- String.
779  #   This is a conceptual example.  No pros with this.
780  #
781  #   class StrBuf < StreamBuf
782  #     def initialize(s)
783  #       @str = s
784  #       @idx = 0
785  #       super()
786  #     end
787  #
788  #     def read(size)
789  #       str = @str[@idx, size]
790  #       @idx += str.size
791  #       str
792  #     end
793  #   end
794  #
795  class StreamBuf
796    # get a char or a partial string from the stream.
797    # idx: index of a string to specify a start point of a string to get.
798    # unlike String instance, idx < 0 returns nil.
799    # n: size of a string to get.
800    # returns char at idx if n == nil.
801    # returns a partial string, from idx to (idx + n) if n != nil.  at EOF,
802    # the string size could not equal to arg n.
803    def [](idx, n = nil) 
804      if idx < 0
805        return nil
806      end
807      if (idx_is_eos?(idx))
808        if n and (@offset + idx == buf_size(@cur_buf))
809          # Like a String, 'abc'[4, 1] returns nil and
810          # 'abc'[3, 1] returns '' not nil.
811          return ''
812        else
813          return nil
814        end
815      end
816      my_buf = @cur_buf
817      my_offset = @offset
818      next_idx = idx
819      while (my_offset + next_idx >= buf_size(my_buf))
820        if (my_buf == @buf_tail_idx)
821          unless add_buf
822            break
823          end
824        end
825        next_idx = my_offset + next_idx - buf_size(my_buf)
826        my_buf += 1
827        my_offset = 0
828      end
829      loc = my_offset + next_idx
830      if !n
831        return @buf_list[my_buf][loc]           # Fixnum of char code.
832      elsif (loc + n - 1 < buf_size(my_buf))
833        return @buf_list[my_buf][loc, n]        # String.
834      else # should do loop insted of (tail) recursive call...
835        res = @buf_list[my_buf][loc, BufSize]
836        size_added = buf_size(my_buf) - loc
837        if size_added > 0
838          idx += size_added
839          n -= size_added
840          ret = self[idx, n]
841          if ret
842            res << ret
843          end
844        end
845        return res
846      end
847    end
848    alias get []
849  
850    # drop a string from the stream.
851    # returns dropped size.  at EOF, dropped size might not equals to arg n.
852    # Once you drop the head of the stream, access to the dropped part via []
853    # or get returns nil.
854    def drop(n)
855      if is_eos?
856        return 0
857      end
858      size_dropped = 0
859      while (n > 0)
860        if !@is_eos or (@cur_buf != @buf_tail_idx)
861          if (@offset + n < buf_size(@cur_buf))
862            size_dropped += n
863            @offset += n
864            n = 0
865          else
866            size = buf_size(@cur_buf) - @offset
867            size_dropped += size
868            n -= size
869            @offset = 0
870            unless rel_buf
871              unless add_buf
872                break
873              end
874              @cur_buf = @buf_tail_idx
875            end
876          end
877        end
878      end
879      size_dropped
880    end
881  
882    def is_eos?
883      return idx_is_eos?(0)
884    end
885  
886    # WARN: Do not instantiate this class directly.  Define your own class
887    # which derives this class and define 'read' instance method.
888    def initialize
889      @buf_list = []
890      @cur_buf = @buf_tail_idx = -1
891      @offset = 0
892      @is_eos = false
893      add_buf
894      @cur_buf = @buf_tail_idx
895    end
896  
897  protected
898
899    def terminate
900      while (rel_buf); end
901    end
902  
903    # protected method 'read' must be defined in derived classes.
904    # CAUTION: Returning a string which size is not equal to 'size' means
905    # EnfOfStream.  When it is not at EOS, you must block the callee, try to
906    # read and return the sized string.
907    def read(size) # raise EOFError
908      raise NotImplementedError.new('Method read must be defined in a derived class.')
909    end
910  
911  private
912  
913    def buf_size(idx)
914      @buf_list[idx].size
915    end
916
917    def add_buf
918      if @is_eos
919        return false
920      end
921      begin
922        str_read = read(BufSize)
923      rescue EOFError
924        str_read = nil
925      rescue
926        terminate
927        raise
928      end
929      if str_read.nil?
930        @is_eos = true
931        @buf_list.push('')
932        @buf_tail_idx += 1
933        false
934      else
935        @buf_list.push(str_read)
936        @buf_tail_idx += 1
937        true
938      end
939    end
940  
941    def rel_buf
942      if (@cur_buf < 0)
943        return false
944      end
945      @buf_list[@cur_buf] = nil
946      if (@cur_buf == @buf_tail_idx)
947        @cur_buf = -1
948        return false
949      else
950        @cur_buf += 1
951        return true
952      end
953    end
954  
955    def idx_is_eos?(idx)
956      (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
957    end
958  
959    BufSize = 1024 * 8
960  end
961
962  # Buffered IO.
963  #
964  # EXAMPLE
965  #   # File 'bigdata' could be a giga-byte size one!
966  #   buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
967  #   CSV::Reader.new(buf).each do |row|
968  #     p row
969  #     break if row[0].data == 'admin'
970  #   end
971  #
972  class IOBuf < StreamBuf
973    def initialize(s)
974      @s = s
975      super()
976    end
977  
978    def close
979      terminate
980    end
981
982  private
983
984    def read(size)
985      @s.read(size)
986    end
987 
988    def terminate
989      super()
990    end
991  end
992end