/tools/Ruby/lib/ruby/1.8/csv.rb

http://github.com/agross/netopenspace · Ruby · 992 lines · 662 code · 77 blank · 253 comment · 134 complexity · c1c16c40f610947607ac030347bad4e5 MD5 · raw file

  1. # CSV -- module for generating/parsing CSV data.
  2. # Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
  3. # $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
  4. # This program is copyrighted free software by NAKAMURA, Hiroshi. You can
  5. # redistribute it and/or modify it under the same terms of Ruby's license;
  6. # either the dual license version in 2003, or any later version.
  7. class CSV
  # Raised by the parsing routines of this class when the input cannot be
  # interpreted as CSV.
  class IllegalFormatError < RuntimeError
  end
  # Deprecated. A String subclass used to hold the text of one cell.
  class Cell < String
    # data:    initial text of the cell.
    # is_null: when true, data is ignored and the cell is created empty.
    def initialize(data = "", is_null = false)
      if is_null
        super("")
      else
        super(data)
      end
    end

    # Returns the text of the cell (the result of to_s).
    def data
      to_s
    end
  end
  # Deprecated. An Array subclass that adds no behavior of its own.
  class Row < Array; end
  21. # Open a CSV formatted file for reading or writing.
  22. #
  23. # For reading.
  24. #
  25. # EXAMPLE 1
  26. # CSV.open('csvfile.csv', 'r') do |row|
  27. # p row
  28. # end
  29. #
  30. # EXAMPLE 2
  31. # reader = CSV.open('csvfile.csv', 'r')
  32. # row1 = reader.shift
  33. # row2 = reader.shift
  34. # if row2.empty?
  35. # p 'row2 not find.'
  36. # end
  37. # reader.close
  38. #
  39. # ARGS
  40. # filename: filename to parse.
  41. # col_sep: Column separator. ?, by default. If you want to separate
  42. # fields with semicolon, give ?; here.
  43. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
  44. # want to separate records with \r, give ?\r here.
  45. #
  46. # RETURNS
  47. # reader instance. To get parse result, see CSV::Reader#each.
  48. #
  49. #
  50. # For writing.
  51. #
  52. # EXAMPLE 1
  53. # CSV.open('csvfile.csv', 'w') do |writer|
  54. # writer << ['r1c1', 'r1c2']
  55. # writer << ['r2c1', 'r2c2']
  56. # writer << [nil, nil]
  57. # end
  58. #
  59. # EXAMPLE 2
  60. # writer = CSV.open('csvfile.csv', 'w')
  61. # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
  62. # writer.close
  63. #
  64. # ARGS
  65. # filename: filename to generate.
  66. # col_sep: Column separator. ?, by default. If you want to separate
  67. # fields with semicolon, give ?; here.
  68. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
  69. # want to separate records with \r, give ?\r here.
  70. #
  71. # RETURNS
  72. # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
  73. # to generate CSV string.
  74. #
  # Dispatches on mode: 'r'/'rb' open a reader, 'w'/'wb' open a writer.
  # Any other mode raises ArgumentError.
  def CSV.open(path, mode, fs = nil, rs = nil, &block)
    reading = (mode == 'r' || mode == 'rb')
    return open_reader(path, mode, fs, rs, &block) if reading

    writing = (mode == 'w' || mode == 'wb')
    return open_writer(path, mode, fs, rs, &block) if writing

    raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
  end
  # Opens path for reading with ',' as the column separator and hands the
  # given block to the reader; rs is the optional row separator.
  def CSV.foreach(path, rs = nil, &block)
    mode = 'r'
    col_sep = ','
    open_reader(path, mode, col_sep, rs, &block)
  end
  # Reads (part of) the file at path and parses it as CSV.
  #
  # ARGS
  #   path:   file to read.
  #   length: optional number of bytes to read (passed to IO.read).
  #   offset: optional start offset (passed to IO.read).
  #
  # RETURNS
  #   the rows as an Array of Arrays; [] when IO.read yields nothing.
  def CSV.read(path, length = nil, offset = nil)
    data = IO.read(path, length, offset)
    # IO.read may return nil (nothing left to read at the requested offset).
    return [] if data.nil?
    # Hand the data straight to a reader.  Going through CSV.parse would
    # first test whether the file *contents* name an existing file (its
    # deprecated filename form) and could end up parsing a different file.
    CSV::Reader.create(data).collect { |row| row }
  end
  # Reads every row of the file at path (column separator ',') into an
  # Array.  The reader is closed even when parsing raises.
  def CSV.readlines(path, rs = nil)
    csv_reader = open_reader(path, 'r', ',', rs)
    begin
      csv_reader.map { |line| line }
    ensure
      csv_reader.close
    end
  end
  # Opens path for writing ('w') and delegates to open_writer with the given
  # separators and block.
  def CSV.generate(path, fs = nil, rs = nil, &block)
    mode = 'w'
    open_writer(path, mode, fs, rs, &block)
  end
  101. # Parse lines from given string or stream. Return rows as an Array of Arrays.
  # ARGS
  #   str_or_readable: CSV text, or a stream object to read CSV text from.
  #   fs, rs:          column / row separators (nil selects the defaults).
  #
  # RETURNS
  #   nil when a block is given (each row is yielded); otherwise the rows as
  #   an Array.  In the deprecated filename form the result of open_reader is
  #   returned instead.
  def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
    # Only path-like arguments are probed on the filesystem.  Streams must
    # not be: File.exist? raises TypeError for objects such as StringIO, and
    # an open File would be passed on to File.open as if it were a path.
    path_like = !str_or_readable.is_a?(IO) &&
      (str_or_readable.respond_to?(:to_str) ||
       str_or_readable.respond_to?(:to_path))
    if path_like and File.exist?(str_or_readable)
      STDERR.puts("CSV.parse(filename) is deprecated." +
        " Use CSV.open(filename, 'r') instead.")
      return open_reader(str_or_readable, 'r', fs, rs, &block)
    end
    if block
      CSV::Reader.parse(str_or_readable, fs, rs) do |row|
        yield(row)
      end
      nil
    else
      CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
    end
  end
  117. # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
  118. # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
  119. # second line 'c,d' is ignored.
  120. #
  121. # If you don't know whether a target string to parse is exactly 1 line or
  122. # not, use CSV.parse_row instead of this method.
  # ARGS
  #   src: string holding the line to parse.
  #   fs:  column separator, String or character code (',' by default).
  #   rs:  row separator, String or character code (nil by default).
  #
  # RETURNS
  #   Array of the cells of the first line, or [] when the line is not
  #   well-formed (IllegalFormatError is swallowed here).
  def CSV.parse_line(src, fs = nil, rs = nil)
    fs ||= ','
    # Character codes are converted to one-char strings.  Integer is used
    # rather than Fixnum because Fixnum no longer exists in current Rubies;
    # on 1.8 every Fixnum is an Integer, so the check is unchanged there.
    fs = fs.chr if fs.is_a?(Integer)
    rs = rs.chr if rs.is_a?(Integer)
    idx = 0
    res_type = :DT_COLSEP
    row = []
    begin
      # Keep reading cells until something other than a column separator
      # (row separator or end of string) terminates one.
      while res_type == :DT_COLSEP
        res_type, idx, cell = parse_body(src, idx, fs, rs)
        row << cell
      end
    rescue IllegalFormatError
      return []
    end
    row
  end
  144. # Create a line from cells. each cell is stringified by to_s.
  # ARGS
  #   row: the cells; must respond to 'size' and '[](idx)'.
  #   fs:  column separator, String or character code (',' by default).
  #   rs:  row separator, String or character code; only used to decide
  #        whether a cell needs quoting -- no row separator is appended.
  #
  # RETURNS
  #   the CSV text of the row, '' for an empty row.
  def CSV.generate_line(row, fs = nil, rs = nil)
    return '' if row.size == 0
    fs ||= ','
    # Integer instead of Fixnum: Fixnum no longer exists in current Rubies,
    # and on 1.8 every Fixnum is an Integer.
    fs = fs.chr if fs.is_a?(Integer)
    rs = rs.chr if rs.is_a?(Integer)
    result_str = ''
    idx = 0
    while true
      generate_body(row[idx], result_str, fs, rs)
      idx += 1
      # No separator after the last cell.
      break if idx == row.size
      generate_separator(:DT_COLSEP, result_str, fs, rs)
    end
    result_str
  end
  169. # Parse a line from string. Consider using CSV.parse_line instead.
  170. # To parse lines in CSV string, see EXAMPLE below.
  171. #
  172. # EXAMPLE
  173. # src = "a,b\r\nc,d\r\ne,f"
  174. # idx = 0
  175. # begin
  176. # parsed = []
  177. # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
  178. # puts "Parsed #{ parsed_cells } cells."
  179. # p parsed
  180. # end while parsed_cells > 0
  181. #
  182. # ARGS
  183. # src: a CSV data to be parsed. Must respond '[](idx)'.
  184. # src[](idx) must return a char. (Not a string such as 'a', but 97).
  185. # src[](idx_out_of_bounds) must return nil. A String satisfies this
  186. # requirement.
  187. # idx: index of parsing location of 'src'. 0 origin.
  188. # out_dev: buffer for parsed cells. Must respond '<<(aString)'.
  189. # col_sep: Column separator. ?, by default. If you want to separate
  190. # fields with semicolon, give ?; here.
  191. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
  192. # want to separate records with \r, give ?\r here.
  193. #
  194. # RETURNS
  195. # parsed_cells: num of parsed cells.
  196. # idx: index of next parsing location of 'src'.
  197. #
  def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
    fs ||= ','
    # Character codes are converted to one-char strings.  Integer is used
    # rather than Fixnum because Fixnum no longer exists in current Rubies;
    # on 1.8 every Fixnum is an Integer, so the check is unchanged there.
    fs = fs.chr if fs.is_a?(Integer)
    rs = rs.chr if rs.is_a?(Integer)
    idx_backup = idx
    parsed_cells = 0
    res_type = :DT_COLSEP
    begin
      while res_type != :DT_ROWSEP
        res_type, idx, cell = parse_body(src, idx, fs, rs)
        if res_type == :DT_EOS
          # End of data before anything was consumed: there is no row.
          if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
            return 0, 0
          end
          # Otherwise the end of data terminates the row like a row separator.
          res_type = :DT_ROWSEP
        end
        parsed_cells += 1
        out_dev << cell
      end
    rescue IllegalFormatError
      # Malformed input is reported with the same (0, 0) pair; cells already
      # pushed to out_dev are left there.
      return 0, 0
    end
    return parsed_cells, idx
  end
  226. # Convert a line from cells data to string. Consider using CSV.generate_line
  227. # instead. To generate multi-row CSV string, see EXAMPLE below.
  228. #
  229. # EXAMPLE
  230. # row1 = ['a', 'b']
  231. # row2 = ['c', 'd']
  232. # row3 = ['e', 'f']
  233. # src = [row1, row2, row3]
  234. # buf = ''
  235. # src.each do |row|
  236. # parsed_cells = CSV.generate_row(row, 2, buf)
  237. # puts "Created #{ parsed_cells } cells."
  238. # end
  239. # p buf
  240. #
  241. # ARGS
  242. # src: an Array of String to be converted to CSV string. Must respond to
  243. # 'size' and '[](idx)'. src[idx] must return String.
  244. # cells: num of cells in a line.
  245. # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
  246. # col_sep: Column separator. ?, by default. If you want to separate
  247. # fields with semicolon, give ?; here.
  248. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
  249. # want to separate records with \r, give ?\r here.
  250. #
  251. # RETURNS
  252. # parsed_cells: num of converted cells.
  253. #
  def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
    fs ||= ','
    # Integer instead of Fixnum: Fixnum no longer exists in current Rubies,
    # and on 1.8 every Fixnum is an Integer.
    fs = fs.chr if fs.is_a?(Integer)
    rs = rs.chr if rs.is_a?(Integer)
    src_size = src.size
    if src_size == 0
      # An empty source only produces output (a bare row separator) when an
      # empty row was actually requested.
      if cells == 0
        generate_separator(:DT_ROWSEP, out_dev, fs, rs)
      end
      return 0
    end
    parsed_cells = 0
    generate_body(src[parsed_cells], out_dev, fs, rs)
    parsed_cells += 1
    while (parsed_cells < cells) and (parsed_cells != src_size)
      generate_separator(:DT_COLSEP, out_dev, fs, rs)
      generate_body(src[parsed_cells], out_dev, fs, rs)
      parsed_cells += 1
    end
    # A complete row ends with the row separator; when src ran out before
    # 'cells' cells were written, a column separator is emitted instead.
    if parsed_cells == cells
      generate_separator(:DT_ROWSEP, out_dev, fs, rs)
    else
      generate_separator(:DT_COLSEP, out_dev, fs, rs)
    end
    parsed_cells
  end
  285. # Private class methods.
  286. class << self
  287. private
  # Opens the file at path and reads CSV rows from it.
  #
  # With a block: every row is yielded, the file is closed afterwards (also
  # when the block raises) and nil is returned.
  # Without a block: returns a reader that closes the file when the reader
  # itself is closed.
  def open_reader(path, mode, fs, rs, &block)
    file = File.open(path, mode)
    if block
      begin
        CSV::Reader.parse(file, fs, rs) do |row|
          yield(row)
        end
      ensure
        file.close
      end
      nil
    else
      created = false
      begin
        reader = CSV::Reader.create(file, fs, rs)
        created = true
      ensure
        # Creating the reader already reads from the file; do not leak the
        # handle when that fails, since nobody else holds a reference to it.
        file.close unless created
      end
      reader.close_on_terminate
      reader
    end
  end
  # Opens the file at path and writes CSV rows to it.
  #
  # Without a block: returns a writer that closes the file when the writer
  # itself is closed.
  # With a block: the writer is yielded, the file is closed afterwards (also
  # when the block raises) and nil is returned.
  def open_writer(path, mode, fs, rs, &block)
    file = File.open(path, mode)
    unless block
      csv_writer = CSV::Writer.create(file, fs, rs)
      csv_writer.close_on_terminate
      return csv_writer
    end
    begin
      CSV::Writer.generate(file, fs, rs) { |csv_writer| yield(csv_writer) }
    ensure
      file.close
    end
    nil
  end
  # Parses a single cell of src, starting at index idx.
  #
  # ARGS
  #   src: the data; read one element at a time with src[idx] and sliced
  #        with src[start, length].
  #   idx: index at which the cell starts.
  #   fs:  column separator string.
  #   rs:  row separator string, or nil.  With nil, "\n" is used and a "\r"
  #        outside quotes gets the special treatment described below.
  #
  # RETURNS three values
  #   type: :DT_COLSEP or :DT_ROWSEP when the cell was ended by that
  #         separator, :DT_EOS when src was exhausted.
  #   idx:  index just behind the separator, or the end index for :DT_EOS.
  #   cell: a Cell holding the text, or nil when no data was seen before the
  #         separator / end of src.
  #
  # Raises IllegalFormatError for malformed input (e.g. a quote in the middle
  # of unquoted data, data following a closing quote, an unterminated quoted
  # cell, or a dangling "\r" when rs is nil).
  def parse_body(src, idx, fs, rs)
    fs_str = fs
    fs_size = fs_str.size
    rs_str = rs || "\n"
    rs_size = rs_str.size
    # fs_idx / rs_idx: how many chars of fs / rs have been matched so far.
    fs_idx = rs_idx = 0
    cell = Cell.new
    # state is one of
    #   :ST_START -- nothing of the cell consumed yet,
    #   :ST_DATA  -- inside cell data (quoted or not),
    #   :ST_QUOTE -- a '"' was just seen while inside a quoted cell.
    state = :ST_START
    # quoted: currently inside a quoted section.
    # cr:     an unquoted "\r" was seen while rs is nil.
    quoted = cr = false
    c = nil
    # last_idx: start of the part of src not yet copied into cell.
    last_idx = idx
    while c = src[idx]
      unless quoted
        fschar = (c == fs_str[fs_idx])
        rschar = (c == rs_str[rs_idx])
        # simple 1 char backtrack
        if !fschar and c == fs_str[0]
          fs_idx = 0
          fschar = true
          if state == :ST_START
            state = :ST_DATA
          elsif state == :ST_QUOTE
            raise IllegalFormatError
          end
        end
        if !rschar and c == rs_str[0]
          rs_idx = 0
          rschar = true
          if state == :ST_START
            state = :ST_DATA
          elsif state == :ST_QUOTE
            raise IllegalFormatError
          end
        end
      end
      if c == ?"
        fs_idx = rs_idx = 0
        if cr
          raise IllegalFormatError
        end
        # Flush the text collected so far; the quote itself is handled below.
        cell << src[last_idx, (idx - last_idx)]
        last_idx = idx
        if state == :ST_DATA
          if quoted
            # Quote inside a quoted section: either the closing quote or the
            # first half of a doubled quote -- decided by the next char.
            last_idx += 1
            quoted = false
            state = :ST_QUOTE
          else
            # Quote in the middle of unquoted data.
            raise IllegalFormatError
          end
        elsif state == :ST_QUOTE
          # Second quote in a row: a literal '"' inside the quoted section.
          cell << c.chr
          last_idx += 1
          quoted = true
          state = :ST_DATA
        else # :ST_START
          # Opening quote of the cell.
          quoted = true
          last_idx += 1
          state = :ST_DATA
        end
      elsif fschar or rschar
        if fschar
          fs_idx += 1
        end
        if rschar
          rs_idx += 1
        end
        sep = nil
        if fs_idx == fs_size
          # The whole column separator has been matched.
          if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
            state = :ST_DATA
          end
          # Copy the pending text, leaving out the separator chars before idx.
          cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
          last_idx = idx
          fs_idx = rs_idx = 0
          if cr
            raise IllegalFormatError
          end
          sep = :DT_COLSEP
        elsif rs_idx == rs_size
          # The whole row separator has been matched.
          if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
            state = :ST_DATA
          end
          # When rs is nil and a "\r" is pending, the text before it was
          # already copied in the "\r" branch below, so nothing is added here.
          if !(rs.nil? and cr)
            cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
            last_idx = idx
          end
          fs_idx = rs_idx = 0
          sep = :DT_ROWSEP
        end
        if sep
          if state == :ST_DATA
            return sep, idx + 1, cell;
          elsif state == :ST_QUOTE
            return sep, idx + 1, cell;
          else # :ST_START
            return sep, idx + 1, nil
          end
        end
      elsif rs.nil? and c == ?\r
        # special \r treatment for backward compatibility
        fs_idx = rs_idx = 0
        if cr
          raise IllegalFormatError
        end
        cell << src[last_idx, (idx - last_idx)]
        last_idx = idx
        if quoted
          state = :ST_DATA
        else
          cr = true
        end
      else
        # Ordinary data char.
        fs_idx = rs_idx = 0
        if state == :ST_DATA or state == :ST_START
          if cr
            raise IllegalFormatError
          end
          state = :ST_DATA
        else # :ST_QUOTE
          # Data directly behind a closing quote.
          raise IllegalFormatError
        end
      end
      idx += 1
    end
    # src is exhausted.
    if state == :ST_START
      if fs_idx > 0 or rs_idx > 0
        # Only a partially matched separator was seen; it counts as data.
        state = :ST_DATA
      else
        return :DT_EOS, idx, nil
      end
    elsif quoted
      # Quoted section never closed.
      raise IllegalFormatError
    elsif cr
      # "\r" without a following row separator.
      raise IllegalFormatError
    end
    cell << src[last_idx, (idx - last_idx)]
    last_idx = idx
    return :DT_EOS, idx, cell
  end
  # Appends the CSV representation of one cell to out_dev.
  #
  # A nil cell produces no output.  Any other cell is converted with to_s;
  # it is wrapped in double quotes (with embedded quotes doubled) when it
  # contains a quote, the column separator, the row separator (if given),
  # a CR or LF, or when it is empty.
  def generate_body(cell, out_dev, fs, rs)
    return if cell.nil?

    text = cell.to_s
    escaped = text.dup
    # gsub! has to run first: it both doubles the quotes and reports (by a
    # non-nil result) that there were any.
    must_quote = escaped.gsub!('"', '""') ||
      escaped.index(fs) ||
      (rs && escaped.index(rs)) ||
      (/[\r\n]/ =~ escaped) ||
      text.empty?
    if must_quote
      out_dev << '"' << escaped << '"'
    else
      out_dev << escaped
    end
  end
  # Appends a separator to out_dev: fs for :DT_COLSEP, rs (or "\n" when rs
  # is nil) for :DT_ROWSEP.  Any other type appends nothing.
  def generate_separator(type, out_dev, fs, rs)
    if type == :DT_COLSEP
      out_dev << fs
    elsif type == :DT_ROWSEP
      row_sep = rs || "\n"
      out_dev << row_sep
    end
  end
  487. end
  488. # CSV formatted string/stream reader.
  489. #
  490. # EXAMPLE
  # read CSV lines until the first column is 'stop'.
  492. #
  493. # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
  494. # p row
  495. # break if !row[0].is_null && row[0].data == 'stop'
  496. # end
  497. #
  class Reader
    include Enumerable

    # Parse CSV data and get lines. Given block is called for each parsed row.
    # Block value is always nil. Rows are not cached for performance reason.
    #
    # With a block the reader is closed when iteration ends -- also when the
    # block raises or breaks out -- and nil is returned.  Without a block the
    # reader itself is returned.
    def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
      reader = Reader.create(str_or_readable, fs, rs)
      return reader unless block
      begin
        reader.each do |row|
          yield(row)
        end
      ensure
        # Release the reader even if the caller's block did not finish.
        reader.close
      end
      nil
    end

    # Returns reader instance: a StringReader for a String, an IOReader for
    # an IO and for any other object.
    def Reader.create(str_or_readable, fs = ',', rs = nil)
      case str_or_readable
      when IO
        IOReader.new(str_or_readable, fs, rs)
      when String
        StringReader.new(str_or_readable, fs, rs)
      else
        IOReader.new(str_or_readable, fs, rs)
      end
    end

    # Yields each row (an Array of cells) until get_row reports that no cell
    # could be read.  Returns nil.
    def each
      while true
        row = []
        parsed_cells = get_row(row)
        if parsed_cells == 0
          break
        end
        yield(row)
      end
      nil
    end

    # Reads and returns the next row; the row is empty when get_row added
    # no cell.
    def shift
      row = []
      get_row(row)
      row
    end

    # Closes the reader by invoking terminate.
    def close
      terminate
    end

  private

    def initialize(dev)
      raise RuntimeError.new('Do not instanciate this class directly.')
    end

    # Must append the cells of the next row to row and return their number.
    def get_row(row)
      raise NotImplementedError.new('Method get_row must be defined in a derived class.')
    end

    def terminate
      # Define if needed.
    end
  end
  # Reader over a String held in memory.
  class StringReader < Reader
    # string: the CSV text.  Three leading bytes EF BB BF are skipped.
    # fs, rs: column / row separators handed to CSV.parse_row.
    def initialize(string, fs = ',', rs = nil)
      @dev = string
      @fs, @rs = fs, rs
      @idx = (@dev[0, 3] == "\xef\xbb\xbf") ? 3 : 0
    end

  private

    # Parses the next row into row and returns the number of cells.
    # A (0, 0) result from CSV.parse_row before the end of the string means
    # the data is malformed and raises IllegalFormatError.
    def get_row(row)
      cell_count, resume_at = CSV.parse_row(@dev, @idx, row, @fs, @rs)
      nothing_parsed = (cell_count == 0 && resume_at == 0)
      raise IllegalFormatError.new if nothing_parsed && @idx != @dev.size
      @idx = resume_at
      cell_count
    end
  end
  # Reader over a stream, accessed through a CSV::IOBuf.
  class IOReader < Reader
    # io:     the stream to read from.
    # fs, rs: column / row separators handed to CSV.parse_row.
    # The three char codes 0xef 0xbb 0xbf at the start are skipped.
    def initialize(io, fs = ',', rs = nil)
      @io = io
      @fs, @rs = fs, rs
      @dev = CSV::IOBuf.new(@io)
      starts_with_bom = (@dev[0] == 0xef && @dev[1] == 0xbb && @dev[2] == 0xbf)
      @idx = starts_with_bom ? 3 : 0
      @close_on_terminate = false
    end

    # Tell this reader to close the IO when terminated (Triggered by invoking
    # CSV::IOReader#close).
    def close_on_terminate
      @close_on_terminate = true
    end

  private

    # Parses the next row into row and returns the number of cells.
    # A (0, 0) result from CSV.parse_row while the buffer is not at its end
    # means the data is malformed and raises IllegalFormatError.
    def get_row(row)
      cell_count, resume_at = CSV.parse_row(@dev, @idx, row, @fs, @rs)
      nothing_parsed = (cell_count == 0 && resume_at == 0)
      raise IllegalFormatError.new if nothing_parsed && !@dev.is_eos?
      # Discard the consumed part of the buffer; the index is kept relative
      # to what is left.
      consumed = @dev.drop(resume_at)
      @idx = resume_at - consumed
      cell_count
    end

    # Closes the IO when requested via close_on_terminate, then the buffer.
    def terminate
      @io.close if @close_on_terminate
      @dev.close if @dev
    end
  end
  611. # CSV formatted string/stream writer.
  612. #
  613. # EXAMPLE
  614. # Write rows to 'csvout' file.
  615. #
  616. # outfile = File.open('csvout', 'wb')
  617. # CSV::Writer.generate(outfile) do |csv|
  618. # csv << ['c1', nil, '', '"', "\r\n", 'c2']
  619. # ...
  620. # end
  621. #
  622. # outfile.close
  623. #
  class Writer
    # Given block is called with the writer instance. str_or_writable must
    # handle '<<(string)'.
    #
    # With a block the writer is closed afterwards -- also when the block
    # raises -- and nil is returned.  Without a block the writer itself is
    # returned.
    def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
      writer = Writer.create(str_or_writable, fs, rs)
      return writer unless block
      begin
        yield(writer)
      ensure
        # Close the writer even if the caller's block did not finish.
        writer.close
      end
      nil
    end

    # str_or_writable must handle '<<(string)'.
    def Writer.create(str_or_writable, fs = ',', rs = nil)
      BasicWriter.new(str_or_writable, fs, rs)
    end

    # dump CSV stream to the device. argument must be an Array of String.
    # Returns self so that calls can be chained.
    def <<(row)
      CSV.generate_row(row, row.size, @dev, @fs, @rs)
      self
    end
    alias add_row <<

    # Closes the writer by invoking terminate.
    def close
      terminate
    end

  private

    def initialize(dev)
      raise RuntimeError.new('Do not instanciate this class directly.')
    end

    def terminate
      # Define if needed.
    end
  end
  # Writer that appends the generated CSV text to the given device.
  class BasicWriter < Writer
    # str_or_writable: output device.
    # fs, rs:          column / row separators used when rows are written.
    def initialize(str_or_writable, fs = ',', rs = nil)
      @dev = str_or_writable
      @fs, @rs = fs, rs
      @close_on_terminate = false
    end

    # Tell this writer to close the IO when terminated (Triggered by invoking
    # CSV::BasicWriter#close).
    def close_on_terminate
      @close_on_terminate = true
    end

  private

    # Closes the device, but only when close_on_terminate was requested.
    def terminate
      @dev.close if @close_on_terminate
    end
  end
  677. private
  678. # Buffered stream.
  679. #
  680. # EXAMPLE 1 -- an IO.
  681. # class MyBuf < StreamBuf
  682. # # Do initialize myself before a super class. Super class might call my
  683. # # method 'read'. (Could be awful for C++ user. :-)
  684. # def initialize(s)
  685. # @s = s
  686. # super()
  687. # end
  688. #
  689. # # define my own 'read' method.
  # # CAUTION: Returning nil means EndOfStream.
  691. # def read(size)
  692. # @s.read(size)
  693. # end
  694. #
  695. # # release buffers. in Ruby which has GC, you do not have to call this...
  696. # def terminate
  697. # @s = nil
  698. # super()
  699. # end
  700. # end
  701. #
  702. # buf = MyBuf.new(STDIN)
  703. # my_str = ''
  704. # p buf[0, 0] # => '' (null string)
  705. # p buf[0] # => 97 (char code of 'a')
  706. # p buf[0, 1] # => 'a'
  707. # my_str = buf[0, 5]
  708. # p my_str # => 'abcde' (5 chars)
  709. # p buf[0, 6] # => "abcde\n" (6 chars)
  710. # p buf[0, 7] # => "abcde\n" (6 chars)
  711. # p buf.drop(3) # => 3 (dropped chars)
  712. # p buf.get(0, 2) # => 'de' (2 chars)
  713. # p buf.is_eos? # => false (is not EOS here)
  714. # p buf.drop(5) # => 3 (dropped chars)
  715. # p buf.is_eos? # => true (is EOS here)
  716. # p buf[0] # => nil (is EOS here)
  717. #
  718. # EXAMPLE 2 -- String.
  719. # This is a conceptual example. No pros with this.
  720. #
  721. # class StrBuf < StreamBuf
  722. # def initialize(s)
  723. # @str = s
  724. # @idx = 0
  725. # super()
  726. # end
  727. #
  728. # def read(size)
  729. # str = @str[@idx, size]
  730. # @idx += str.size
  731. # str
  732. # end
  733. # end
  734. #
  # Base class of a buffered, forward-only view on a stream.  Data is pulled
  # from the derived class' 'read' method in chunks of BufSize and kept in a
  # list of buffers until it is dropped.
  class StreamBuf
    # get a char or a partial string from the stream.
    # idx: index of a string to specify a start point of a string to get.
    # unlike String instance, idx < 0 returns nil.
    # n: size of a string to get.
    # returns char at idx if n == nil.
    # returns a partial string, from idx to (idx + n) if n != nil. at EOF,
    # the string size could not equal to arg n.
    def [](idx, n = nil)
      if idx < 0
        return nil
      end
      if (idx_is_eos?(idx))
        if n and (@offset + idx == buf_size(@cur_buf))
          # Like a String, 'abc'[4, 1] returns nil and
          # 'abc'[3, 1] returns '' not nil.
          return ''
        else
          return nil
        end
      end
      # Walk forward from the current buffer to the one that holds idx,
      # reading further chunks from the stream when the tail is reached.
      my_buf = @cur_buf
      my_offset = @offset
      next_idx = idx
      while (my_offset + next_idx >= buf_size(my_buf))
        if (my_buf == @buf_tail_idx)
          unless add_buf
            break
          end
        end
        next_idx = my_offset + next_idx - buf_size(my_buf)
        my_buf += 1
        my_offset = 0
      end
      loc = my_offset + next_idx
      if !n
        return @buf_list[my_buf][loc] # Fixnum of char code.
      elsif (loc + n - 1 < buf_size(my_buf))
        return @buf_list[my_buf][loc, n] # String.
      else # should do loop instead of (tail) recursive call...
        # The requested range crosses a buffer boundary: take the rest of
        # this buffer and fetch the remainder recursively.
        res = @buf_list[my_buf][loc, BufSize]
        size_added = buf_size(my_buf) - loc
        if size_added > 0
          idx += size_added
          n -= size_added
          ret = self[idx, n]
          if ret
            res << ret
          end
        end
        return res
      end
    end
    alias get []

    # drop a string from the stream.
    # returns dropped size. at EOF, dropped size might not equals to arg n.
    # Once you drop the head of the stream, access to the dropped part via []
    # or get returns nil.
    def drop(n)
      if is_eos?
        return 0
      end
      size_dropped = 0
      while (n > 0)
        # The end of the stream may be reached while n is still positive
        # (more was requested than is available).  Nothing more can be
        # dropped then, and without this exit the loop would never end.
        if @is_eos and (@cur_buf == @buf_tail_idx)
          break
        end
        if (@offset + n < buf_size(@cur_buf))
          # The rest of the request fits inside the current buffer.
          size_dropped += n
          @offset += n
          n = 0
        else
          # Drop the remainder of the current buffer and move on to the next
          # one, reading a new chunk when the current buffer was the tail.
          size = buf_size(@cur_buf) - @offset
          size_dropped += size
          n -= size
          @offset = 0
          unless rel_buf
            unless add_buf
              break
            end
            @cur_buf = @buf_tail_idx
          end
        end
      end
      size_dropped
    end

    # true when the end of the stream has been detected and the current
    # position is on the last buffer (or no buffer is left).
    def is_eos?
      return idx_is_eos?(0)
    end

    # WARN: Do not instantiate this class directly. Define your own class
    # which derives this class and define 'read' instance method.
    def initialize
      @buf_list = []
      @cur_buf = @buf_tail_idx = -1
      @offset = 0
      @is_eos = false
      # Read the first chunk right away ('read' is called from here).
      add_buf
      @cur_buf = @buf_tail_idx
    end

  protected

    # Releases all buffers from the current one up to the tail.
    def terminate
      while (rel_buf); end
    end

    # protected method 'read' must be defined in derived classes.
    # CAUTION: Returning a string which size is not equal to 'size' means
    # EndOfStream. When it is not at EOS, you must block the callee, try to
    # read and return the sized string.
    def read(size) # raise EOFError
      raise NotImplementedError.new('Method read must be defined in a derived class.')
    end

  private

    # Size of the buffer at position idx of the buffer list.
    def buf_size(idx)
      @buf_list[idx].size
    end

    # Reads the next chunk and appends it to the buffer list.  Returns true
    # when data was added.  When 'read' returns nil or raises EOFError, an
    # empty buffer is appended as end marker, @is_eos is set and false is
    # returned.  Any other error releases the buffers and is re-raised.
    def add_buf
      if @is_eos
        return false
      end
      begin
        str_read = read(BufSize)
      rescue EOFError
        str_read = nil
      rescue
        terminate
        raise
      end
      if str_read.nil?
        @is_eos = true
        @buf_list.push('')
        @buf_tail_idx += 1
        false
      else
        @buf_list.push(str_read)
        @buf_tail_idx += 1
        true
      end
    end

    # Releases the current buffer.  Returns true after advancing to the next
    # buffer, false when there was no current buffer or it was the tail (the
    # current position is then set to -1).
    def rel_buf
      if (@cur_buf < 0)
        return false
      end
      @buf_list[@cur_buf] = nil
      if (@cur_buf == @buf_tail_idx)
        @cur_buf = -1
        return false
      else
        @cur_buf += 1
        return true
      end
    end

    # NOTE: the idx argument is not used; the answer only depends on the
    # current buffer position.
    def idx_is_eos?(idx)
      (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
    end

    # Number of chars requested from 'read' per chunk.
    BufSize = 1024 * 8
  end
  889. # Buffered IO.
  890. #
  891. # EXAMPLE
  892. # # File 'bigdata' could be a giga-byte size one!
  893. # buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
  894. # CSV::Reader.new(buf).each do |row|
  895. # p row
  896. # break if row[0].data == 'admin'
  897. # end
  898. #
  # StreamBuf whose data comes from the 'read' method of the given object.
  class IOBuf < StreamBuf
    # s: the stream; it must be stored before super() runs because the
    # superclass constructor already reads the first chunk.
    def initialize(s)
      @s = s
      super()
    end

    # Releases the buffers (delegates to terminate).
    def close
      terminate
    end

  private

    # Reads up to size chars from the wrapped stream.
    def read(size)
      @s.read(size)
    end

    def terminate
      super
    end
  end
  915. end