/vendor/ruby/2.2.0/gems/roo-2.7.1/lib/roo/excelx/shared_strings.rb
Ruby | 157 lines | 116 code | 14 blank | 27 comment | 2 complexity | 7e13bdbc026fceb7d1b9b2619a29efaa MD5 | raw file
Possible License(s): MIT, CC-BY-SA-3.0, BSD-3-Clause, 0BSD, Apache-2.0, JSON
- require 'roo/excelx/extractor'
- module Roo
- class Excelx
- class SharedStrings < Excelx::Extractor
- COMMON_STRINGS = {
- t: "t",
- r: "r",
- html_tag_open: "<html>",
- html_tag_closed: "</html>"
- }
- def [](index)
- to_a[index]
- end
- def to_a
- @array ||= extract_shared_strings
- end
- def to_html
- @html ||= extract_html
- end
- # Use to_html or to_a for html returns
- # See what is happening with commit???
- def use_html?(index)
- to_html[index][/<([biu]|sup|sub)>/]
- end
- private
- def fix_invalid_shared_strings(doc)
- invalid = { '_x000D_' => "\n" }
- xml = doc.to_s
- return doc unless xml[/#{invalid.keys.join('|')}/]
- ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
- end
- def extract_shared_strings
- return [] unless doc_exists?
- document = fix_invalid_shared_strings(doc)
- # read the shared strings xml document
- document.xpath('/sst/si').map do |si|
- shared_string = ''
- si.children.each do |elem|
- case elem.name
- when 'r'
- elem.children.each do |r_elem|
- shared_string << r_elem.content if r_elem.name == 't'
- end
- when 't'
- shared_string = elem.content
- end
- end
- shared_string
- end
- end
- def extract_html
- return [] unless doc_exists?
- fix_invalid_shared_strings(doc)
- # read the shared strings xml document
- doc.xpath('/sst/si').map do |si|
- html_string = '<html>'
- si.children.each do |elem|
- case elem.name
- when 'r'
- html_string << extract_html_r(elem)
- when 't'
- html_string << elem.content
- end # case elem.name
- end # si.children.each do |elem|
- html_string << '</html>'
- end # doc.xpath('/sst/si').map do |si|
- end # def extract_html
- # The goal of this function is to take the following XML code snippet and create a html tag
- # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
- # {code:xml}
- # <r>
- # <rPr>
- # <i/>
- # <b/>
- # <u/>
- # <vertAlign val="subscript"/>
- # <vertAlign val="superscript"/>
- # </rPr>
- # <t>TEXT</t>
- # </r>
- # {code}
- #
- # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
- def extract_html_r(r_elem)
- str = ''
- xml_elems = {
- sub: false,
- sup: false,
- b: false,
- i: false,
- u: false
- }
- b, i, u, sub, sup = false, false, false, false, false
- r_elem.children.each do |elem|
- case elem.name
- when 'rPr'
- elem.children.each do |rPr_elem|
- case rPr_elem.name
- when 'b'
- # set formatting for Bold to true
- xml_elems[:b] = true
- when 'i'
- # set formatting for Italics to true
- xml_elems[:i] = true
- when 'u'
- # set formatting for Underline to true
- xml_elems[:u] = true
- when 'vertAlign'
- # See if the Vertical Alignment is subscript or superscript
- case rPr_elem.xpath('@val').first.value
- when 'subscript'
- # set formatting for Subscript to true and Superscript to false ... Can't have both
- xml_elems[:sub] = true
- xml_elems[:sup] = false
- when 'superscript'
- # set formatting for Superscript to true and Subscript to false ... Can't have both
- xml_elems[:sup] = true
- xml_elems[:sub] = false
- end
- end
- end
- when 't'
- str << create_html(elem.content, xml_elems)
- end
- end
- str
- end # extract_html_r
- # This will return an html string
- def create_html(text, formatting)
- tmp_str = ''
- formatting.each do |elem, val|
- tmp_str << "<#{elem}>" if val
- end
- tmp_str << text
- reverse_format = Hash[formatting.to_a.reverse]
- reverse_format.each do |elem, val|
- tmp_str << "</#{elem}>" if val
- end
- tmp_str
- end
- end # class SharedStrings < Excelx::Extractor
- end # class Excelx
- end # module Roo