PageRenderTime 42ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/vimfiles/ruby/tbibtools.rb

https://bitbucket.org/halostatue/vim-config
Ruby | 927 lines | 711 code | 91 blank | 125 comment | 84 complexity | 8fca05fba3721f88715f1c7ede740074 MD5 | raw file
Possible License(s): GPL-2.0
  1. #!/usr/bin/env ruby
  2. # tbibtools.rb -- bibtex-related utilities
  3. # @Author: Thomas Link (samul AT web de)
  4. # @License: GPL (see http://www.gnu.org/licenses/gpl.txt)
  5. # @Created: 2007-03-28.
  6. # @Last Change: 2007-07-05.
  7. # @Revision: 0.2.731
  8. #
  9. # This file provides the class TBibTools that can be used to sort and
  10. # process bibtex files, list bibtex keys etc.
  11. #
  12. # Please be aware though that TBibTools#simple_bibtex_parser makes a few
  13. # assumptions about the bibtex file. So it's quite possible that it will
  14. # fail in some occasions. This is rather a quick hack than a real
  15. # parser.
  16. require 'optparse'
  17. require 'rbconfig'
  18. class TBibTools
  19. # Some of this class's methods can be used in the configuration
  20. # file. Please see the examples in the methods' documentation.
  21. #
  22. # New formatting options can be defined with the methods: shortcut
  23. # and def_*.
  24. #
  25. # The formatting options (methods matching preprocess_*, head_*, body_*,
  26. # tail_*, format_*, shortcut_*) are defined here, too.
  27. class Configuration
  28. attr_accessor :case_sensitive
  29. attr_accessor :entry_format
  30. attr_accessor :entry_format_default
  31. attr_accessor :sort
  32. attr_accessor :input_files
  33. attr_accessor :output_file
  34. attr_accessor :filter_rx
  35. attr_accessor :ignore_fields
  36. attr_accessor :list_format_string
  37. attr_accessor :strings_expansion
  38. attr_accessor :keys_order
  39. attr_accessor :stripPrelude
  40. attr_accessor :query_rx
  # Set all options to their defaults and then evaluate the configuration
  # files that exist, in this order:
  #
  # * SYSCONFDIR/tbibtools.rb
  # * %USERPROFILE%/tbibtools.rb (if USERPROFILE is set)
  # * $HOME/.tbibtools (if HOME is set)
  #
  # tbibtools:: the object whose #crossreferenced list is consulted by
  #             #is_crossreferenced
  def initialize(tbibtools)
    @tbibtools            = tbibtools
    @case_sensitive       = true
    @stripPrelude         = false
    @entry_format         = []
    @entry_format_default = []
    @input_files          = []
    @output_file          = nil
    @query_rx             = {}
    @ignore_fields        = []
    @sort                 = '_id'
    @filter_rx            = nil
    @list_format_string   = '#{_id}'
    @strings_expansion    = false
    @keys_order = %w[
      author title editor booktitle journal publisher institution address
      howpublished organization school series type year month edition
      chapter doi volume number pages url eprint file crossref key
      keywords note annote abstract
    ]
    # RbConfig replaces the old Config constant, which no longer exists in
    # current Ruby versions.
    sysconfdir = RbConfig::CONFIG['sysconfdir']
    candidates = []
    candidates << File.join(sysconfdir, 'tbibtools.rb') if sysconfdir
    candidates << File.join(ENV['USERPROFILE'], 'tbibtools.rb') if ENV['USERPROFILE']
    candidates << File.join(ENV['HOME'], '.tbibtools') if ENV['HOME']
    candidates.each { |file| config file }
  end
  92. # Attribute reader
  # Return the explicitly requested formats or, if there are none, the
  # default formats. Duplicates are removed; the result is a new array.
  def entry_format
    active = @entry_format.empty? ? @entry_format_default : @entry_format
    active.uniq
  end
  96. # Usage in configuration file:
  97. # config 'file.rb'
  # Evaluate the configuration file +value+ in the context of this object.
  # A file that does not exist is silently ignored (nil is returned).
  #
  # NOTE: the file's content is executed as Ruby code, so only trusted
  # files must be loaded this way.
  def config(value)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    return unless File.exist?(value)
    # Passing the file name makes error backtraces point at the config file.
    instance_eval(File.read(value), value)
  end
  105. # Usage in configuration file:
  106. # sort_case true
  107. # sort_case false
  # Store +value+ as the case-sensitivity flag used when sorting.
  def sort_case(value)
    @case_sensitive = value
  end
  111. # Usage in configuration file:
  112. # sort_key '_id'
  # Store +value+ as the name of the field to sort by.
  def sort_key(value)
    @sort = value
  end
  116. # Usage in configuration file:
  117. # input 'file1.bib', 'file2.bib'
  # Add the given file names to the list of input files; names that are
  # already registered are not added a second time.
  def input(*value)
    @input_files |= value
  end
  121. # Usage in configuration file:
  122. # output 'file.bib'
  # Store +value+ as the name of the output file.
  def output(value)
    @output_file = value
  end
  126. # Usage in configuration file:
  127. # strip 'mynotes'
  # Register the names of fields that should be removed from the entries.
  #
  # The names may be given as separate arguments or as arrays; arrays are
  # flattened so that the list of ignored fields always contains plain
  # field names (the command line passes the result of String#split).
  def strip(*value)
    @ignore_fields += value.flatten
  end
  131. # Usage in configuration file:
  132. # filter /Humpty Dumpty/
  # Store +value+ (a regexp in the documented usage) as the entry filter.
  def filter(value)
    @filter_rx = value
  end
  136. # Usage in configuration file:
  137. # strip_prelude
  138. # strip_prelude false
  # Set the strip-prelude flag; calling the method without an argument
  # switches the flag on.
  def strip_prelude(value=true)
    @stripPrelude = value
  end
  142. # Usage in configuration file:
  143. # format 'stripRedundantTitle', 'stripEmpty'
  # Append the given format names to the explicit format list; names of
  # known shortcuts are expanded by set_format.
  def format(*value)
    set_format(@entry_format, *value)
  end
  147. # Usage in configuration file:
  148. # default_format 'tml'
  # Append the given format names to the default format list; names of
  # known shortcuts are expanded by set_format.
  def default_format(*value)
    set_format(@entry_format_default, *value)
  end
  152. # Usage in configuration file:
  153. # list_format '#{_lineno}: #{author|editor|institution}: #{title|booktitle}'
  # Store +value+ as the template that head_list expands for each entry.
  def list_format(value)
    @list_format_string = value
  end
  157. # Usage in configuration file:
  158. # expand_strings false
  159. # expand_strings true
  # Set the flag that is handed to the parser as its strings_expansion
  # argument. Without an argument the flag is switched off.
  def expand_strings(value=false)
    @strings_expansion = value
  end
  163. # Usage in configuration file:
  164. # order 'author', 'title', 'editor', 'booktitle'
  # Replace the preferred order of the fields with the given field names.
  def order(*value)
    @keys_order = value
  end
  168. # Usage in configuration file:
  169. # query FIELD1 => REGEXP1, FIELD2 => REGEXP2, ...
  # Merge the given field => regexp pairs into the query table. A pair for
  # a field that is already registered replaces the older regexp.
  def query(value)
    value.each { |field, rx| @query_rx.store(field, rx) }
  end
  # The "tml" shortcut: switch to case-insensitive sorting and register a
  # fixed set of formats in +acc+ (set_format falls back to the explicit
  # format list when +acc+ is nil). Returns the list of format names.
  def shortcut_tml(acc=nil)
    sort_case false
    formats = %w[
      nnIsYear sortCrossref downcaseType downcaseKey
      canonicPages canonicAuthors canonicKeywords canonicQuotes
      stripRedundantTitle stripEmpty bracket align
      unwrap indent
    ]
    set_format(acc, *formats)
    formats
  end
  # The "ls" shortcut: register the formats 'list' and 'stripPrelude' in
  # +acc+ (set_format falls back to the explicit format list when +acc+ is
  # nil). Returns the list of format names.
  def shortcut_ls(acc=nil)
    formats = %w[list stripPrelude]
    set_format(acc, *formats)
    formats
  end
  189. # Usage in configuration file:
  190. # shortcut "NAME" => ["FORMAT1", "FORMAT2" ...]
  # Define one shortcut_NAME method per hash entry. Each generated method
  # takes an optional accumulator, registers the shortcut's formats via
  # set_format and returns the list of formats.
  #
  # The methods are created with define_method (like the def_* helpers do)
  # instead of evaluating generated source code, so a name that is not a
  # valid Ruby identifier can neither break nor inject code.
  def shortcut(hash)
    hash.each do |name, list|
      formats = list.dup
      self.class.send(:define_method, "shortcut_#{name}") do |acc = nil|
        f = formats.dup
        set_format acc, *f
        f
      end
    end
  end
  203. # Usage in configuration file:
  204. # def_preprocess("NAME") {|entry| BODY}
  205. # => entry
  # Define the method preprocess_NAME from the given block.
  #
  # Raises ArgumentError unless the block takes exactly one argument.
  def def_preprocess(name, &block)
    unless block.arity == 1
      raise ArgumentError, "Wrong number of arguments for preprocess definition: #{name}"
    end
    self.class.send(:define_method, "preprocess_#{name}", &block)
  end
  212. # Usage in configuration file:
  213. # def_head("NAME") {|entry, type| BODY}
  # Define the method head_NAME from the given block.
  #
  # Raises ArgumentError unless the block takes exactly two arguments
  # (entry and type).
  def def_head(name, &block)
    unless block.arity == 2
      # The message used to say "body definition", which pointed the user
      # at the wrong definition.
      raise ArgumentError, "Wrong number of arguments for head definition: #{name}"
    end
    self.class.send(:define_method, "head_#{name}", &block)
  end
  220. # Usage in configuration file:
  221. # def_body("NAME") {|entry, key, value| BODY}
  # Define the method body_NAME from the given block.
  #
  # Raises ArgumentError unless the block takes exactly three arguments.
  def def_body(name, &block)
    unless block.arity == 3
      raise ArgumentError, "Wrong number of arguments for body definition: #{name}"
    end
    self.class.send(:define_method, "body_#{name}", &block)
  end
  228. # Usage in configuration file:
  229. # def_tail("NAME") {|entry| BODY}
  # Define the method tail_NAME from the given block.
  #
  # Raises ArgumentError unless the block takes exactly one argument.
  def def_tail(name, &block)
    unless block.arity == 1
      raise ArgumentError, "Wrong number of arguments for tail definition: #{name}"
    end
    self.class.send(:define_method, "tail_#{name}", &block)
  end
  236. # Usage in configuration file:
  237. # def_format("NAME") {|args, entry, key, value| BODY}
  # Define the method format_NAME from the given block.
  #
  # Raises ArgumentError unless the block takes exactly four arguments.
  def def_format(name, &block)
    unless block.arity == 4
      raise ArgumentError, "Wrong number of arguments for format definition: #{name}"
    end
    self.class.send(:define_method, "format_#{name}", &block)
  end
  244. # Usage in configuration file:
  245. # duplicate_field("NAME") {|oldval, val| BODY}
  246. # => newval
  # Define the method duplicate_field_NAME from the given block. The
  # parser calls such a method with the old and the new value when a field
  # occurs twice with different values.
  #
  # Raises ArgumentError unless the block takes exactly two arguments.
  def duplicate_field(name, &block)
    unless block.arity == 2
      raise ArgumentError, "Wrong number of arguments for duplicate_field definition: #{name}"
    end
    self.class.send(:define_method, "duplicate_field_#{name}", &block)
  end
  # True if the entry's '_id' is in the owner's list of cross-referenced
  # keys.
  def is_crossreferenced(e)
    @tbibtools.crossreferenced.include?(e['_id'])
  end
  # Keep only cross-referenced entries: return the entry if it is
  # cross-referenced, nil otherwise.
  def preprocess_selectCrossref(e)
    e if is_crossreferenced(e)
  end
  # Drop cross-referenced entries: return nil if the entry is
  # cross-referenced, the entry itself otherwise.
  def preprocess_unselectCrossref(e)
    e unless is_crossreferenced(e)
  end
  # Expand the list format string for the entry +e+.
  #
  # The template may contain:
  #
  # ##::               a literal '#'
  # #{a|b|c}::         the value of the first of these fields that the
  #                    entry has (unwrapped via format_unwrap)
  # #WIDTH{a|b|c}::    the same, formatted with "%WIDTHs" (a negative
  #                    width left-aligns the value)
  #
  # A placeholder none of whose fields exist expands to an empty (or
  # padded) string. +type+ is not used.
  def head_list(e, type)
    @list_format_string.gsub(/#(#|((-?\d+)?)\{(.+?)\})/) do |token|
      if token == '##'
        '#'
      else
        # Copy the captures right away; the match variables must not be
        # read again after other regexp operations have run.
        width = $2
        candidates = $4.split(/\|/)
        field = candidates.find { |name| e[name] }
        e, _key, text = format_unwrap('', e, field, e[field])
        width.empty? ? text.to_s : "%#{width}s" % text
      end
    end
  end
  # Like head_default, but with the entry type in lower case.
  def head_downcaseType(e, type)
    head_default(e, type.downcase)
  end
  # Like head_default, but with the entry type in upper case.
  def head_upcaseType(e, type)
    head_default(e, type.upcase)
  end
  # The first line of a bibtex entry: "@TYPE{ID," followed by a newline.
  def head_default(e, type)
    '@%s{%s,' % [type, e['_id']] + "\n"
  end
  # One field line of a bibtex entry: "KEY = VALUE," followed by a
  # newline. The entry +e+ is not used.
  def body_default(e, k, v)
    '%s = %s,' % [k, v] + "\n"
  end
  # The text that closes a bibtex entry: the closing brace followed by an
  # empty line. The entry +e+ is not used.
  def tail_default(e)
    '}' + "\n" * 2
  end
  # The text that ends an entry in list mode: a single newline. The entry
  # +e+ is not used.
  def tail_list(e)
    $/.nil? || true ? "\n" : "\n"
  end
  # In list mode no field is printed: an empty array is returned for every
  # field, whatever the arguments are.
  def format_list(args, e, k, v)
    Array.new
  end
  # Return the entry and the value but nil in place of the key.
  def format_nil(args, e, k, v)
    [e, nil, v]
  end
  # Print a warning on stdout if the value contains one of the characters
  # '#', '&' or '%' that is not preceded by a backslash. The entry, key
  # and value are returned unchanged.
  def format_check(args, e, k, v)
    unescaped_special = /(^[#&%]|[^\\][#&%])/
    puts "Problematic entry #{e["_id"]}: #{k}=#{v}" if v =~ unescaped_special
    [e, k, v]
  end
  # Enclose the value in curly braces unless it is a non-empty string of
  # digits.
  def format_bracket(args, e, k, v)
    v = "{#{v}}" unless v =~ /\A[0-9]+\z/
    [e, k, v]
  end
  # Enclose the value in double quotes unless it is a non-empty string of
  # digits.
  def format_quote(args, e, k, v)
    v = "\"#{v}\"" unless v =~ /\A[0-9]+\z/
    [e, k, v]
  end
  # Convert the field name to lower case.
  def format_downcaseKey(args, e, k, v)
    lowered = k.downcase
    [e, lowered, v]
  end
  # Convert the field name to upper case.
  def format_upcaseKey(args, e, k, v)
    raised = k.upcase
    [e, raised, v]
  end
  # Indent the field name and every continuation line of the value.
  #
  # args:: the number of spaces to indent by; if empty, one space is used
  def format_indent(args, e, k, v)
    pad = args.empty? ? ' ' : ' ' * args.to_i
    indented_value = v.gsub("\n", "\n" + pad)
    [e, pad + k, indented_value]
  end
  # Drop fields with an empty value: for an empty value only the entry is
  # returned (no key), otherwise the arguments are passed through.
  def format_stripEmpty(args, e, k, v)
    v.empty? ? [e] : [e, k, v]
  end
  # Drop the 'title' field if its value equals the entry's 'booktitle':
  # in that case only the entry is returned (no key).
  #
  # The method is available under the name the 'stripRedundantTitle'
  # format refers to; the historical, misspelled name is kept as an alias.
  def format_stripRedundantTitle(args, e, k, v)
    if k == 'title' && e['booktitle'] == v
      [e]
    else
      [e, k, v]
    end
  end
  alias format_stripRedundantTitel format_stripRedundantTitle
  # Replace text in the value.
  #
  # args:: a string of FROM:TO pairs (both parts are runs of characters
  #        other than ':'); every occurrence of FROM is replaced with TO.
  #        Both parts are taken literally.
  def format_gsub(args, e, k, v)
    args.scan(/([^:]+):([^:]+)/).each do |from, to|
      # A string pattern and a block replacement are both taken literally.
      v = v.gsub(from) { to }
    end
    [e, k, v]
  end
  # Pad the field name on the right to the width stored in the entry's
  # '_keysmlen' pseudo-field.
  def format_align(args, e, k, v)
    padded = "%-#{e['_keysmlen']}s" % k
    [e, padded, v]
  end
  # For the 'author' and 'editor' fields, rewrite every name of the form
  # "Last, First" as "First Last". Names are separated by "and"; names
  # that do not match the pattern are left as they are. Other fields are
  # passed through unchanged.
  def format_canonicAuthors(args, e, k, v)
    return [e, k, v] unless k == 'author' || k == 'editor'
    names = v.split(/\s+and\s+/).map do |name|
      parts = /^(\S+?),\s*(.+)$/.match(name)
      parts ? "#{parts[2]} #{parts[1]}" : name
    end
    [e, k, names.join(' and ')]
  end
  # For the 'pages' field, replace every run of hyphens and en dashes
  # (including surrounding whitespace) with a single hyphen, e.g.
  # "12 -- 34" becomes "12-34". Other fields are passed through.
  #
  # The character class used to be an HTML-escaped en dash, which matched
  # digits and other characters and thereby destroyed the page numbers.
  def format_canonicPages(args, e, k, v)
    v = v.gsub(/\s*[-\u2013]+\s*/, '-') if k == 'pages'
    [e, k, v]
  end
  # Replace straight quotes with TeX-style quotes in values that contain
  # at least one double quote. Quotes preceded by a backslash are skipped.
  #
  # A quote counts as an opening quote (`` or `) if it is the first
  # character of a line or follows a control, punctuation or whitespace
  # character; otherwise it is a closing quote ('' or '). Previously a
  # quote at the very beginning was turned into a closing quote.
  def format_canonicQuotes(args, e, k, v)
    return [e, k, v] unless v =~ /"/
    v = v.gsub(/(^|[^\\])("|')/) do
      # Save the captures before running another match.
      pre = $1
      mark = $2
      opening = pre.empty? || pre =~ /[[:cntrl:][:punct:][:space:]]/
      quote = if mark == '"'
        opening ? '``' : "''"
      else
        opening ? '`' : "'"
      end
      pre + quote
    end
    [e, k, v]
  end
  # For fields whose name starts with "keyword": if the value contains a
  # comma but no semicolon, replace each comma that is followed by
  # whitespace with "; ". The value string is modified in place.
  def format_canonicKeywords(args, e, k, v)
    comma_separated = k =~ /^keyword/ && v !~ /;/ && v =~ /,/
    v.gsub!(/,\s+/, '; ') if comma_separated
    [e, k, v]
  end
  # Insert a line break (a newline followed by a space) after every chunk
  # of 20 to 78 characters that is followed by a whitespace character.
  def format_wrap(args, e, k, v)
    wrapped = v.gsub(/(.{20,78}\s)/) { |chunk| chunk + "\n " }
    [e, k, wrapped]
  end
  # Join a multi-line value into one line: every line break, together with
  # the whitespace around it, becomes a single space. Values that are not
  # strings (e.g. nil) are passed through unchanged.
  def format_unwrap(args, e, k, v)
    v = v.gsub(/\s*\n\s*/, ' ') if String === v
    [e, k, v]
  end
  417. # def format_<+TBD+>(args, e, k, v)
  418. # <+TBD+>
  419. # return [e, k, v]
  420. # end
  # Merge two differing 'author' values by joining them with " and ".
  def duplicate_field_author(oldval, val)
    "#{oldval} and #{val}"
  end
  # Merge two differing 'abstract' values by putting the new value on a
  # line of its own after the old one.
  def duplicate_field_abstract(oldval, val)
    "#{oldval}\n#{val}"
  end
  # Merge two differing 'url' values by joining them with a space.
  def duplicate_field_url(oldval, val)
    "#{oldval} #{val}"
  end
  # Merge two differing 'keywords' values: both are split at commas and
  # semicolons, duplicates are dropped and the result is joined with ", ".
  def duplicate_field_keywords(oldval, val)
    separator = /[;,]\s*/
    merged = oldval.split(separator) | val.split(separator)
    merged.join(', ')
  end
  433. private
  # Register format names in +acc+ (or in the explicit format list if
  # +acc+ is nil). A name for which a public shortcut_NAME method exists
  # is expanded by calling that method with the accumulator; any other
  # name is appended as a string.
  def set_format(acc, *value)
    target = acc || @entry_format
    names = value.map { |name| name.to_s }
    names.each do |name|
      expander = "shortcut_#{name}"
      if respond_to?(expander)
        send(expander, target)
      else
        target << name
      end
    end
  end
  445. end
  446. attr_accessor :configuration
  447. attr_accessor :crossreferenced
  # Create the configuration object (which gets a reference to this
  # instance) and start with an empty list of cross-referenced keys.
  def initialize
    @configuration   = TBibTools::Configuration.new(self)
    @crossreferenced = []
  end
  # Run the tool: parse the command line arguments, read the input (the
  # configured input files joined with newlines or, if there are none,
  # whatever Kernel#readlines provides), process it with bibtex_sort_by
  # and write the result to the configured output file or to stdout.
  def process(args)
    parse_command_line_args(args)
    sources = @configuration.input_files
    bib = if sources.empty?
      readlines.join
    else
      sources.map { |file| File.read(file) }.join("\n")
    end
    out = bibtex_sort_by(nil, bib)
    target = @configuration.output_file
    if target
      File.open(target, 'w') { |io| io.puts out }
    else
      puts out
    end
  end
  466. # Parse the command line args (provided as array), print a help
  467. # message on -h, --help, or -?.
  # Parse the command line arguments (an array) and apply them to the
  # configuration. Arguments that are not options are registered as input
  # files; the options are removed from +args+. The help message is
  # printed on -h/--help. Returns self.
  def parse_command_line_args(args)
    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: tbibtools [OPTIONS] [FILES] < IN > OUT'
      opts.separator ''
      opts.separator 'tbibtools is a free software with ABSOLUTELY NO WARRANTY under'
      opts.separator 'the terms of the GNU General Public License version 2 or newer.'
      opts.separator ''
      opts.on('-c', '--config=FILE', String, 'Configuration file') do |value|
        @configuration.config value
      end
      opts.on('-e', '--regexp=REGEXP', String, 'Display entries matching the regexp') do |value|
        @configuration.filter Regexp.new(value)
      end
      opts.on('-f', '--format=STRING', String, 'Re-format entries (order matters)') do |value|
        @configuration.format(*value.split(/,/))
      end
      opts.on('--[no-]formatted', 'Unformatted output') do |bool|
        unless bool
          @configuration.entry_format = []
          @configuration.entry_format_default = []
        end
      end
      # NOTE(review): -i passes true to sort_case, i.e. it switches to
      # case-SENSITIVE sorting although the description says "Case
      # insensitive" -- confirm which behaviour is intended.
      opts.on('-i', '--[no-]case-sensitive', 'Case insensitive') do |bool|
        @configuration.sort_case bool
      end
      opts.on('-l', '--format-list=[STRING]', String, 'Format string for list (implies --ls)') do |value|
        @configuration.shortcut_ls
        @configuration.list_format value if value
      end
      opts.on('--ls', 'Synonym for: -f list,stripPrelude ("list" implies "unwrap")') do |bool|
        @configuration.shortcut_ls if bool
      end
      opts.on('-o', '--output=FILE', String, 'Output file') do |value|
        @configuration.output value
      end
      opts.on('-P', '--strip-prelude', 'Strip the prelude: same as -f stripPrelude but helps to maintain the original formatting') do |bool|
        @configuration.strip_prelude
      end
      opts.on('-q', '--query=FIELD=REGEXP', String, 'Show entries for which field matches the regexp') do |value|
        field, rx = value.split(/=/, 2)
        @configuration.query field => Regexp.new(rx, Regexp::IGNORECASE)
      end
      opts.on('-s', '--sort=STRING', String, 'Sort (default: sort by key; key = _id, type = _type)') do |value|
        @configuration.sort_key value
      end
      opts.on('-S', '--[no-]expand-strings', 'Replace/expand strings') do |bool|
        @configuration.expand_strings bool
      end
      opts.on('--strip=FIELDS', String, 'Ignore/strip fields') do |value|
        # Pass the field names as separate arguments, not as one array.
        @configuration.strip(*value.split(/,/))
      end
      opts.on('-u', '--unsorted', 'Unsorted output') do |bool|
        @configuration.sort_key nil
      end
      opts.separator ''
      opts.separator 'Other Options:'
      opts.on('--debug', Integer, 'Show debug messages') do |v|
        $DEBUG = true
        $VERBOSE = true
      end
      opts.on('-v', '--verbose', 'Run verbosely') do |v|
        $VERBOSE = true
      end
      opts.on('-h', '--help', 'Show this message') do
        puts opts
        exit 1
      end
      opts.separator ''
      opts.separator 'Available formats:'
      format_rx = /^(format|preprocess|head|body|tail)_/
      # Object#methods returns symbols on Ruby >= 1.9; work on strings.
      method_names = @configuration.methods.map { |meth| meth.to_s }
      derived = method_names.grep(format_rx).map { |meth| meth.sub(format_rx, '') }
      format_names = (['nnIsYear', 'sortCrossref', 'downcaseType', 'upcaseType'] + derived).uniq.sort.join(', ')
      opts.separator format_names
      opts.separator ''
      opts.separator 'Known format shortcuts:'
      # The shortcuts are called only to find out what they expand to.
      # They write into a scratch list, but a shortcut may also change the
      # case sensitivity, which must not leak into the configuration.
      saved_case = @configuration.case_sensitive
      scratch = []
      method_names.grep(/^shortcut_/).sort.each do |meth|
        fn = meth.sub(/^shortcut_/, '')
        fs = @configuration.send(meth, scratch)
        opts.separator "#{fn}: #{fs.join(',')}"
      end
      @configuration.sort_case saved_case
    end
    @configuration.input(*parser.parse!(args))
    self
  end
  553. # Parse text and sort by field. If field is nil, use @sort.
  554. # Return the result as string.
  # Parse +text+, select and sort the entries and return the result as a
  # string.
  #
  # field:: the field to sort by; if nil, the configured sort key is used.
  #         If that is nil, too, the entries are not sorted.
  #
  # The entries are filtered by the configured filter regexp (matched
  # against the entry's source text) and by the queries (an entry is kept
  # if at least one queried field matches). Ignored fields are removed.
  # The prelude is put first unless it is to be stripped. An empty string
  # is returned if the format 'nil' is active.
  def bibtex_sort_by(field, text)
    field ||= @configuration.sort
    entries, prelude = simple_bibtex_parser(text, @configuration.strings_expansion)
    if @configuration.filter_rx
      entries.delete_if do |_key, entry|
        entry['_entry'] !~ @configuration.filter_rx
      end
    end
    unless @configuration.query_rx.empty?
      entries.delete_if do |_key, entry|
        # A distinct block variable keeps the sort field untouched.
        @configuration.query_rx.all? do |query_field, rx|
          entry[query_field] !~ rx
        end
      end
    end
    unless @configuration.ignore_fields.empty?
      # flatten: tolerate field names that were registered as arrays
      ignored = @configuration.ignore_fields.flatten
      entries.each_value do |entry|
        ignored.each { |name| entry.delete(name) }
      end
    end
    formats = @configuration.entry_format
    acc = []
    unless @configuration.stripPrelude or formats.include?('stripPrelude')
      acc << prelude
    end
    keys = entries.keys
    if field
      keys.sort! do |a, b|
        aa = entries[a][field] || ''
        bb = entries[b][field] || ''
        unless @configuration.case_sensitive
          aa = aa.downcase
          bb = bb.downcase
        end
        if formats.include?('nnIsYear')
          aa = replace_yy(aa)
          bb = replace_yy(bb)
        end
        if formats.include?('sortCrossref')
          # Evaluate both sides so that the comparison is symmetric. If
          # exactly one entry is cross-referenced it sorts last; otherwise
          # the field values decide.
          ac = @crossreferenced.include?(a)
          bc = @crossreferenced.include?(b)
          if ac != bc
            aa, bb = ac ? [1, 0] : [0, 1]
          end
        end
        aa <=> bb
      end
    end
    keys.each do |key|
      entry = entries[key]
      formatted = formats.empty? ? entry['_entry'] : format(entry)
      acc << formatted if formatted
    end
    formats.include?('nil') ? '' : acc.join
  end
  626. # Format the entry on the basis of @configuration.entry_format.
  627. #
  628. # The output is constructed from
  629. #
  630. # [
  631. # head_FORMAT(entry, type)
  632. # format_FORMAT(args, entry, key, val) -> body_FORMAT(entry, key, val)
  633. # tail_FORMAT(entry)
  634. # ].join("\n")
  635. #
  636. # In order to define your own formats, please see
  637. # TBibTools::Configuration.
  # Format the entry +e+ (a hash) with the configured formats.
  #
  # Returns an array of output fragments (not a joined string), or nil if
  # a preprocess method returned nil/false for the entry.
  def format(e)
    # Pseudo-fields (names starting with '_') are never printed.
    keys = e.keys.find_all {|k| k[0..0] != '_'}
    # Sort by position in keys_order; unknown fields go to the end.
    keys.sort! do |a,b|
      (@configuration.keys_order.index(a.downcase) || 99999) <=> (@configuration.keys_order.index(b.downcase) || 99999)
    end
    acc = []
    # Length of the longest field name; read by the 'align' format.
    e['_keysmlen'] = keys.inject(0) {|m, k| [m, k.size].max}
    # Each preprocess method receives the previous one's result.
    for_methods('preprocess') do |meth|
      e = @configuration.send(meth, e)
    end
    return e unless e
    unless @configuration.entry_format.include?('nil')
      type = e['_type']
      for_methods('head') do |meth|
        v = @configuration.send(meth, e, type)
        acc << v if v
      end
    end
    catch(:next_entry) do
      for k in keys
        v = e[k]
        catch(:next_key) do
          for f in @configuration.entry_format
            # A format may carry an argument: NAME=ARGS
            if f =~ /^(\w+)=(.*)$/
              f = $1
              a = $2
            else
              a = ''
            end
            m = "format_#{f}"
            if @configuration.respond_to?(m)
              # A format method returns [entry, key, value]; a shorter
              # array leaves the missing elements nil.
              e, k, v = @configuration.send(m, a, e, k, v)
              if e.nil?
                # No entry returned: stop processing this entry's fields.
                throw :next_entry
              elsif k.nil?
                # No key returned: skip this field.
                throw :next_key
              end
            end
          end
          for_methods('body') do |meth|
            v = @configuration.send(meth, e, k, v)
            acc << v if v
          end
        end
      end
    end
    unless @configuration.entry_format.include?('nil')
      for_methods('tail') do |meth|
        v = @configuration.send(meth, e)
        acc << v if v
      end
    end
    # The fragments are returned as an array; the caller joins them.
    return acc
  end
  # Call the block with the name of every PREFIX_FORMAT method the
  # configuration responds to, FORMAT being taken from the active formats
  # in their order. If there is no such method, PREFIX_default is used
  # instead (if the configuration responds to it). Returns the array of
  # method names.
  def for_methods(prefix, &block)
    candidates = @configuration.entry_format.map { |name| "#{prefix}_#{name}" }
    selected = candidates.select { |meth| @configuration.respond_to?(meth) }
    if selected.empty?
      fallback = "#{prefix}_default"
      selected << fallback if @configuration.respond_to?(fallback)
    end
    selected.each { |meth| block.call(meth) }
  end
  706. # Taken from deplate (http://deplate.sf.net).
  707. # Return a hash (key=filename) of parsed bibtex files (as hashes).
  # Read and parse the given bibtex files.
  #
  # Returns a hash that maps the expanded file name to the result of
  # simple_bibtex_parser for that file.
  #
  # A file that does not exist is looked up via
  # Deplate::External.kpsewhich if that module is loaded; otherwise, or if
  # the lookup finds nothing, the file is skipped.
  def simple_bibtex_reader(bibfiles)
    acc = {}
    bibfiles.each do |bibfile|
      path = File.expand_path(bibfile)
      unless File.exist?(path)
        # The lookup helper belongs to deplate and may not be available.
        next unless defined?(Deplate::External)
        path = Deplate::External.kpsewhich(self, path)
        next if path.nil? || path.empty?
      end
      # The parser works on a string, so read the file as one string
      # instead of an array of lines.
      acc[path] = simple_bibtex_parser(File.read(path), @configuration.strings_expansion)
    end
    acc
  end
  722. # Taken from deplate (http://deplate.sf.net). Parse text and
  723. # return a hash of hashes. Create the pseudo-keys _type, _id,
  724. # and _entry.
  725. #
  726. # This method works with a few simple regexps and makes a few
  727. # assumptions about your bib file:
  728. #
  729. # * @string definitions should be collected in the prelude, i.e.
  730. # before any bib entry.
  731. # * @string definitions must be oneliners.
  732. # * The bib entries must be more or less valid.
  733. # * Entries with curly braces may confuse the "parser".
  734. #
  735. # Return an array: [entries as hash, prelude as string]
  # Parse +text+ with a few simple regexps.
  #
  # text::              the content of one or more bibtex files (a string)
  # strings_expansion:: if true, a field value that equals the name of a
  #                     @string definition is replaced with its text
  #
  # Returns [entries, prelude]: +entries+ maps each key to a hash of the
  # entry's fields plus the pseudo-fields _lineno, _type, _id and _entry
  # (the entry's source text); +prelude+ is the concatenated source text
  # of the @string, @preamble and @comment blocks.
  #
  # Side effect: the values of 'crossref' fields are appended to
  # @crossreferenced.
  #
  # Blocks that cannot be parsed no longer raise an exception: a malformed
  # @string is kept in the prelude without being registered, and an entry
  # without a key is reported on stderr and skipped.
  def simple_bibtex_parser(text, strings_expansion=true)
    prelude = []
    strings = {}
    entries = {}
    lineno = 1
    # One iteration per "@type{...": a block extends up to the next line
    # that starts with '@' or to the end of the text.
    while (m = /^\s*(@(\w+)\{(.*?))\s*(?=(^@|\z))/m.match(text))
      text = m.post_match
      body = m[0]
      type = m[2]
      inner = m[3]
      case type.downcase
      when 'string'
        prelude << body
        mi = /^\s*(\S+?)\s*=\s*(.+?)\s*\}?\s*$/m.match(inner)
        if mi
          r = mi[2]
          # Remove the delimiters around the string's text.
          if r =~ /^(".*?"|'.*?'|\{.*?\})$/
            r = r[1..-2]
          end
          strings[mi[1]] = r
        end
      when 'preamble', 'comment'
        # Not bibliography entries: keep the text, don't try to parse it.
        prelude << body
      else
        mi = /^\s*(\S+?)\s*,(.*)$/m.match(inner)
        if mi
          id = mi[1]
          e = mi[2]
          arr = e.scan(/^\s*(\w+)\s*=\s*(\{.*?\}|".*?"|\d+)\s*[,}]\s*$/m)
          entry = {}
          arr.each do |var, val|
            # Remove the outer braces or quotes from the value.
            if (n = /^\s*\{(.*?)\}\s*$/m.match(val))
              val = n[1]
            elsif (n = /^\s*"(.*?)"\s*$/m.match(val))
              val = n[1]
            end
            if strings_expansion and strings[val]
              val = strings[val]
            end
            if (oldval = entry[var])
              if oldval != val
                # The same field twice with different values: let the
                # configuration merge them if it knows how to.
                meth = "duplicate_field_#{var}"
                if @configuration.respond_to?(meth)
                  val = @configuration.send(meth, oldval, val)
                  $stderr.puts "Resolve duplicate fields with mismatching values: #{id}.#{var}" if $VERBOSE
                  $stderr.puts "=> #{val.inspect}" if $DEBUG
                else
                  $stderr.puts "Can't resolve duplicate fields with mismatching values: #{id}.#{var}"
                  $stderr.puts "#{oldval.inspect} != #{val.inspect}" if $DEBUG
                end
              end
            end
            entry[var] = val
            @crossreferenced << val if var == 'crossref'
          end
          entry['_lineno'] = lineno.to_s
          entry['_type'] = type
          entry['_id'] = id
          entry['_entry'] = body
          if entries[id]
            if entries[id] != entry
              $stderr.puts "Duplicate key, mismatching entries: #{id}"
              if $DEBUG
                $stderr.puts entries[id]['_entry'].chomp
                $stderr.puts '<=>'
                $stderr.puts entry['_entry'].chomp
                $stderr.puts
              end
            end
            # The later entry's fields override those of the earlier one.
            entries[id].update(entry)
          else
            entries[id] = entry
          end
        else
          $stderr.puts "Can't parse bibtex entry (no key found): #{body.strip}"
        end
      end
      lineno += (m.pre_match.scan(/\n/).size + body.scan(/\n/).size)
    end
    if text =~ /\S/
      $stderr.puts "Trash in bibtex input: #{text}" if $VERBOSE
    end
    return entries, prelude.join
  end
  819. private
  # Expand every stand-alone two-digit number in +text+ to a four-digit
  # year: "19" is prepended if the number's first digit is greater than
  # the tens digit of the current two-digit year, "20" otherwise.
  #
  # The neighbouring characters are only looked at, not consumed, so that
  # two numbers separated by a single character (e.g. "98-99") are both
  # expanded.
  def replace_yy(text)
    # The current decade is the same for all matches; compute it once.
    current_decade = Time.now.strftime('%y')[0..0].to_i
    text.gsub(/(?<!\d)(\d)(\d)(?!\d)/) do
      century = $1.to_i > current_decade ? '19' : '20'
      "#{century}#{$1}#{$2}"
    end
  end
  829. end
  # Run the tool with the command line arguments when this file is
  # executed as a script (and not loaded as a library).
  TBibTools.new.process(ARGV) if __FILE__ == $0