/collects/scriblib/bibtex.rkt

https://bitbucket.org/agocke/racket · Racket · 282 lines · 261 code · 20 blank · 1 comment · 13 complexity · 5bb08afca2e3c3db22aae83a4ee0d608 MD5 · raw file

  1. #lang at-exp racket/base
  2. (require racket/function
  3. racket/match
  4. racket/list)
  5. (struct bibdb (raw bibs))
  6. (define (bibtex-parse ip)
  7. (define STRING-DB (make-hash))
  8. (define ENTRY-DB (make-hash))
  9. (define (perror ip sym fmt . args)
  10. (define loc (call-with-values (λ () (port-next-location ip)) list))
  11. (apply error sym (string-append fmt " @ line ~a column ~a byte ~a") (append args loc)))
  12. (define (read-while pred ip)
  13. (list->string
  14. (let loop ()
  15. (match (peek-char ip)
  16. [(and (? char?) (? pred))
  17. (cons (read-char ip)
  18. (loop))]
  19. [_
  20. empty]))))
  21. (define (read-until pred ip)
  22. (read-while (negate pred) ip))
  23. (define (slurp-whitespace ip)
  24. (read-while (λ (c) (and (char? c) (char-whitespace? c))) ip))
  25. (define (read-entries ip)
  26. (slurp-whitespace ip)
  27. (match (read-char ip)
  28. [#\%
  29. (read-line ip)
  30. (read-entries ip)]
  31. [#\@
  32. (read-entry ip)
  33. (read-entries ip)]
  34. [(? eof-object?)
  35. (void)]
  36. [c
  37. ;; All other characters are comments.
  38. (read-entries ip)]))
  39. (define (read-entry ip)
  40. (match (read-until (λ (c) (or (char=? c #\{)
  41. (char=? c #\()))
  42. ip)
  43. [(app string-downcase "string")
  44. (slurp-whitespace ip)
  45. (match (read-char ip)
  46. [#\{
  47. (void)]
  48. [c
  49. (perror ip 'read-entry "Parsing entry, expected {, got ~v" c)])
  50. (define tag (read-tag ip))
  51. (slurp-whitespace ip)
  52. (match (read-char ip)
  53. [#\=
  54. (slurp-whitespace ip)
  55. (define string (read-value ip))
  56. (slurp-whitespace ip)
  57. (match (read-char ip)
  58. [#\}
  59. (hash-set! STRING-DB tag string)]
  60. [c
  61. (perror ip 'read-entry "Parsing string, expected }, got ~v; tag is ~v; string is ~v" c tag string)])]
  62. [c
  63. (perror ip 'read-entry "Parsing string, expected =, got ~v; tag is ~v" c tag)])]
  64. [(app string-downcase "comment")
  65. (read-char ip)
  66. (let loop ()
  67. (read-until (λ (c) (or (char=? c #\{) (char=? c #\}))) ip)
  68. (match (read-char ip)
  69. [#\{
  70. (loop) (loop)]
  71. [#\}
  72. (void)]))]
  73. [typ
  74. (read-char ip)
  75. (slurp-whitespace ip)
  76. (define label (read-until (λ (c) (char=? c #\,)) ip))
  77. (read-char ip)
  78. (define alist
  79. (let loop ()
  80. (slurp-whitespace ip)
  81. (define atag (read-tag ip))
  82. (cond
  83. [(string=? "" atag)
  84. (read-char ip)
  85. (hash)]
  86. [else
  87. (slurp-whitespace ip)
  88. (match (read-char ip)
  89. [#\=
  90. (slurp-whitespace ip)
  91. (define aval (read-value ip))
  92. (slurp-whitespace ip)
  93. (match (read-char ip)
  94. [#\,
  95. (hash-set (loop) atag aval)]
  96. [#\}
  97. (hash atag aval)]
  98. [c
  99. (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])]
  100. [c
  101. (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])])))
  102. (hash-set! ENTRY-DB label
  103. (hash-set alist 'type typ))]))
  104. (define (read-tag ip)
  105. (slurp-whitespace ip)
  106. (string-downcase
  107. (read-until
  108. (λ (c) (or (char-whitespace? c)
  109. (char=? c #\=)
  110. (char=? c #\{)
  111. (char=? c #\})))
  112. ip)))
  113. (define (read-braced-value ip)
  114. (read-char ip)
  115. (let loop ()
  116. (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\})))
  117. ip))
  118. (match (peek-char ip)
  119. [#\{
  120. (string-append first-part (read-value ip) (loop))]
  121. [#\}
  122. (read-char ip)
  123. first-part])))
  124. (define (read-value ip)
  125. (slurp-whitespace ip)
  126. (define first-part (read-value-single ip))
  127. (slurp-whitespace ip)
  128. (match (peek-char ip)
  129. [#\#
  130. (read-char ip)
  131. (string-append first-part (read-value ip))]
  132. [_
  133. first-part]))
  134. (define (read-value-single ip)
  135. (slurp-whitespace ip)
  136. (match (peek-char ip)
  137. [#\{
  138. (read-braced-value ip)]
  139. [#\"
  140. (read-char ip)
  141. (let loop ()
  142. (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\")))
  143. ip))
  144. (match (peek-char ip)
  145. [#\{
  146. (string-append first-part (read-braced-value ip) (loop))]
  147. [#\"
  148. (read-char ip)
  149. first-part]))]
  150. [(? char-numeric?)
  151. (read-while char-numeric? ip)]
  152. [(? char-alphabetic?)
  153. (define string-tag (read-until (λ (c) (or (char-whitespace? c)
  154. (char=? c #\,)))
  155. ip))
  156. (hash-ref STRING-DB string-tag
  157. (λ () string-tag))]
  158. [c
  159. (perror ip 'read-value "Parsing value, expected {, got ~v" c)]))
  160. (read-entries ip)
  161. (bibdb ENTRY-DB (make-hash)))
  162. (define (path->bibdb pth)
  163. (define bibdb
  164. (with-input-from-file
  165. pth
  166. (λ ()
  167. (port-count-lines! (current-input-port))
  168. (bibtex-parse (current-input-port)))))
  169. bibdb)
  170. (require scriblib/autobib
  171. scribble/manual)
  172. (define-syntax-rule
  173. (define-bibtex-cite bib-pth
  174. ~cite-id citet-id generate-bibliography-id)
  175. (begin
  176. (define bibtex-db (path->bibdb bib-pth))
  177. (define-cite autobib-cite autobib-citet generate-bibliography-id)
  178. (define ((make-citer citer) f . r)
  179. (apply citer
  180. (filter-map
  181. (λ (key)
  182. (and (not (string=? "\n" key))
  183. (generate-bib bibtex-db key)))
  184. (append-map (curry regexp-split #rx" +")
  185. (cons f r)))))
  186. (define ~cite-id (make-citer autobib-cite))
  187. (define citet-id (make-citer autobib-citet))))
  188. (define (parse-author as)
  189. (apply authors
  190. (for/list ([a (in-list (regexp-split #rx" *and *" as))])
  191. (match (regexp-split #rx" +" a)
  192. [(list one) (org-author-name one)]
  193. [(list one two) (author-name one two)]
  194. [(list-rest first rest) (author-name first (apply string-append (add-between rest " ")))]))))
  195. (define (parse-pages ps)
  196. (match ps
  197. [(regexp #rx"^([0-9]+)\\-+([0-9]+)$" (list _ f l))
  198. (list f l)]
  199. [#f
  200. #f]
  201. [_
  202. (error 'parse-pages "Invalid page format ~e" ps)]))
  203. (define (generate-bib db key)
  204. (match-define (bibdb raw bibs) db)
  205. (hash-ref! bibs key
  206. (λ ()
  207. (define the-raw (hash-ref raw key (λ () (error 'bibtex "Unknown citation ~e" key))))
  208. (define (raw-attr a [def #f])
  209. (hash-ref the-raw a def))
  210. (define (raw-attr* a)
  211. (hash-ref the-raw a
  212. (λ () (error 'bibtex "Key ~a is missing attribute ~a, has ~a"
  213. key a the-raw))))
  214. (match (raw-attr 'type)
  215. ["misc"
  216. (make-bib #:title (raw-attr "title")
  217. #:author (parse-author (raw-attr "author"))
  218. #:date (raw-attr "year")
  219. #:url (raw-attr "url"))]
  220. ["book"
  221. (make-bib #:title (raw-attr "title")
  222. #:author (parse-author (raw-attr "author"))
  223. #:date (raw-attr "year")
  224. #:is-book? #t
  225. #:url (raw-attr "url"))]
  226. ["article"
  227. (make-bib #:title (raw-attr "title")
  228. #:author (parse-author (raw-attr "author"))
  229. #:date (raw-attr "year")
  230. #:location (journal-location (raw-attr* "journal")
  231. #:pages (parse-pages (raw-attr "pages"))
  232. #:number (raw-attr "number")
  233. #:volume (raw-attr "volume"))
  234. #:url (raw-attr "url"))]
  235. ["inproceedings"
  236. (make-bib #:title (raw-attr "title")
  237. #:author (parse-author (raw-attr "author"))
  238. #:date (raw-attr "year")
  239. #:location (proceedings-location (raw-attr "booktitle"))
  240. #:url (raw-attr "url"))]
  241. ["webpage"
  242. (make-bib #:title (raw-attr "title")
  243. #:author (parse-author (raw-attr "author"))
  244. #:date (raw-attr "year")
  245. #:url (raw-attr "url"))]
  246. ["techreport"
  247. (make-bib #:title (raw-attr "title")
  248. #:author (parse-author (raw-attr "author"))
  249. #:date (raw-attr "year")
  250. #:location
  251. (match* ((raw-attr "institution") (raw-attr "number"))
  252. [(#f #f) @elem{}]
  253. [(l #f) @elem{@|l|}]
  254. [(#f n) @elem{@|n|}]
  255. [(l n) @elem{@|l|, @|n|}])
  256. #:url (raw-attr "url"))]
  257. [_
  258. (make-bib #:title (format "~v" the-raw))]))))
  259. (provide (struct-out bibdb)
  260. path->bibdb
  261. bibtex-parse
  262. define-bibtex-cite)