/elis-log-parsing-ideas.rkt

http://github.com/elibarzilay/rudybot · Racket · 99 lines · 72 code · 11 blank · 16 comment · 8 complexity · 8792f26ffcde85d8c23bff2dc1b6eb48 MD5 · raw file

  1. #lang racket/base
  2. (require racket/match racket/pretty (for-syntax racket/base))
  ;; One parsed PRIVMSG line.  Declared #:prefab, so an instance written with
  ;; "~s" or pretty-print shows up as a #s(utterance ...) datum.
  (struct utterance
    (timestamp   ; leading field of the log line, kept exactly as matched
     speaker     ; nick taken from the message prefix
     target      ; what follows PRIVMSG, up to the " :" separator
     text)       ; everything after the " :" separator
    #:prefab)
  4. ;; All timings are done by running 5 times, dropping highest and lowest,
  5. ;; averaging the rest, and rounding to nearest ms.
  6. ;; Original version
  7. ;; cpu time: 12622 real time: 12618 gc time: 134
  ;; Baseline parser for old-format log lines, where the IRC message sits
  ;; after "<=" as a quoted string literal.  Produces an utterance for an
  ;; incoming PRIVMSG and #f for every other line.
  (define (string->utterance0 s)
    ;; Only strings and byte strings are offered to the regexps; any other
    ;; value simply counts as "no match".
    (define (try-rx rx v)
      (and (or (string? v) (bytes? v))
           (regexp-match rx v)))
    (define line-parts
      (try-rx #px"^ *([[:print:]]*?) <= +(\".*\")" s))
    (cond
      [(not line-parts) #f]
      [else
       (define timestamp (cadr line-parts))
       ;; Turn the quoted literal back into the raw IRC message.
       (define message (read (open-input-string (caddr line-parts))))
       (define msg-parts
         (try-rx #px"^:(.*?)!(.*?)@(.*?) PRIVMSG ([[:print:]]+?) :(.*)"
                 message))
       ;; Capture groups: 1 nick, 2 id, 3 host, 4 target, 5 text.
       (and msg-parts
            (utterance timestamp
                       (list-ref msg-parts 1)
                       (list-ref msg-parts 4)
                       (list-ref msg-parts 5)))]))
  ;; Run the currently selected `string->utterance` over every line of
  ;; input-file and pretty-print each utterance it yields to output-file
  ;; (truncated first if it already exists).  Lines that parse to #f are
  ;; skipped.
  (define (parse-file0 input-file output-file)
    (define (copy-utterances inp outp)
      (for* ([line (in-lines inp)]
             [utz (in-value (string->utterance line))]
             #:when utz)
        (pretty-print utz outp)))
    ;; The input port is opened before the output port.
    (call-with-input-file input-file
      (lambda (inp)
        (call-with-output-file output-file #:exists 'truncate
          (lambda (outp)
            (copy-utterances inp outp))))))
  26. ;; Simple printout
  27. ;; cpu time: 4295 real time: 4295 gc time: 34
  ;; Same job as the pretty-printing driver, but each utterance is emitted
  ;; with a plain "~s" and a newline.  output-file is truncated first if it
  ;; already exists; lines that parse to #f are skipped.
  (define (parse-file1 input-file output-file)
    (define (emit-all inp outp)
      ;; 'any is the line-ending mode `in-lines` uses by default.
      (let loop ([line (read-line inp 'any)])
        (unless (eof-object? line)
          (cond
            [(string->utterance line)
             => (lambda (utz) (fprintf outp "~s\n" utz))])
          (loop (read-line inp 'any)))))
    ;; The input port is opened before the output port.
    (call-with-input-file input-file
      (lambda (inp)
        (call-with-output-file output-file #:exists 'truncate
          (lambda (outp)
            (emit-all inp outp))))))
  36. ;; Avoid non-greedy regexps
  37. ;; cpu time: 3052 real time: 3051 gc time: 36
  ;; Old-format parser whose regexps use negated character classes in place
  ;; of non-greedy quantifiers.  Produces an utterance for an incoming
  ;; PRIVMSG and #f for every other line.
  (define (string->utterance1 s)
    ;; Only strings and byte strings are candidates for matching.
    (define (candidate? v) (or (string? v) (bytes? v)))
    (let* ([outer (and (candidate? s)
                       (regexp-match #px"^ *([^ ]*) <= +(\".*\")" s))]
           ;; Turn the quoted literal back into the raw IRC message.
           [message (and outer
                         (read (open-input-string (caddr outer))))]
           [inner (and (candidate? message)
                       (regexp-match
                        #px"^:([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)"
                        message))])
      (and inner
           (apply (lambda (_whole nick _id _host target text)
                    (utterance (cadr outer) nick target text))
                  inner))))
  48. ;; Use this to convert the log file
  ;; Rewrite input-log into output-log (truncated first if it exists),
  ;; replacing the quoted string literal that follows a "<=" or "=>" marker
  ;; with the string's raw contents.  Lines without such a marker are copied
  ;; through unchanged.
  ;;
  ;; Raises a `convert-log` error naming the offending line when a line
  ;; starts with a space, or when the text after the marker is not a
  ;; readable string literal.
  (define (convert-log input-log output-log)
    ;; Every malformed line is reported the same way, naming the line.
    (define (bad-line line)
      (error 'convert-log "bad log line: ~a" line))
    ;; Decode the quoted payload.  A literal the reader rejects (for example
    ;; an unterminated string) is just another malformed line, so report it
    ;; through `bad-line` instead of leaking a bare read error that does not
    ;; say which line was at fault.
    (define (decode-payload text line)
      (define datum
        (with-handlers ([exn:fail:read? (lambda (_) (bad-line line))])
          (read (open-input-string text))))
      (unless (string? datum) (bad-line line))
      datum)
    (call-with-input-file input-log
      (lambda (inp)
        (call-with-output-file output-log #:exists 'truncate
          (lambda (outp)
            (for ([line (in-lines inp)])
              (when (regexp-match? #rx"^ " line) (bad-line line))
              (cond
                ;; Groups: 1 timestamp, 2 direction marker, 3 quoted payload.
                [(regexp-match #rx"^([^ ]*) (<=|=>) (.*)$" line)
                 => (lambda (m)
                      (fprintf outp "~a ~a ~a\n"
                               (cadr m)
                               (caddr m)
                               (decode-payload (cadddr m) line)))]
                [else (displayln line outp)])))))))
  64. ;; (convert-log "big-log" "new-big-log")
  65. ;; (exit)
  66. ;; Using new format, no need for reading from the string
  67. ;; cpu time: 2383 real time: 2382 gc time: 29
  ;; Parser for new-format lines, where the IRC message follows "<= "
  ;; verbatim, so no reader pass is needed.  Produces an utterance for an
  ;; incoming PRIVMSG and #f for every other line.
  (define (string->utterance2 s)
    ;; Only strings and byte strings are offered to the regexps; any other
    ;; value simply counts as "no match".
    (define (rx-fields rx v)
      (and (or (string? v) (bytes? v))
           (regexp-match rx v)))
    ;; Groups: 1 timestamp, 2 raw IRC message.
    (define envelope
      (rx-fields #px"^([^ ]*) <= (.*)$" s))
    ;; Groups: 1 nick, 2 id, 3 host, 4 target, 5 text.
    (define message
      (and envelope
           (rx-fields #px"^:([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)"
                      (caddr envelope))))
    (and message
         (utterance (cadr envelope)
                    (list-ref message 1)
                    (list-ref message 4)
                    (list-ref message 5))))
  77. ;; Combine the two regexps
  78. ;; cpu time: 1937 real time: 1936 gc time: 25
  ;; New-format parser that recognises the whole line with a single regexp.
  ;; Produces an utterance for an incoming PRIVMSG and #f for every other
  ;; line.
  (define (string->utterance3 s)
    (cond
      ;; Only strings and byte strings are candidates for matching.
      [(and (or (string? s) (bytes? s))
            (regexp-match
             #px"^([^ ]*) <= :([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)$"
             s))
       => (lambda (fields)
            (apply (lambda (_whole timestamp nick _id _host target text)
                     (utterance timestamp nick target text))
                   fields))]
      [else #f]))
  85. ;; selectors for the version to use
  ;; Each alias forwards to one numbered variant; change the name on the
  ;; right-hand side to benchmark a different parser or file driver.
  (define-syntax string->utterance
    (make-rename-transformer #'string->utterance3))
  (define-syntax parse-file
    (make-rename-transformer #'parse-file1))

  ;; Timed run: parse "new-big-log" into "parsed" with the selected pair.
  (time (parse-file "new-big-log" "parsed"))