PageRenderTime 55ms CodeModel.GetById 11ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 0ms

/elis-log-parsing-ideas.rkt

http://github.com/elibarzilay/rudybot
Unknown | 99 lines | 88 code | 11 blank | 0 comment | 0 complexity | 8792f26ffcde85d8c23bff2dc1b6eb48 MD5 | raw file
 1#lang racket/base
 2
 3(require racket/match racket/pretty (for-syntax racket/base))
 4
 5(struct utterance (timestamp speaker target text) #:prefab)
 6
 7;; All timings are done by running 5 times, dropping highest and lowest,
 8;; averaging the rest, and rounding to nearest ms.
 9
10;; Original version
11;; cpu time: 12622 real time: 12618 gc time: 134
;; string->utterance0 : s -> (or/c utterance? #f)
;; Baseline parser for the old log format, where the IRC message is stored
;; as a quoted Racket string literal after the " <= " marker.  Returns an
;; `utterance' for PRIVMSG lines and #f for everything else.  Both regexps
;; use non-greedy quantifiers.
(define (string->utterance0 s)
  ;; Splits "<timestamp> <= <quoted literal>".
  (define line-rx #px"^ *([[:print:]]*?) <= +(\".*\")")
  ;; Splits ":nick!id@host PRIVMSG target :text".
  (define privmsg-rx
    #px"^:(.*?)!(.*?)@(.*?) PRIVMSG ([[:print:]]+?) :(.*)")
  ;; Turns the quoted literal back into the value it denotes by running the
  ;; reader over it.
  (define (decode-literal quoted)
    (read (open-input-string quoted)))
  (match s
    [(regexp line-rx (list _ stamp quoted))
     (match (decode-literal quoted)
       ;; The id and host captures are not needed for the result.
       [(regexp privmsg-rx (list _ who _ _ where said))
        (utterance stamp who where said)]
       [_ #f])]
    [_ #f]))
;; parse-file0 : input-file output-file -> void
;; Reads `input-file' line by line, parses each line with the currently
;; selected `string->utterance', and pretty-prints every successful parse
;; to `output-file'.  Existing output contents are truncated.
(define (parse-file0 input-file output-file)
  ;; Copies every parsable line from `in' to `out'; lines for which the
  ;; parser returns #f are skipped.
  (define (emit-utterances in out)
    (for* ([line   (in-lines in)]
           [parsed (in-value (string->utterance line))]
           #:when parsed)
      (pretty-print parsed out)))
  (call-with-input-file input-file
    (lambda (in)
      (call-with-output-file output-file #:exists 'truncate
        (lambda (out)
          (emit-utterances in out))))))
30
31;; Simple printout
32;; cpu time: 4295 real time: 4295 gc time: 34
;; parse-file1 : input-file output-file -> void
;; Same job as `parse-file0', but each parsed utterance is emitted with a
;; plain `fprintf' "~s" line instead of going through the pretty printer.
;; Existing output contents are truncated.
(define (parse-file1 input-file output-file)
  (call-with-input-file input-file
    (lambda (source)
      (call-with-output-file output-file #:exists 'truncate
        (lambda (sink)
          ;; Writes one parsed utterance followed by a newline.
          (define (emit parsed)
            (fprintf sink "~s\n" parsed))
          (for ([line (in-lines source)])
            ;; `cond' with `=>' only calls `emit' when the parser produced
            ;; a non-#f result.
            (cond
              [(string->utterance line) => emit])))))))
41
42;; Avoid non-greedy regexps
43;; cpu time: 3052 real time: 3051 gc time: 36
;; string->utterance1 : s -> (or/c utterance? #f)
;; Old-format parser that replaces the non-greedy quantifiers with negated
;; character classes.  The payload is still a quoted string literal that
;; has to be run through the reader.  Returns #f for non-PRIVMSG lines.
(define (string->utterance1 s)
  ;; Turns the quoted literal back into the value it denotes.
  (define (decode-literal quoted)
    (read (open-input-string quoted)))
  (match s
    ;; The third element of the outer match is decoded with `app' and then
    ;; matched against the PRIVMSG regexp, all inside a single clause; any
    ;; failure falls through to the #f clause below.
    [(regexp #px"^ *([^ ]*) <= +(\".*\")"
             (list _
                   stamp
                   (app decode-literal
                        (regexp #px"^:([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)"
                                (list _ who _ _ where said)))))
     (utterance stamp who where said)]
    [_ #f]))
54
55;; Use this to convert the log file
;; convert-log : input-log output-log -> void
;; Rewrites a log so that the payload after "<=" / "=>" is stored as raw
;; text instead of a quoted string literal.  Lines that do not have the
;; "<timestamp> <arrow> <payload>" shape are copied unchanged.  Raises an
;; error (tagged 'convert-log) for a line that starts with a space or whose
;; payload does not read as a string.  Existing output is truncated.
(define (convert-log input-log output-log)
  ;; Aborts the conversion, reporting the offending line.
  (define (reject line)
    (error 'convert-log "bad log line: ~a" line))
  ;; Converts a single line and writes the result to `outp'.
  (define (convert-line line outp)
    (when (regexp-match? #rx"^ " line)
      (reject line))
    (match (regexp-match #rx"^([^ ]*) (<=|=>) (.*)$" line)
      ;; Not an arrow line: pass it through verbatim.
      [#f (displayln line outp)]
      [(list _ stamp arrow quoted)
       ;; The payload is expected to be a string literal; read it back.
       (define payload (read (open-input-string quoted)))
       (unless (string? payload)
         (reject line))
       (fprintf outp "~a ~a ~a\n" stamp arrow payload)]))
  (call-with-input-file input-log
    (lambda (inp)
      (call-with-output-file output-log #:exists 'truncate
        (lambda (outp)
          (for ([line (in-lines inp)])
            (convert-line line outp)))))))
71;; (convert-log "big-log" "new-big-log")
72;; (exit)
73
74;; Using new format, no need for reading from the string
75;; cpu time: 2383 real time: 2382 gc time: 29
;; string->utterance2 : s -> (or/c utterance? #f)
;; Parser for the converted log format, where the payload after " <= " is
;; raw text, so no `read' step is needed.  Still uses two regexps: one to
;; split off the timestamp, one to pick the PRIVMSG apart.  Returns #f for
;; lines that match neither.
(define (string->utterance2 s)
  (match s
    ;; The payload capture is matched directly by a nested regexp pattern
    ;; rather than by a second `match' form.
    [(regexp #px"^([^ ]*) <= (.*)$"
             (list _
                   stamp
                   (regexp #px"^:([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)"
                           (list _ who _ _ where said))))
     (utterance stamp who where said)]
    [_ #f]))
85
86;; Combine the two regexps
87;; cpu time: 1937 real time: 1936 gc time: 25
;; string->utterance3 : s -> (or/c utterance? #f)
;; Parser for the converted log format that does the whole job with one
;; regexp.  Returns an `utterance' for incoming PRIVMSG lines and #f for
;; anything else.
(define (string->utterance3 s)
  ;; Captures, in order: timestamp, nick, id, host, target, text.
  (define log-line-rx
    #px"^([^ ]*) <= :([^!]*)!([^@]*)@([^ ]*) PRIVMSG ([^:]+) :(.*)$")
  (match s
    ;; The id and host captures are matched but not kept.
    [(regexp log-line-rx (list _ stamp who _ _ where said))
     (utterance stamp who where said)]
    [_ #f]))
94
95;; selectors for the version to use
;; `string->utterance' and `parse-file' are compile-time aliases (rename
;; transformers) for whichever numbered variant should be exercised.
;; Change the quoted identifiers below to switch implementations.
(define-syntaxes (string->utterance parse-file)
  (values (make-rename-transformer #'string->utterance3)
          (make-rename-transformer #'parse-file1)))
98
;; Benchmark driver: runs the selected `parse-file' once over the converted
;; log when the module is instantiated, with `time' reporting cpu / real /
;; gc milliseconds.
(let ([input-log   "new-big-log"]
      [output-file "parsed"])
  (time (parse-file input-log output-file)))