PageRenderTime 25ms CodeModel.GetById 6ms app.highlight 17ms RepoModel.GetById 1ms app.codeStats 0ms

/analyze-quotes.rkt

http://github.com/elibarzilay/rudybot
Unknown | 67 lines | 63 code | 4 blank | 0 comment | 0 complexity | cfb42fa1873ebac5d36a79849631e396 MD5 | raw file
 1;; Run me in "drRacket"
 2#lang racket
 3(require plot
 4         (planet schematics/schemeunit:3)
 5         (planet schematics/schemeunit:3/text-ui))
 6
 7;; jordanb says the quotes aren't coming out randomly.  I don't
 8;; particularly believe him, but let's see.  I'll find (what look
 9;; like) quotes in the log, and then measure how often each appears,
10;; and then ... somehow ... do some sorta statistical analysis (the
11;; details of which are unclear at the moment).
12
;; bounding-box : (non-empty listof vector) -> (values xmin xmax ymin ymax)
;; Walks a list of points, each a vector whose slot 0 is x and slot 1 is y,
;; and returns four values: the smallest x, largest x, smallest y and
;; largest y seen.  The extremes are seeded from the first point, so the
;; list must contain at least one element.
(define (bounding-box vecs)
  (define first-pt (car vecs))
  (define x0 (vector-ref first-pt 0))
  (define y0 (vector-ref first-pt 1))
  ;; The loop revisits the first point as well; that is harmless and keeps
  ;; every point going through the same min/max path.
  (let loop ([remaining vecs]
             [lo-x x0]
             [hi-x x0]
             [lo-y y0]
             [hi-y y0])
    (if (null? remaining)
        (values lo-x hi-x lo-y hi-y)
        (let* ([pt (car remaining)]
               [px (vector-ref pt 0)]
               [py (vector-ref pt 1)])
          (loop (cdr remaining)
                (min px lo-x)
                (max px hi-x)
                (min py lo-y)
                (max py hi-y))))))
25
;; check-bb: schemeunit check that passes when (bounding-box vectors)
;; yields exactly the four expected extremes, in the order
;; xmin xmax ymin ymax.
(define-simple-check (check-bb vectors xmin xmax ymin ymax)
  (let-values ([(got-xmin got-xmax got-ymin got-ymax) (bounding-box vectors)])
    (equal? (list got-xmin got-xmax got-ymin got-ymax)
            (list xmin xmax ymin ymax))))

;; Sanity checks, run when the module is loaded: one point, one point off
;; the origin, two points, and three points spanning both axes.
(check-bb '(#(0 0)) 0 0 0 0)
(check-bb '(#(0 1)) 0 0 1 1)
(check-bb '(#(0 0) #(0 1)) 0 0 0 1)
(check-bb '(#(0 0) #(0 1) #(1 0)) 0 1 0 1)
35
;; Name of the log file to analyze (opened relative to the current directory).
(define *ifn*  "big-log")

;; Read the log, tally how many times each distinct quote was sent to
;; #emacs, and plot a histogram of those tallies: x is "number of times a
;; quote occurred", y is "how many distinct quotes occurred that often".
(call-with-input-file *ifn*
  (lambda (ip)
    ;; Add one to the counter stored under `key`, starting from 0 if absent.
    (define (hash-table-increment! table key)
      (hash-update! table key add1 0))
    (let ([counts-by-quote (make-hash)]
          [histogram (make-hash)])
      (printf "Reading from ~a ...~%" *ifn*)
      (printf "Read ~a lines.~%"
              ;; Count lines with an explicit accumulator.  The earlier
              ;; for/and over (in-naturals) returned the last zero-based
              ;; index -- one less than the number of lines -- and #t when
              ;; the file was empty.
              (for/fold ([lines-read 0])
                        ([line (in-lines ip)])
                (match line
                  [(regexp #px"=> \"PRIVMSG #emacs :(.*)\"$" (list _ stuff))
                   ;; Ignore messages that start with "word:" (presumably
                   ;; replies addressed to a nick) and the "Arooooooooooo"
                   ;; message; everything else is counted as a quote.
                   (when (and (not (regexp-match #px"^\\w+:" stuff))
                              (not (regexp-match #px"Arooooooooooo" stuff)))
                     (hash-table-increment! counts-by-quote stuff))]
                  [_ (void)])
                (add1 lines-read)))
      (printf "Snarfed ~a distinct quotes.~%" (hash-count counts-by-quote))
      ;; Invert the tally: histogram maps an occurrence count to the number
      ;; of distinct quotes that were seen exactly that many times.
      (for ([(text occurrences) (in-hash counts-by-quote)])
        (hash-table-increment! histogram occurrences))
      (printf "Histogram: ~a~%" histogram)
      (let ([vecs (hash-map histogram vector)])
        (if (null? vecs)
            ;; bounding-box needs at least one point; with no quotes there
            ;; is nothing to plot, so say so instead of failing on (car '()).
            (printf "No quotes found; nothing to plot.~%")
            (let-values ([(xmin xmax ymin ymax) (bounding-box vecs)])
              (plot (points vecs)
                    #:x-label "Number of Occurrences"
                    #:y-label "Quotes"
                    #:x-min xmin
                    #:x-max xmax
                    #:y-min ymin
                    #:y-max ymax)))))))