/analyze-quotes.rkt

http://github.com/elibarzilay/rudybot · Racket · 67 lines · 57 code · 4 blank · 6 comment · 5 complexity · cfb42fa1873ebac5d36a79849631e396 MD5 · raw file

  1. ;; Run me in "drRacket"
  2. #lang racket
  3. (require plot
  4. (planet schematics/schemeunit:3)
  5. (planet schematics/schemeunit:3/text-ui))
  6. ;; jordanb says the quotes aren't coming out randomly. I don't
  7. ;; particularly believe him, but let's see. I'll find (what look
  8. ;; like) quotes in the log, and then measure how often each appears,
  9. ;; and then ... somehow ... do some sorta statistical analysis (the
  10. ;; details of which are unclear at the moment).
  ;; bounding-box : (non-empty list of vectors) -> (values xmin xmax ymin ymax)
  ;; Reads slot 0 of every vector as an x coordinate and slot 1 as a y
  ;; coordinate, and returns four values: the smallest x, the largest x,
  ;; the smallest y and the largest y.  The first vector seeds all four
  ;; extremes, so VECS must contain at least one element.
  (define (bounding-box vecs)
    (define seed (car vecs))
    (define seed-x (vector-ref seed 0))
    (define seed-y (vector-ref seed 1))
    ;; Walk the whole list (seed included), tightening the four extremes.
    (let loop ([remaining vecs]
               [lo-x seed-x]
               [hi-x seed-x]
               [lo-y seed-y]
               [hi-y seed-y])
      (if (null? remaining)
          (values lo-x hi-x lo-y hi-y)
          (let* ([pt (car remaining)]
                 [px (vector-ref pt 0)]
                 [py (vector-ref pt 1)])
            (loop (cdr remaining)
                  (min px lo-x)
                  (max px hi-x)
                  (min py lo-y)
                  (max py hi-y))))))
  ;; check-bb: a simple check that succeeds when the four values returned
  ;; by (bounding-box vectors) are, in order, equal? to XMIN XMAX YMIN YMAX.
  (define-simple-check (check-bb vectors xmin xmax ymin ymax)
    (let ([actual (call-with-values (lambda () (bounding-box vectors))
                                    list)]
          [expected (list xmin xmax ymin ymax)])
      (equal? actual expected)))

  ;; Sanity checks for bounding-box: one point, then two and three points.
  (check-bb '(#(0 0)) 0 0 0 0)
  (check-bb '(#(0 1)) 0 0 1 1)
  (check-bb '(#(0 0) #(0 1)) 0 0 0 1)
  (check-bb '(#(0 0) #(0 1) #(1 0)) 0 1 0 1)
  ;; Name of the log file to analyze; opened by call-with-input-file below.
  (define *ifn* "big-log")

  ;; Read the log line by line, count how often each candidate quote was
  ;; sent to #emacs, build a histogram of those counts, print a summary,
  ;; and plot the histogram as points.
  (call-with-input-file *ifn*
    (lambda (ip)
      ;; Add one to the number stored under KEY in TABLE (starting from 0
      ;; when KEY is absent).
      (define (hash-table-increment! table key)
        (hash-update! table key add1 0))
      (let ([counts-by-quote (make-hash)] ; quote text -> times it was seen
            [histogram (make-hash)])      ; times seen -> number of quotes
        (printf "Reading from ~a ...~%" *ifn*)
        (printf "Read ~a lines.~%"
                ;; Thread an explicit counter through the loop so the
                ;; reported total is the real number of lines read (0 for
                ;; an empty file) rather than the index of the last line.
                (for/fold ([lines-read 0])
                          ([line (in-lines ip)])
                  (match line
                    ;; Only lines ending in a PRIVMSG to #emacs are considered.
                    [(regexp #px"=> \"PRIVMSG #emacs :(.*)\"$" (list _ stuff))
                     ;; Skip messages that begin with "word:" and the
                     ;; "Arooooooooooo" message; everything else is
                     ;; counted as a quote.
                     (when (and (not (regexp-match #px"^\\w+:" stuff))
                                (not (regexp-match #px"Arooooooooooo" stuff)))
                       (hash-table-increment! counts-by-quote stuff))]
                    [_ #f])
                  (add1 lines-read)))
        (printf "Snarfed ~a distinct quotes.~%" (hash-count counts-by-quote))
        ;; Invert the tally: for each occurrence count, how many distinct
        ;; quotes were seen exactly that many times.
        (for ([(k v) (in-hash counts-by-quote)])
          (hash-table-increment! histogram v))
        (printf "Histogram: ~a~%" histogram)
        ;; Each histogram entry becomes a #(occurrences quotes) point.
        (let ([vecs (hash-map histogram vector)])
          (cond
            ;; bounding-box needs at least one point; with no quotes there
            ;; is nothing to plot, so report that instead of failing.
            [(null? vecs)
             (printf "No quotes found; nothing to plot.~%")]
            [else
             ;; NOTE(review): if every quote has the same occurrence count
             ;; the bounds collapse to a single point (xmin = xmax) --
             ;; confirm plot accepts a zero-width range.
             (let-values ([(xmin xmax ymin ymax) (bounding-box vecs)])
               (plot (points vecs)
                     #:x-label "Number of Occurrences"
                     #:y-label "Quotes"
                     #:x-min xmin
                     #:x-max xmax
                     #:y-min ymin
                     #:y-max ymax))])))))