/tests/irregex/test-irregex-mosh.ss

http://github.com/higepon/mosh · Scheme · 329 lines · 280 code · 33 blank · 16 comment · 0 complexity · 6c505a1c872cd93d2659ed454ed117e8 MD5 · raw file

  1. (import (rnrs)
  2. (rnrs r5rs)
  3. (irregex)
  4. (match)
  5. (only (srfi :13) string-contains)
  6. (srfi :64)
  7. (rename (srfi :48) (format sprintf)))
  8. (define call-with-output-string call-with-string-output-port) ;MOSH: R6RS
  9. (define (call-with-input-string str proc)
  10. (call-with-port (open-string-input-port str) proc))
  11. ;from: http://srfi.schemers.org/srfi-13/mail-archive/msg00110.html
  12. (define (string-split s delimiter TRUE)
  13. (let ((r
  14. (let ((sl (string-length s))
  15. (dl (string-length delimiter)))
  16. (let unfold ((index 0) (strings '()))
  17. (let ((start (string-contains s delimiter index)))
  18. (if start
  19. (unfold (+ start dl)
  20. (cons (substring s index start) strings))
  21. (reverse (cons (substring s index sl) strings))
  22. ))))))
  23. ;(display r)(newline) ;;SHOW PROGRESS
  24. r))
  25. ; to stderr?
  26. (define (warning desc datum)
  27. (display (list "WARNING:" desc datum))(newline))
  28. (define-syntax test
  29. (syntax-rules ()
  30. ((_ test-name expected test-expr)
  31. (begin
  32. (test-equal test-name expected test-expr)))
  33. ((_ expected test-expr) (test-equal "[NO-NAME]" expected test-expr))))
  34. (define (port-for-each proc sym)
  35. (let ((l (get-line (current-input-port))))
  36. (cond
  37. ((eof-object? l) 'ok)
  38. (else
  39. (proc l)
  40. (port-for-each proc 'none)))))
  41. (define read-line 'ok)
  42. (define (intersperse l e)
  43. (define (comp c i) (cons i (cons e c)))
  44. (let ((f (car l))
  45. (d (cdr l)))
  46. (if (null? d)
  47. f
  48. (reverse (fold-left comp (list f) d)))))
  49. (define (string-intersperse l e)
  50. (apply string-append (intersperse l e)))
  51. ;;;;;;;;;;;;;;;;;;;;
  52. ;;; orig
  53. (define (subst-matches matches subst)
  54. (define (submatch n)
  55. (if (vector? matches)
  56. (and (irregex-match-valid-index? matches n)
  57. (irregex-match-substring matches n))
  58. (list-ref matches n)))
  59. (and
  60. matches
  61. (call-with-output-string
  62. (lambda (out)
  63. (call-with-input-string subst
  64. (lambda (in)
  65. (let lp ()
  66. (let ((c (read-char in)))
  67. (cond
  68. ((not (eof-object? c))
  69. (case c
  70. ((#\&)
  71. (display (or (submatch 0) "") out))
  72. ((#\\)
  73. (let ((c (read-char in)))
  74. (if (char-numeric? c)
  75. (let lp ((res (list c)))
  76. (if (and (char? (peek-char in))
  77. (char-numeric? (peek-char in)))
  78. (lp (cons (read-char in) res))
  79. (display
  80. (or (submatch (string->number
  81. (list->string (reverse res))))
  82. "")
  83. out)))
  84. (write-char c out))))
  85. (else
  86. (write-char c out)))
  87. (lp)))))))))))
  88. (define (test-re matcher line)
  89. (match (string-split line "\t" #t)
  90. ((pattern input result subst output)
  91. (let ((name (sprintf "~A ~A ~A ~A" pattern input result subst)))
  92. (cond
  93. ((equal? "c" result)
  94. (test-error name (matcher pattern input)))
  95. ((equal? "n" result)
  96. (test-assert name (not (matcher pattern input))))
  97. (else
  98. (test name output
  99. (subst-matches (matcher pattern input) subst))))))
  100. (else
  101. (warning "invalid regex test line" line))))
  102. (test-begin "irregex") ;MOSH: srfi-64
  103. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  104. ;; basic irregex
  105. (for-each
  106. (lambda (opts)
  107. (test-group (sprintf "irregex - ~S" opts)
  108. (with-input-from-file "re-tests.txt"
  109. (lambda ()
  110. (port-for-each
  111. (lambda (line)
  112. (test-re (lambda (pat str)
  113. (irregex-search (apply irregex pat opts) str))
  114. line))
  115. read-line)))))
  116. '((backtrack)
  117. (fast)
  118. ))
  119. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  120. ;; chunked irregex
  121. (define (rope . args)
  122. (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args))
  123. (define rope-chunker
  124. (make-irregex-chunker
  125. (lambda (x) (and (pair? (cdr x)) (cdr x)))
  126. caar
  127. cadar
  128. caddar
  129. (lambda (src1 i src2 j)
  130. (if (eq? src1 src2)
  131. (substring (caar src1) i j)
  132. (let lp ((src (cdr src1))
  133. (res (list (substring (caar src1) i (caddar src1)))))
  134. (if (eq? src src2)
  135. (string-intersperse
  136. (reverse (cons (substring (caar src2) (cadar src2) j) res))
  137. "")
  138. (lp (cdr src)
  139. (cons (substring (caar src) (cadar src) (caddar src))
  140. res))))))))
  141. (define (make-ropes str)
  142. (let ((len (string-length str)))
  143. (case len
  144. ((0 1)
  145. (list (rope str)))
  146. ((2)
  147. (list (rope str)
  148. (rope (substring str 0 1) (substring str 1 2))))
  149. ((3)
  150. (list (rope str)
  151. (rope (substring str 0 1) (substring str 1 3))
  152. (rope (substring str 0 2) (substring str 2 3))
  153. (rope (substring str 0 1)
  154. (substring str 1 2)
  155. (substring str 2 3))))
  156. (else
  157. (let ((mid (quotient (+ len 1) 2)))
  158. (list (rope str)
  159. (rope (substring str 0 1) (substring str 1 len))
  160. (rope (substring str 0 mid) (substring str mid len))
  161. (rope (substring str 0 (- len 1))
  162. (substring str (- len 1) len))
  163. (rope (substring str 0 1)
  164. (substring str 1 mid)
  165. (substring str mid len))
  166. ))))))
  167. (define (make-shared-ropes str)
  168. (let ((len (string-length str)))
  169. (case len
  170. ((0 1)
  171. '())
  172. ((2)
  173. (list (list (list str 0 1) (list str 1 2))))
  174. ((3)
  175. (list (list (list str 0 1) (list str 1 3))
  176. (list (list str 0 2) (list str 2 3))
  177. (list (list str 0 1) (list str 1 2) (list str 2 3))))
  178. (else
  179. (let ((mid (quotient (+ len 1) 2)))
  180. (list (list (list str 0 1) (list str 1 len))
  181. (list (list str 0 mid) (list str mid len))
  182. (list (list str 0 (- len 1))
  183. (list str (- len 1) len))
  184. (list (list str 0 1) (list str 1 mid) (list str mid len))
  185. ))))))
  186. (for-each
  187. (lambda (opts)
  188. (test-group (sprintf "irregex/chunked - ~S" opts)
  189. (with-input-from-file "re-tests.txt"
  190. (lambda ()
  191. (port-for-each
  192. (lambda (line)
  193. (match (string-split line "\t" #t)
  194. ((pattern input result subst output)
  195. (let ((name
  196. (sprintf "~A ~A ~A ~A" pattern input result subst)))
  197. (cond
  198. ((equal? "c" result))
  199. ((equal? "n" result)
  200. (for-each
  201. (lambda (rope)
  202. (test-assert name
  203. (not (irregex-search/chunked pattern
  204. rope-chunker
  205. rope))))
  206. (append (make-ropes input)
  207. (make-shared-ropes input))))
  208. (else
  209. (for-each
  210. (lambda (rope)
  211. (test name output
  212. (subst-matches (irregex-search/chunked pattern
  213. rope-chunker
  214. rope)
  215. subst)))
  216. (append (make-ropes input)
  217. (make-shared-ropes input)))))))
  218. (else
  219. (warning "invalid regex test line" line)))
  220. )
  221. read-line)))))
  222. '((backtrack)
  223. (fast)
  224. ))
  225. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  226. ;; pregexp
  227. '(test-group "pregexp"
  228. (with-input-from-file "re-tests.txt"
  229. (lambda ()
  230. (port-for-each
  231. (lambda (line) (test-re pregexp-match line))
  232. read-line))))
  233. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  234. ;; default regex (PCRE)
  235. '(test-group "regex"
  236. (with-input-from-file "re-tests.txt"
  237. (lambda ()
  238. (port-for-each
  239. (lambda (line) (test-re string-search line))
  240. read-line))))
  241. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  242. (test-group "unmatchable patterns"
  243. (test-assert (not (irregex-search '(or) "abc")))
  244. (test-assert (not (irregex-search '(: "ab" (or)) "abc")))
  245. (test-assert (not (irregex-search '(submatch "ab" (or)) "abc")))
  246. (test-assert (not (irregex-search '(: "ab" (submatch (or))) "abc")))
  247. (test-assert (not (irregex-search '(/) "abc")))
  248. (test-assert (not (irregex-search '(: "ab" (/)) "abc")))
  249. (test-assert (not (irregex-search '(~ any) "abc")))
  250. (test-assert (not (irregex-search '(: "ab" (~ any)) "abc")))
  251. (test-assert (not (irregex-search '("") "abc")))
  252. (test-assert (not (irregex-search '(: "ab" ("")) "abc")))
  253. )
  254. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  255. (test-group "API"
  256. (test-assert (irregex? (irregex "a.*b")))
  257. (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
  258. (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f #f))))
  259. (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f #f))))
  260. (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
  261. (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
  262. (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
  263. (test-assert (not (irregex-match-data? (vector #f #f #f #f #f #f #f #f #f #f #f))))
  264. (test 0 (irregex-num-submatches (irregex "a.*b")))
  265. (test 1 (irregex-num-submatches (irregex "a(.*)b")))
  266. (test 2 (irregex-num-submatches (irregex "(a(.*))b")))
  267. (test 2 (irregex-num-submatches (irregex "a(.*)(b)")))
  268. (test 10 (irregex-num-submatches (irregex "((((((((((a))))))))))")))
  269. (test 0 (irregex-match-num-submatches (irregex-search "a.*b" "axxxb")))
  270. (test 1 (irregex-match-num-submatches (irregex-search "a(.*)b" "axxxb")))
  271. (test 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
  272. (test 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
  273. (test 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
  274. )
  275. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  276. (test-group "utils"
  277. (test "h*llo world"
  278. (irregex-replace "[aeiou]" "hello world" "*"))
  279. (test "h*ll* w*rld"
  280. (irregex-replace/all "[aeiou]" "hello world" "*"))
  281. (test '("bob@test.com" "fred@example.com")
  282. (irregex-fold 'email
  283. (lambda (i m s) (cons (irregex-match-substring m) s))
  284. '()
  285. "bob@test.com and fred@example.com"
  286. (lambda (i s) (reverse s))))
  287. (test '("bob@test.com" "fred@example.com")
  288. (irregex-fold/chunked
  289. 'email
  290. (lambda (src i m s) (cons (irregex-match-substring m) s))
  291. '()
  292. rope-chunker
  293. (rope "bob@test.com and fred@example.com")
  294. (lambda (src i s) (reverse s))))
  295. )
  296. (test-end)