/strings.arc

http://github.com/alimoeeny/arc · Unknown · 241 lines · 204 code · 37 blank · 0 comment · 0 complexity · 2ee9f4940936fecb0619a495cece434f MD5 · raw file

  1. ; Matching. Spun off 29 Jul 06.
  2. ; arc> (tostring (writec (coerce 133 'char)))
  3. ;
  4. ;> (define ss (open-output-string))
  5. ;> (write-char (integer->char 133) ss)
  6. ;> (get-output-string ss)
  7. ;"\u0085"
  8. (def tokens (s (o sep whitec))
  9. (let test (testify sep)
  10. (let rec (afn (cs toks tok)
  11. (if (no cs) (consif tok toks)
  12. (test (car cs)) (self (cdr cs) (consif tok toks) nil)
  13. (self (cdr cs) toks (cons (car cs) tok))))
  14. (rev (map [coerce _ 'string]
  15. (map rev (rec (coerce s 'cons) nil nil)))))))
  16. ; names of cut, split, halve not optimal
  17. (def halve (s (o sep whitec))
  18. (let test (testify sep)
  19. (let rec (afn (cs tok)
  20. (if (no cs) (list (rev tok))
  21. (test (car cs)) (list cs (rev tok))
  22. (self (cdr cs) (cons (car cs) tok))))
  23. (rev (map [coerce _ 'string]
  24. (rec (coerce s 'cons) nil))))))
  25. ; maybe promote to arc.arc, but if so include a list clause
  26. (def positions (test seq)
  27. (accum a
  28. (let f (testify test)
  29. (forlen i seq
  30. (if (f (seq i)) (a i))))))
  31. (def lines (s)
  32. (accum a
  33. ((afn ((p . ps))
  34. (if ps
  35. (do (a (rem #\return (cut s (+ p 1) (car ps))))
  36. (self ps))
  37. (a (cut s (+ p 1)))))
  38. (cons -1 (positions #\newline s)))))
  39. (def slices (s test)
  40. (accum a
  41. ((afn ((p . ps))
  42. (if ps
  43. (do (a (cut s (+ p 1) (car ps)))
  44. (self ps))
  45. (a (cut s (+ p 1)))))
  46. (cons -1 (positions test s)))))
  47. (def nonascii (s)
  48. (isnt (len s) (len (utf-8-bytes s))))
  49. ; > (require (lib "uri-codec.ss" "net"))
  50. ;> (form-urlencoded-decode "x%ce%bbx")
  51. ;"xÎťx"
  52. ; first byte: 0-7F, 1 char; c2-df 2; e0-ef 3, f0-f4 4.
  53. ; Fixed for utf8 by pc.
  54. (def urldecode (s)
  55. (tostring
  56. (forlen i s
  57. (caselet c (s i)
  58. #\+ (writec #\space)
  59. #\% (do (when (> (- (len s) i) 2)
  60. (writeb (int (cut s (+ i 1) (+ i 3)) 16)))
  61. (++ i 2))
  62. (writec c)))))
  63. (def urlencode (s)
  64. (tostring
  65. (each c (utf-8-bytes s)
  66. (writec #\%)
  67. (let i (int c)
  68. (if (< i 16) (writec #\0))
  69. (pr (coerce i 'string 16))))))
  70. (mac litmatch (pat string (o start 0))
  71. (w/uniq (gstring gstart)
  72. `(with (,gstring ,string ,gstart ,start)
  73. (unless (> (+ ,gstart ,(len pat)) (len ,gstring))
  74. (and ,@(let acc nil
  75. (forlen i pat
  76. (push `(is ,(pat i) (,gstring (+ ,gstart ,i)))
  77. acc))
  78. (rev acc)))))))
  79. ; litmatch would be cleaner if map worked for string and integer args:
  80. ; ,@(map (fn (n c)
  81. ; `(is ,c (,gstring (+ ,gstart ,n))))
  82. ; (len pat)
  83. ; pat)
  84. (mac endmatch (pat string)
  85. (w/uniq (gstring glen)
  86. `(withs (,gstring ,string ,glen (len ,gstring))
  87. (unless (> ,(len pat) (len ,gstring))
  88. (and ,@(let acc nil
  89. (forlen i pat
  90. (push `(is ,(pat (- (len pat) 1 i))
  91. (,gstring (- ,glen 1 ,i)))
  92. acc))
  93. (rev acc)))))))
  94. (def posmatch (pat seq (o start 0))
  95. (catch
  96. (if (isa pat 'fn)
  97. (for i start (- (len seq) 1)
  98. (when (pat (seq i)) (throw i)))
  99. (for i start (- (len seq) (len pat))
  100. (when (headmatch pat seq i) (throw i))))
  101. nil))
  102. (def headmatch (pat seq (o start 0))
  103. (let p (len pat)
  104. ((afn (i)
  105. (or (is i p)
  106. (and (is (pat i) (seq (+ i start)))
  107. (self (+ i 1)))))
  108. 0)))
  109. (def begins (seq pat (o start 0))
  110. (unless (len> pat (- (len seq) start))
  111. (headmatch pat seq start)))
  112. (def subst (new old seq)
  113. (let boundary (+ (- (len seq) (len old)) 1)
  114. (tostring
  115. (forlen i seq
  116. (if (and (< i boundary) (headmatch old seq i))
  117. (do (++ i (- (len old) 1))
  118. (pr new))
  119. (pr (seq i)))))))
  120. (def multisubst (pairs seq)
  121. (tostring
  122. (forlen i seq
  123. (iflet (old new) (find [begins seq (car _) i] pairs)
  124. (do (++ i (- (len old) 1))
  125. (pr new))
  126. (pr (seq i))))))
  127. ; not a good name
  128. (def findsubseq (pat seq (o start 0))
  129. (if (< (- (len seq) start) (len pat))
  130. nil
  131. (if (headmatch pat seq start)
  132. start
  133. (findsubseq pat seq (+ start 1)))))
  134. (def blank (s) (~find ~whitec s))
  135. (def nonblank (s) (unless (blank s) s))
  136. (def trim (s (o where 'both) (o test whitec))
  137. (withs (f (testify test)
  138. p1 (pos ~f s))
  139. (if p1
  140. (cut s
  141. (if (in where 'front 'both) p1 0)
  142. (when (in where 'end 'both)
  143. (let i (- (len s) 1)
  144. (while (and (> i p1) (f (s i)))
  145. (-- i))
  146. (+ i 1))))
  147. "")))
  148. (def num (n (o digits 2) (o trail-zeros nil) (o init-zero nil))
  149. (withs (comma
  150. (fn (i)
  151. (tostring
  152. (map [apply pr (rev _)]
  153. (rev (intersperse '(#\,)
  154. (tuples (rev (coerce (string i) 'cons))
  155. 3))))))
  156. abrep
  157. (let a (abs n)
  158. (if (< digits 1)
  159. (comma (roundup a))
  160. (exact a)
  161. (string (comma a)
  162. (when (and trail-zeros (> digits 0))
  163. (string "." (newstring digits #\0))))
  164. (withs (d (expt 10 digits)
  165. m (/ (roundup (* a d)) d)
  166. i (trunc m)
  167. r (abs (trunc (- (* m d) (* i d)))))
  168. (+ (if (is i 0)
  169. (if (or init-zero (is r 0)) "0" "")
  170. (comma i))
  171. (withs (rest (string r)
  172. padded (+ (newstring (- digits (len rest)) #\0)
  173. rest)
  174. final (if trail-zeros
  175. padded
  176. (trim padded 'end [is _ #\0])))
  177. (string (unless (empty final) ".")
  178. final)))))))
  179. (if (and (< n 0) (find [and (digit _) (isnt _ #\0)] abrep))
  180. (+ "-" abrep)
  181. abrep)))
  182. (def joinstr (lst (o glue " "))
  183. (string (intersperse glue lst)))
  184. ; by Andrew Wilcox
  185. (def begins-rest (pattern s)
  186. (if (begins s pattern)
  187. (cut s (len pattern))))
  188. ; English
  189. (def pluralize (n str)
  190. (if (or (is n 1) (single n))
  191. str
  192. (string str "s")))
  193. (def plural (n x)
  194. (string n #\ (pluralize n x)))
  195. (def capitalize (str)
  196. (if (empty str) str
  197. (+ (upcase (str 0)) (cut str 1))))
  198. (load "help/strings.arc")
  199. ; http://www.eki.ee/letter/chardata.cgi?HTML4=1
  200. ; http://jrgraphix.net/research/unicode_blocks.php?block=1
  201. ; http://home.tiscali.nl/t876506/utf8tbl.html
  202. ; http://www.fileformat.info/info/unicode/block/latin_supplement/utf8test.htm
  203. ; http://en.wikipedia.org/wiki/Utf-8
  204. ; http://unicode.org/charts/charindex2.html