PageRenderTime 14ms CodeModel.GetById 1ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 1ms

/strings.arc

http://github.com/alimoeeny/arc
Unknown | 241 lines | 204 code | 37 blank | 0 comment | 0 complexity | 2ee9f4940936fecb0619a495cece434f MD5 | raw file
  1; Matching.  Spun off 29 Jul 06.
  2
  3; arc> (tostring (writec (coerce 133 'char)))
  4;
  5;> (define ss (open-output-string))
  6;> (write-char (integer->char 133) ss)
  7;> (get-output-string ss)
  8;"\u0085"
  9
 10(def tokens (s (o sep whitec))
 11  (let test (testify sep)
 12    (let rec (afn (cs toks tok)
 13               (if (no cs)         (consif tok toks)
 14                   (test (car cs)) (self (cdr cs) (consif tok toks) nil)
 15                                   (self (cdr cs) toks (cons (car cs) tok))))
 16      (rev (map [coerce _ 'string]
 17                (map rev (rec (coerce s 'cons) nil nil)))))))
 18
 19; names of cut, split, halve not optimal
 20
 21(def halve (s (o sep whitec))
 22  (let test (testify sep)
 23    (let rec (afn (cs tok)
 24               (if (no cs)         (list (rev tok))
 25                   (test (car cs)) (list cs (rev tok))
 26                                   (self (cdr cs) (cons (car cs) tok))))
 27      (rev (map [coerce _ 'string]
 28                (rec (coerce s 'cons) nil))))))
 29
 30; maybe promote to arc.arc, but if so include a list clause
 31
 32(def positions (test seq)
 33  (accum a
 34    (let f (testify test)
 35      (forlen i seq
 36        (if (f (seq i)) (a i))))))
 37
 38(def lines (s)
 39  (accum a
 40    ((afn ((p . ps))
 41       (if ps
 42           (do (a (rem #\return (cut s (+ p 1) (car ps))))
 43               (self ps))
 44           (a (cut s (+ p 1)))))
 45     (cons -1 (positions #\newline s)))))
 46
 47(def slices (s test)
 48  (accum a
 49    ((afn ((p . ps))
 50       (if ps
 51           (do (a (cut s (+ p 1) (car ps)))
 52               (self ps))
 53           (a (cut s (+ p 1)))))
 54     (cons -1 (positions test s)))))
 55
 56(def nonascii (s)
 57  (isnt (len s) (len (utf-8-bytes s))))
 58
 59; > (require (lib "uri-codec.ss" "net"))
 60;> (form-urlencoded-decode "x%ce%bbx")
 61;"xÎťx"
 62
 63; first byte: 0-7F, 1 char; c2-df 2; e0-ef 3, f0-f4 4. 
 64
 65; Fixed for utf8 by pc.
 66
 67(def urldecode (s)
 68 (tostring
 69  (forlen i s
 70    (caselet c (s i)
 71      #\+ (writec #\space)
 72      #\% (do (when (> (- (len s) i) 2)
 73                (writeb (int (cut s (+ i 1) (+ i 3)) 16)))
 74              (++ i 2))
 75          (writec c)))))
 76
 77(def urlencode (s)
 78  (tostring 
 79    (each c (utf-8-bytes s)
 80      (writec #\%)
 81      (let i (int c)
 82        (if (< i 16) (writec #\0))
 83        (pr (coerce i 'string 16))))))
 84
 85(mac litmatch (pat string (o start 0))
 86  (w/uniq (gstring gstart)
 87    `(with (,gstring ,string ,gstart ,start)
 88       (unless (> (+ ,gstart ,(len pat)) (len ,gstring))
 89         (and ,@(let acc nil
 90                  (forlen i pat
 91                    (push `(is ,(pat i) (,gstring (+ ,gstart ,i)))
 92                           acc))
 93                  (rev acc)))))))
 94
 95; litmatch would be cleaner if map worked for string and integer args:
 96
 97;             ,@(map (fn (n c)  
 98;                      `(is ,c (,gstring (+ ,gstart ,n))))
 99;                    (len pat)
100;                    pat)
101
102(mac endmatch (pat string)
103  (w/uniq (gstring glen)
104    `(withs (,gstring ,string ,glen (len ,gstring))
105       (unless (> ,(len pat) (len ,gstring))
106         (and ,@(let acc nil
107                  (forlen i pat
108                    (push `(is ,(pat (- (len pat) 1 i)) 
109                               (,gstring (- ,glen 1 ,i)))
110                           acc))
111                  (rev acc)))))))
112
113(def posmatch (pat seq (o start 0))
114  (catch
115    (if (isa pat 'fn)
116        (for i start (- (len seq) 1)
117          (when (pat (seq i)) (throw i)))
118        (for i start (- (len seq) (len pat))
119          (when (headmatch pat seq i) (throw i))))
120    nil))
121
122(def headmatch (pat seq (o start 0))
123  (let p (len pat) 
124    ((afn (i)      
125       (or (is i p) 
126           (and (is (pat i) (seq (+ i start)))
127                (self (+ i 1)))))
128     0)))
129
130(def begins (seq pat (o start 0))
131  (unless (len> pat (- (len seq) start))
132    (headmatch pat seq start)))
133
134(def subst (new old seq)
135  (let boundary (+ (- (len seq) (len old)) 1)
136    (tostring 
137      (forlen i seq
138        (if (and (< i boundary) (headmatch old seq i))
139            (do (++ i (- (len old) 1))
140                (pr new))
141            (pr (seq i)))))))
142
143(def multisubst (pairs seq)
144  (tostring 
145    (forlen i seq
146      (iflet (old new) (find [begins seq (car _) i] pairs)
147        (do (++ i (- (len old) 1))
148            (pr new))
149        (pr (seq i))))))
150
151; not a good name
152
153(def findsubseq (pat seq (o start 0))
154  (if (< (- (len seq) start) (len pat))
155       nil
156      (if (headmatch pat seq start)
157          start
158          (findsubseq pat seq (+ start 1)))))
159
160(def blank (s) (~find ~whitec s))
161
162(def nonblank (s) (unless (blank s) s))
163
164(def trim (s (o where 'both) (o test whitec))
165  (withs (f   (testify test)
166           p1 (pos ~f s))
167    (if p1
168        (cut s 
169             (if (in where 'front 'both) p1 0)
170             (when (in where 'end 'both)
171               (let i (- (len s) 1)
172                 (while (and (> i p1) (f (s i)))
173                   (-- i))
174                 (+ i 1))))
175        "")))
176
177(def num (n (o digits 2) (o trail-zeros nil) (o init-zero nil))
178  (withs (comma
179          (fn (i)
180            (tostring
181              (map [apply pr (rev _)]
182                   (rev (intersperse '(#\,)
183                                     (tuples (rev (coerce (string i) 'cons))
184                                             3))))))
185          abrep
186          (let a (abs n)
187            (if (< digits 1)
188                 (comma (roundup a))
189                (exact a)
190                 (string (comma a)
191                         (when (and trail-zeros (> digits 0))
192                           (string "." (newstring digits #\0))))
193                 (withs (d (expt 10 digits)
194                         m (/ (roundup (* a d)) d)
195                         i (trunc m)
196                         r (abs (trunc (- (* m d) (* i d)))))
197                   (+ (if (is i 0) 
198                          (if (or init-zero (is r 0)) "0" "") 
199                          (comma i))
200                      (withs (rest   (string r)
201                              padded (+ (newstring (- digits (len rest)) #\0)
202                                        rest)
203                              final  (if trail-zeros
204                                         padded
205                                         (trim padded 'end [is _ #\0])))
206                        (string (unless (empty final) ".")
207                                final)))))))
208    (if (and (< n 0) (find [and (digit _) (isnt _ #\0)] abrep))
209        (+ "-" abrep)
210        abrep)))
211        
212(def joinstr (lst (o glue " ")) 
213  (string (intersperse glue lst)))
214
215; by Andrew Wilcox
216(def begins-rest (pattern s)
217  (if (begins s pattern)
218      (cut s (len pattern))))
219
220; English
221
222(def pluralize (n str)
223  (if (or (is n 1) (single n))
224      str
225      (string str "s")))
226
227(def plural (n x)
228  (string n #\  (pluralize n x)))
229
230(def capitalize (str)
231  (if (empty str) str
232      (+ (upcase (str 0)) (cut str 1))))
233
234(load "help/strings.arc")
235
236; http://www.eki.ee/letter/chardata.cgi?HTML4=1
237; http://jrgraphix.net/research/unicode_blocks.php?block=1
238; http://home.tiscali.nl/t876506/utf8tbl.html
239; http://www.fileformat.info/info/unicode/block/latin_supplement/utf8test.htm
240; http://en.wikipedia.org/wiki/Utf-8
241; http://unicode.org/charts/charindex2.html