PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/re_str.ml

http://github.com/avsm/ocaml-re
OCaml | 279 lines | 225 code | 41 blank | 13 comment | 32 complexity | cf9f20dfb289f861d04dc0165854411e MD5 | raw file
Possible License(s): LGPL-2.1
(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the GNU Library General Public License.         *)
(*                                                                     *)
(***********************************************************************)
(* Modified by Jerome.Vouillon@pps.jussieu.fr for integration in RE *)
(* $Id: re_str.ml,v 1.3 2002/07/03 15:47:54 vouillon Exp $ *)
  14. type regexp =
  15. { re : Re.t;
  16. mutable mtch : Re.re option;
  17. mutable srch : Re.re option }
  18. let compile_regexp s c =
  19. { re = Re_emacs.re ~case:(not c) s;
  20. mtch = None;
  21. srch = None }
  22. let rec get_mtch re =
  23. match re.mtch with
  24. Some r -> r
  25. | None -> re.mtch <- Some (Re.compile (Re.seq [Re.start; re.re]));
  26. get_mtch re
  27. let rec get_srch re =
  28. match re.srch with
  29. Some r -> r
  30. | None -> re.srch <- Some (Re.compile re.re);
  31. get_srch re
  32. let state = ref None
  33. let string_match re s p =
  34. try
  35. state := Some (Re.exec ~pos:p (get_mtch re) s);
  36. true
  37. with Not_found ->
  38. state := None;
  39. false
  40. let string_partial_match re s p =
  41. match
  42. Re.exec_partial ~pos:p (get_mtch re) s
  43. with
  44. `Full -> string_match re s p
  45. | `Partial -> true
  46. | `Mismatch -> false
  47. let search_forward re s p =
  48. try
  49. let res = Re.exec ~pos:p (get_srch re) s in
  50. state := Some res;
  51. fst (Re.get_ofs res 0)
  52. with Not_found ->
  53. state := None;
  54. raise Not_found
  55. let rec search_backward re s p =
  56. try
  57. let res = Re.exec ~pos:p (get_mtch re) s in
  58. state := Some res;
  59. p
  60. with Not_found ->
  61. state := None;
  62. if p = 0 then raise Not_found else
  63. search_backward re s (p - 1)
  64. let beginning_group i =
  65. match !state with
  66. Some m -> fst (Re.get_ofs m i)
  67. | None -> raise Not_found
  68. let end_group i =
  69. match !state with
  70. Some m -> snd (Re.get_ofs m i)
  71. | None -> raise Not_found
  72. let get_len i =
  73. match !state with
  74. None -> 0
  75. | Some m ->
  76. try
  77. let (b, e) = Re.get_ofs m i in
  78. e - b
  79. with Not_found ->
  80. 0
  81. let rec repl_length repl p q len =
  82. if p < len then begin
  83. if repl.[p] <> '\\' then
  84. repl_length repl (p + 1) (q + 1) len
  85. else begin
  86. let p = p + 1 in
  87. if p = len then failwith "Str.replace: illegal backslash sequence";
  88. match repl.[p] with
  89. '\\' ->
  90. repl_length repl (p + 1) (q + 1) len
  91. | '0' .. '9' as c ->
  92. repl_length
  93. repl (p + 1) (q + get_len (Char.code c - Char.code '0')) len
  94. | _ ->
  95. repl_length repl (p + 1) (q + 2) len
  96. end
  97. end else
  98. q
  99. let rec replace orig repl p res q len =
  100. if p < len then begin
  101. let c = repl.[p] in
  102. if c <> '\\' then begin
  103. res.[q] <- c;
  104. replace orig repl (p + 1) res (q + 1) len
  105. end else begin
  106. match repl.[p + 1] with
  107. '\\' ->
  108. res.[q] <- '\\';
  109. replace orig repl (p + 2) res (q + 1) len
  110. | '0' .. '9' as c ->
  111. let d =
  112. try
  113. match !state with
  114. None ->
  115. raise Not_found
  116. | Some m ->
  117. let (b, e) = Re.get_ofs m (Char.code c - Char.code '0') in
  118. let d = e - b in
  119. if d > 0 then String.blit orig b res q d;
  120. d
  121. with Not_found ->
  122. 0
  123. in
  124. replace repl orig (p + 2) res (q + d) len
  125. | c ->
  126. res.[q] <- '\\';
  127. res.[q + 1] <- c;
  128. replace repl orig (p + 2) res (q + 2) len
  129. end
  130. end
  131. let replacement_text repl orig =
  132. let len = String.length repl in
  133. let res = String.create (repl_length repl 0 0 len) in
  134. replace orig repl 0 res 0 (String.length repl);
  135. res
  136. let quote s =
  137. let len = String.length s in
  138. let buf = String.create (2 * len) in
  139. let pos = ref 0 in
  140. for i = 0 to len - 1 do
  141. match s.[i] with
  142. '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' as c ->
  143. buf.[!pos] <- '\\'; buf.[!pos + 1] <- c; pos := !pos + 2
  144. | c ->
  145. buf.[!pos] <- c; pos := !pos + 1
  146. done;
  147. String.sub buf 0 !pos
  148. let string_before s n = String.sub s 0 n
  149. let string_after s n = String.sub s n (String.length s - n)
  150. let first_chars s n = String.sub s 0 n
  151. let last_chars s n = String.sub s (String.length s - n) n
  152. let regexp e = compile_regexp e false
  153. let regexp_case_fold e = compile_regexp e true
  154. let regexp_string s = compile_regexp (quote s) false
  155. let regexp_string_case_fold s = compile_regexp (quote s) true
  156. let group_beginning n =
  157. if n < 0 || n >= 10 then invalid_arg "Str.group_beginning" else
  158. let pos = beginning_group n in
  159. if pos = -1 then raise Not_found else pos
  160. let group_end n =
  161. if n < 0 || n >= 10 then invalid_arg "Str.group_end" else
  162. let pos = end_group n in
  163. if pos = -1 then raise Not_found else pos
  164. let matched_group n txt =
  165. let b = group_beginning n and e = group_end n in String.sub txt b (e-b)
  166. let replace_matched repl matched = replacement_text repl matched
  167. let match_beginning () = group_beginning 0
  168. and match_end () = group_end 0
  169. and matched_string txt = matched_group 0 txt
  170. let substitute_first expr repl_fun text =
  171. try
  172. let pos = search_forward expr text 0 in
  173. String.concat "" [string_before text pos;
  174. repl_fun text;
  175. string_after text (match_end())]
  176. with Not_found ->
  177. text
  178. let global_substitute expr repl_fun text =
  179. let rec replace start last_was_empty =
  180. try
  181. let startpos = if last_was_empty then start + 1 else start in
  182. if startpos > String.length text then raise Not_found;
  183. let pos = search_forward expr text startpos in
  184. let end_pos = match_end() in
  185. let repl_text = repl_fun text in
  186. String.sub text start (pos-start) ::
  187. repl_text ::
  188. replace end_pos (end_pos = pos)
  189. with Not_found ->
  190. [string_after text start] in
  191. String.concat "" (replace 0 false)
  192. let global_replace expr repl text =
  193. global_substitute expr (replacement_text repl) text
  194. and replace_first expr repl text =
  195. substitute_first expr (replacement_text repl) text
  196. let bounded_split expr text num =
  197. let start =
  198. if string_match expr text 0 then match_end() else 0 in
  199. let rec split start n =
  200. if start >= String.length text then [] else
  201. if n = 1 then [string_after text start] else
  202. try
  203. let pos = search_forward expr text start in
  204. String.sub text start (pos-start) :: split (match_end()) (n-1)
  205. with Not_found ->
  206. [string_after text start] in
  207. split start num
  208. let split expr text = bounded_split expr text 0
  209. let bounded_split_delim expr text num =
  210. let rec split start n =
  211. if start > String.length text then [] else
  212. if n = 1 then [string_after text start] else
  213. try
  214. let pos = search_forward expr text start in
  215. String.sub text start (pos-start) :: split (match_end()) (n-1)
  216. with Not_found ->
  217. [string_after text start] in
  218. if text = "" then [] else split 0 num
  219. let split_delim expr text = bounded_split_delim expr text 0
  220. type split_result = Text of string | Delim of string
  221. let bounded_full_split expr text num =
  222. let rec split start n =
  223. if start >= String.length text then [] else
  224. if n = 1 then [Text(string_after text start)] else
  225. try
  226. let pos = search_forward expr text start in
  227. let s = matched_string text in
  228. if pos > start then
  229. Text(String.sub text start (pos-start)) ::
  230. Delim(s) ::
  231. split (match_end()) (n-1)
  232. else
  233. Delim(s) ::
  234. split (match_end()) (n-1)
  235. with Not_found ->
  236. [Text(string_after text start)] in
  237. split 0 num
  238. let full_split expr text = bounded_full_split expr text 0