PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/re_posix.ml

http://github.com/avsm/ocaml-re
OCaml | 154 lines | 119 code | 7 blank | 28 comment | 59 complexity | b953b858db628e6eb764b64df220c4d0 MD5 | raw file
Possible License(s): LGPL-2.1
  1. (*
  2. RE - A regular expression library
  3. Copyright (C) 2001 Jerome Vouillon
  4. email: Jerome.Vouillon@pps.jussieu.fr
  5. This library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2 of the License, or (at your option) any later version.
  9. This library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with this library; if not, write to the Free Software
  15. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. *)
  17. (*
  18. What we could (should?) do:
  19. - a* ==> longest ((shortest (no_group a)* ), a | ()) (!!!)
  20. - abc understood as (ab)c
  21. - "((a?)|b)" against "ab" should not bind the first subpattern to anything
  22. Note that it should be possible to handle "(((ab)c)d)e" efficiently
  23. *)
  (** Raised by [parse] when the input is not a well-formed pattern: for
      example a missing [')'] or ['}'], a trailing ['\\'], a repetition
      operator with nothing before it, an interval whose upper bound is below
      its lower bound, or unconsumed input left after the expression. *)
  24. exception Parse_error
  (** Raised by [parse] for bracket-expression syntax it recognises but does
      not implement: equivalence classes ([[=...=]]), character classes
      ([[:...:]]) and collating symbols longer than one character. *)
  25. exception Not_supported
  (** [parse newline s] translates the POSIX extended regular expression [s]
      into a [Re] expression, using a hand-written recursive-descent parser
      over the characters of [s].

      When [newline] is [true], ['.'] becomes [Re.notnl] and ['^'] / ['$']
      become [Re.bol] / [Re.eol]; otherwise they become [Re.any], [Re.bos]
      and [Re.eos].

      @raise Parse_error if [s] is malformed (unbalanced parenthesis or
        brace, trailing backslash, unknown escape, repetition operator with
        nothing to repeat, interval bound that does not fit in an [int] or
        whose maximum is below its minimum, or trailing unparsed input).
      @raise Not_supported if a bracket expression uses an equivalence class,
        a character class, or a multi-character collating symbol. *)
  let parse newline s =
    (* [i] is the read cursor into [s]; every helper below reads or moves it. *)
    let i = ref 0 in
    let l = String.length s in
    let eos () = !i = l in
    (* Peek: is the next character [c]?  Never moves the cursor. *)
    let test c = not (eos ()) && s.[!i] = c in
    (* Consume the next character only if it is [c]; report whether it was. *)
    let accept c = let r = test c in if r then incr i; r in
    (* Unconditionally consume one character; callers check [eos] first. *)
    let get () = let r = s.[!i] in incr i; r in
    (* Push the last consumed character back. *)
    let unget () = decr i in
    (* regexp ::= branch ('|' branch)*, folded left into nested [Re.alt]. *)
    let rec regexp () = regexp' (branch ())
    and regexp' left =
      if accept '|' then regexp' (Re.alt [left; branch ()]) else left
    (* branch ::= piece*, stopping at end of input, '|' or ')'.  Pieces are
       accumulated in reverse and flipped once at the end. *)
    and branch () = branch' []
    and branch' left =
      if eos () || test '|' || test ')' then Re.seq (List.rev left)
      else branch' (piece () :: left)
    (* piece ::= atom followed by at most one of '*', '+', '?', '{m}',
       '{m,}' or '{m,n}'. *)
    and piece () =
      let r = atom () in
      if accept '*' then Re.rep (Re.nest r) else
      if accept '+' then Re.rep1 (Re.nest r) else
      if accept '?' then Re.opt r else
      if accept '{' then begin
        match integer () with
          Some i ->
            (* No comma means an exact count: the maximum equals the minimum.
               A comma with no number after it leaves the maximum open. *)
            let j = if accept ',' then integer () else Some i in
            if not (accept '}') then raise Parse_error;
            begin match j with
              Some j when j < i -> raise Parse_error | _ -> ()
            end;
            Re.repn (Re.nest r) i j
        | None ->
            (* '{' was not followed by a digit: hand it back.  The next call
               to [atom] then sees '{' and rejects it. *)
            unget (); r
      end else
        r
    (* atom ::= '.' | '(' regexp ')' | '^' | '$' | bracket expression
              | escaped metacharacter | ordinary character. *)
    and atom () =
      if accept '.' then begin
        if newline then Re.notnl else Re.any
      end else if accept '(' then begin
        let r = regexp () in
        if not (accept ')') then raise Parse_error;
        Re.group r
      end else
      if accept '^' then begin
        if newline then Re.bol else Re.bos
      end else if accept '$' then begin
        if newline then Re.eol else Re.eos
      end else if accept '[' then begin
        if accept '^' then
          (* NOTE(review): '\n' is removed from a negated bracket expression
             even when [newline] is false; POSIX ties that exclusion to
             REG_NEWLINE.  Confirm this is intended before changing it. *)
          Re.diff (Re.compl (bracket [])) (Re.char '\n')
        else
          Re.alt (bracket [])
      end else
      if accept '\\' then begin
        if eos () then raise Parse_error;
        (* Only metacharacters may be escaped; any other escape is an error. *)
        match get () with
          '|' | '(' | ')' | '*' | '+' | '?'
        | '[' | '.' | '^' | '$' | '{' | '\\' as c -> Re.char c
        | _ -> raise Parse_error
      end else begin
        if eos () then raise Parse_error;
        (* A repetition operator here has nothing to apply to. *)
        match get () with
          '*' | '+' | '?' | '{' | '\\' -> raise Parse_error
        | c -> Re.char c
      end
    (* Read an unsigned decimal number.  Returns [None], consuming nothing,
       if the next character is not a digit. *)
    and integer () =
      if eos () then None else
      match get () with
        '0'..'9' as d -> integer' (Char.code d - Char.code '0')
      | _ -> unget (); None
    (* Continue reading digits, with [i] the value accumulated so far. *)
    and integer' i =
      if eos () then Some i else
      match get () with
        '0'..'9' as d ->
          let d = Char.code d - Char.code '0' in
          (* Reject before multiplying: [10 * i + d] fits in an [int] exactly
             when [i <= (max_int - d) / 10].  Comparing the wrapped result
             with [i] afterwards misses wrap-arounds that land above [i]. *)
          if i > (max_int - d) / 10 then raise Parse_error;
          integer' (10 * i + d)
      | _ ->
          unget (); Some i
    (* Parse the members of a bracket expression up to and including the
       closing ']'.  [s] holds the members collected so far; while it is
       empty a ']' is taken as a literal member rather than the terminator. *)
    and bracket s =
      if s <> [] && accept ']' then s else begin
        let c = char () in
        if accept '-' then begin
          (* "c-]" : the '-' is a literal and the ']' closes the expression. *)
          if accept ']' then Re.char c :: Re.char '-' :: s else begin
            let c' = char () in
            bracket (Re.rg c c' :: s)
          end
        end else
          bracket (Re.char c :: s)
      end
    (* Read one bracket-expression member: a plain character, or a
       single-character collating symbol written "[.x.]". *)
    and char () =
      if eos () then raise Parse_error;
      let c = get () in
      if c = '[' then begin
        if accept '=' then raise Not_supported
        else if accept ':' then begin
          raise Not_supported (*XXX*)
        end else if accept '.' then begin
          if eos () then raise Parse_error;
          let c = get () in
          if not (accept '.') then raise Not_supported;
          if not (accept ']') then raise Parse_error;
          c
        end else
          c
      end else
        c
    in
    let res = regexp () in
    (* Anything left over (for instance an unmatched ')') is an error. *)
    if not (eos ()) then raise Parse_error;
    res
  (** Flags understood by [re] and [compile_pat]:
      - [`ICase]: the parsed expression is wrapped in [Re.no_case];
      - [`NoSub]: the parsed expression is wrapped in [Re.no_group];
      - [`Newline]: [parse] is called with its [newline] argument set, so
        ['.'] maps to [Re.notnl] and ['^'] / ['$'] map to [Re.bol] /
        [Re.eol] instead of [Re.any], [Re.bos] and [Re.eos]. *)
  135. type opt = [`ICase | `NoSub | `Newline]
  (** [re ?opts s] parses the POSIX pattern [s] with [parse] and applies the
      requested flags to the result: [`Newline] is passed to the parser,
      then [`ICase] wraps the expression in [Re.no_case], then [`NoSub]
      wraps it in [Re.no_group].  [opts] defaults to the empty list.
      Exceptions raised by [parse] propagate unchanged. *)
  let re ?(opts = []) s =
    (* Flags are constant polymorphic variants, so physical equality
       ([List.memq]) is enough to look them up. *)
    let enabled flag = List.memq flag opts in
    let parsed = parse (enabled `Newline) s in
    let cased = if enabled `ICase then Re.no_case parsed else parsed in
    if enabled `NoSub then Re.no_group cased else cased
  (** [compile re] wraps [re] in [Re.longest] and compiles the result with
      [Re.compile]. *)
  let compile re =
    let longest_match = Re.longest re in
    Re.compile longest_match
  (** [compile_pat ?opts s] parses the pattern string [s] with [re], passing
      [opts] through, and compiles the result with [compile].  [opts]
      defaults to the empty list. *)
  let compile_pat ?(opts = []) s =
    let pattern = re ~opts s in
    compile pattern