PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/regexp.ex

https://github.com/seanjensengrey/elixir
Elixir | 162 lines | 154 code | 7 blank | 1 comment | 6 complexity | 72970910d9c98cdf789f74024280b4ec MD5 | raw file
  1. % Regular expressions for Elixir built on top of the re module
  2. % in the Erlang Standard Library. More information can be found
  3. % on re documentation: http://www.erlang.org/doc/man/re.html
  4. %
  5. % Regular expressions in Elixir can be created using Regexp.new,
  6. % Regexp.compile (check their documentation) or using the special
  7. % form with ~r:
  8. %
  9. % % A simple regular expression that matches foo anywhere in the string
  10. % ~r(foo)
  11. %
  12. % % A case insensitive regular expression that also handles unicode chars
  13. % ~r(foo)iu
  14. %
  15. % The re module provides several options, some of them are not available
  16. % in Elixir while others are enabled by default. The ones enabled by default are:
  17. %
  18. % * multiline - the given string is always considered to be multiline, so
  19. % ^ and $ mark the beginning and end of each line. You need to use \A
  20. % and \z to match the beginning or end of the string
  21. %
  22. % The available options, followed by their shortcut in parenthesis, are:
  23. %
  24. % * unicode (u) - used when you want to match against specific unicode characters
  25. % * caseless (i) - add case insensitivity
  26. % * dotall (m) - causes dot to match newlines and also set newline to anycrlf.
  27. % The new line setting can be overwritten by setting (*CR) or (*LF) or (*CRLF)
  28. % or (*ANY) according to re documentation
  29. % * extended (x) - whitespace characters are ignored except when escaped and
  30. % allow # to delimit comments
  31. % * firstline (f) - forces the unanchored pattern to match before or at the first
  32. % newline, though the matched text may continue over the newline
  33. % * ungreedy (r) - invert the "greediness" of the regexp
  34. %
  35. % The options not available are:
  36. %
  37. % * anchored - not available, use ^ or \A instead
  38. % * dollar_endonly - not available, use \z instead
  39. % * no_auto_capture - not available, use ?: instead
  40. % * newline - not available, use (*CR) or (*LF) or (*CRLF) or (*ANYCRLF)
  41. % or (*ANY) at the beginning of the regexp according to the re documentation
  42. %
  module Regexp
    % Creates a new regular expression object from the given source and
    % options. The options are a string or a list of chars made of the
    % option shortcuts (for example "iu"); see Regexp::Behavior for the
    % accepted values.
    def new(regexp_bin, options := [])
      #Regexp::Behavior(regexp_bin, options)
    end

    % Escapes the given string so it matches itself literally when it is
    % used as (part of) a regular expression. Every metacharacter matched
    % by the pre-compiled escape regexp is prefixed with a backslash.
    %
    % ## Examples
    %
    %     "a\\.b" = Regexp.escape("a.b")
    %     "a\\+b" = Regexp.escape("a+b")
    %
    def escape(string)
      Erlang.re.replace(string, @escape_regexp, "\\\\&", [{'return,'binary},'global])
    end

    % Have the escape regexp pre-compiled and stored. It matches every
    % character that is special outside of a character class:
    % \ { [ ( ) ] } . ? * + ^ $ |
    { 'ok, compiled } = Erlang.re.compile("\\\\|\\{|\\[|\\(|\\)|\\]|\\}|\\.|\\?|\\*|\\+|\\^|\\$|\\|")
    @('escape_regexp, compiled)

    module Behavior
      % Creates a new regular expression. It expects two arguments,
      % the regular expression and a set of options. Both should be
      % a string or a list of chars and, if not, to_char_list is
      % invoked in order to retrieve the list of chars.
      %
      % The multiline option is always enabled; every char in options is
      % translated through parse_option, which raises badarg for unknown
      % shortcuts. Compilation failures surface as a failed match on
      % { 'ok, compiled }.
      %
      % ## Examples
      %
      %     Regexp.new("foo", "iu")
      %
      def __bound__(regexp_bin, options)
        parsed_options = options.to_char_list.foldl ['multiline], do (x, acc)
          parse_option(x, acc)
        end

        { 'ok, compiled } = Erlang.re.compile(regexp_bin, parsed_options)
        @('bin: regexp_bin, 'parsed_options: parsed_options, 'compiled: compiled)
      end

      % Returns a boolean indicating whether the regular expression
      % matches the given string.
      def match?(target)
        % No captures are needed to answer yes/no, so ask re not to build
        % them: with {'capture, 'none} re.run returns 'match or 'nomatch.
        'nomatch != Erlang.re.run(target, @compiled, [{'capture, 'none}])
      end

      % Runs the regular expression against the given target. It returns
      % a list of binaries with the whole match followed by the captured
      % groups, or nil if no match occurred.
      def run(target)
        case Erlang.re.run(target, @compiled, [{'capture, 'all, 'binary}])
        match 'nomatch
          nil
        match {'match, results}
          results
        end
      end

      % Returns a list with the match indexes in the given string, as
      % reported by re with {'capture, 'all, 'index}, or nil if no match
      % occurred. The search starts at the given offset.
      def indexes(target, offset := 0)
        case Erlang.re.run(target, @compiled, [{'capture, 'all, 'index},{'offset,offset}])
        match 'nomatch
          nil
        match {'match, results}
          results
        end
      end

      % Same as run, but scans the target several times collecting all
      % matches of the regular expression. This is similar to the /g option
      % in Perl. Returns an empty list if nothing matches.
      %
      % When the regular expression has no groups, each entry is the matched
      % binary; otherwise each entry is the list of captured groups (the
      % whole match is dropped).
      def scan(target, offset := 0)
        case Erlang.re.run(target, @compiled, [{'capture, 'all, 'binary},'global,{'offset,offset}])
        match 'nomatch
          []
        match {'match, results}
          results.map do (result)
            case result
            match [whole]
              whole
            match [_|groups]
              groups
            end
          end
        end
      end

      % Splits the given *target* in the number of *parts* specified.
      % Empty strings are removed from the result.
      def split(target, parts := 'infinity)
        list = Erlang.re.split(target, @compiled, [{'return,'binary},'trim,{'parts, parts}])
        [l for l in list, l != ""]
      end

      % Receives a string and a replacement and returns a string where the
      % first match of the regular expression is replaced by replacement.
      % Inside the replacement, you can either give "&" to access the whole
      % match or \N, where N is an integer, to access a specific matching
      % group.
      %
      % ## Examples
      %
      %     "abc" = ~r(d).replace("abc", "d")
      %     "adc" = ~r(b).replace("abc", "d")
      %     "a[b]c" = ~r(b).replace("abc", "[&]")
      %     "a[&]c" = ~r(b).replace("abc", "[\\&]")
      %     "a[b]c" = ~r[(b)].replace("abc", "[\\1]")
      %
      def replace(string, replacement)
        Erlang.re.replace(string, @compiled, replacement, [{'return,'binary}])
      end

      % The same as replace, but replaces all parts where the regular
      % expression matches in the string. Please read `replace` for
      % documentation and examples.
      def replace_all(string, replacement)
        Erlang.re.replace(string, @compiled, replacement, [{'return,'binary},'global])
      end

      private

      % Translates one option shortcut (a char) into the matching re option
      % and prepends it to the accumulated option list.
      def parse_option($u, acc); ['unicode|acc]; end
      def parse_option($i, acc); ['caseless|acc]; end
      def parse_option($x, acc); ['extended|acc]; end
      def parse_option($f, acc); ['firstline|acc]; end
      def parse_option($r, acc); ['ungreedy|acc]; end
      def parse_option($m, acc); ['dotall, {'newline, 'anycrlf}|acc]; end

      % Any other char is not a known shortcut.
      def parse_option(option, _)
        error({'badarg, "unknown option \"#{option.chr}\""})
      end
    end
  end