PageRenderTime 54ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/CS/tags/V1.2rc2/include/csutil/regexp.h

#
C Header | 207 lines | 52 code | 15 blank | 140 comment | 0 complexity | 5ba34e0c83f7df0bce698555c21e1a74 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, LGPL-2.0
  /*
      Copyright (C) 2004 by Frank Richter

      This library is free software; you can redistribute it and/or
      modify it under the terms of the GNU Library General Public
      License as published by the Free Software Foundation; either
      version 2 of the License, or (at your option) any later version.

      This library is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      Library General Public License for more details.

      You should have received a copy of the GNU Library General Public
      License along with this library; if not, write to the Free
      Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
  15. #ifndef __CS_UTIL_REGEXP_H__
  16. #define __CS_UTIL_REGEXP_H__
  /**\file
   * Regular expressions support.
   */
  20. #include "csextern.h"
  21. #include "csutil/array.h"
  /**
   * Possible errors that can occur during matching.
   *
   * The enumerators are not given explicit values, so they are numbered
   * consecutively starting at 0 (csrxNoError) in declaration order; do not
   * reorder them.
   */
  enum csRegExpMatchError
  {
    /// No problems during matching.
    csrxNoError,
    /// The pattern didn't match the string.
    csrxNoMatch,
    /**
     * There was an invalid \c \\{...\\} construct in the regular expression.
     * A valid \c \\{...\\} construct must contain either a single number, or
     * two numbers in increasing order separated by a comma.
     */
    csrxBadBraces,
    /**
     * There was a syntax error in the regular expression.
     */
    csrxBadPattern,
    /**
     * A repetition operator such as ? or * appeared in a bad position (with
     * no preceding subexpression to act on).
     */
    csrxBadRepetition,
    /**
     * The regular expression referred to an invalid collating element (one
     * not defined in the current locale for string collation).
     */
    csrxErrCollate,
    /**
     * The regular expression referred to an invalid character class name.
     */
    csrxErrCharType,
    /**
     * The regular expression ended with \c \\.
     */
    csrxErrEscape,
    /**
     * There was an invalid number in the \c \\digit construct.
     */
    csrxErrSubReg,
    /**
     * There were unbalanced square brackets in the regular expression.
     */
    csrxErrBrackets,
    /**
     * An extended regular expression had unbalanced parentheses, or a basic
     * regular expression had unbalanced \c \\( and \c \\).
     */
    csrxErrParentheses,
    /**
     * The regular expression had unbalanced \c \\{ and \c \\}.
     */
    csrxErrBraces,
    /**
     * One of the endpoints in a range expression was invalid.
     */
    csrxErrRange,
    /**
     * Out of memory.
     */
    csrxErrSpace,
    /**
     * Unknown error.
     */
    csrxErrUnknown
  };
  /**
   * Flags for regular expression matching.
   *
   * Each flag occupies its own bit, so several flags can be combined with
   * the bitwise-or operator.
   */
  enum csRegExpMatchFlags
  {
    /**
     * Ignore case when matching letters.
     */
    csrxIgnoreCase = 1,
    /**
     * Treat a newline in string as dividing string into multiple lines, so
     * that \c $ can match before the newline and \c ^ can match after. Also,
     * don't permit \c . to match a newline, and don't permit \c [^...] to
     * match a newline.
     *
     * Otherwise, newline acts like any other ordinary character.
     */
    csrxNewLine = 2,
    /**
     * Do not regard the beginning of the specified string as the beginning
     * of a line; more generally, don't make any assumptions about what text
     * might precede it.
     */
    csrxNotBOL = 4,
    /**
     * Do not regard the end of the specified string as the end of a line;
     * more generally, don't make any assumptions about what text might
     * follow it.
     */
    csrxNotEOL = 8
  };
  119. /**
  120. * Information about (sub)expression matches.
  121. */
  122. struct CS_CRYSTALSPACE_EXPORT csRegExpMatch
  123. {
  124. /// Offset to the match from the beginning of the string to match against.
  125. size_t startOffset;
  126. /**
  127. * Offset to the end of the match from the beginning of the string to match
  128. * against.
  129. */
  130. size_t endOffset;
  131. };
  132. /**
  133. * Matcher for regular expressions.
  134. * \note Implementation note: if the platform supports it, this class wraps
  135. * the runtime libraries POSIX.2 regular expression interface; otherwise,
  136. * it uses a built-in implementation of POSIX.2 regular expression (borrowed
  137. * from glibc.) Both of them utiltize "compilation" of pattern for faster
  138. * repeated matching of the same pattern. Although compilation is transparent,
  139. * you should be aware that calls to Match() with different flags than
  140. * previous calls cause a recompilation of the pattern. Thus, to improve
  141. * performance, same \p flags should be used for subsequent calls to Match().
  142. */
  143. class CS_CRYSTALSPACE_EXPORT csRegExpMatcher
  144. {
  145. void* regex;
  146. char* pattern;
  147. int compiledFlags;
  148. csRegExpMatchError compileError;
  149. bool extendedRE;
  150. bool Compile (int flags, bool nosub);
  151. public:
  152. /**
  153. * Create a new RE matcher.
  154. * \param pattern Pattern to match against.
  155. * \param extendedRE Treat the pattern as an extended regular expression,
  156. * rather than as a basic regular expression.
  157. */
  158. csRegExpMatcher (const char* pattern, bool extendedRE = false);
  159. /// Copy constructor
  160. csRegExpMatcher (const csRegExpMatcher& other);
  161. /// Destructor.
  162. ~csRegExpMatcher ();
  163. /// Assignment operator
  164. csRegExpMatcher& operator= (const csRegExpMatcher &other);
  165. /**
  166. * Match a string against the pattern.
  167. * \param string String against which to attempt match.
  168. * \param flags One or more of csRegExpMatchFlags. Flags are combined using
  169. * the bitwise-or \p | operator.
  170. * \return csrxNoError in case of success, else an error code.
  171. * \note Also check the "Implementation note" in the csRegExpMatcher
  172. * description.
  173. */
  174. csRegExpMatchError Match (const char* string, int flags = 0);
  175. /**
  176. * Match a string against the pattern.
  177. * \param string String against which to attempt match.
  178. * \param matches Array receiving the locations of individual
  179. * (sub)expression matches.
  180. * \param flags One or more of csRegExpMatchFlags. Flags are combined using
  181. * the bitwise-or \p | operator.
  182. * \return csrxNoError in case of success, else an error code.
  183. * \note Also check the "Implementation note" in the csRegExpMatcher
  184. * description.
  185. */
  186. csRegExpMatchError Match (const char* string,
  187. csArray<csRegExpMatch>& matches, int flags = 0);
  188. };
  189. #endif // __CS_UTIL_REGEXP_H__