PageRenderTime 89ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/CS/migrated/tags/PRE_CSARRAY_SIZE_T_INDICES/include/csutil/regexp.h

#
C Header | 195 lines | 50 code | 13 blank | 132 comment | 0 complexity | 82b44bf62e86e15be556f604e401bc60 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, LGPL-2.0
  /*
      Copyright (C) 2004 by Frank Richter

      This library is free software; you can redistribute it and/or
      modify it under the terms of the GNU Library General Public
      License as published by the Free Software Foundation; either
      version 2 of the License, or (at your option) any later version.

      This library is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      Library General Public License for more details.

      You should have received a copy of the GNU Library General Public
      License along with this library; if not, write to the Free
      Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
  15. #ifndef __CS_UTIL_REGEXP_H__
  16. #define __CS_UTIL_REGEXP_H__
  17. /**\file
  18. * Regular expressions support.
  19. */
  20. #include "csextern.h"
  21. #include "csutil/array.h"
  /**
   * Possible errors that can occur during matching.
   *
   * Enumerators carry their implicit values (NoError is 0, each following
   * enumerator is one greater than the previous).
   */
  enum csRegExpMatchError
  {
    /// No problems during matching.
    NoError,
    /// The pattern didn't match the string.
    NoMatch,
    /**
     * There was an invalid \c \\{...\\} construct in the regular expression.
     * A valid \c \\{...\\} construct must contain either a single number, or
     * two numbers in increasing order separated by a comma.
     */
    BadBraces,
    /**
     * There was a syntax error in the regular expression.
     */
    BadPattern,
    /**
     * A repetition operator such as ? or * appeared in a bad position (with no
     * preceding subexpression to act on).
     */
    BadRepetition,
    /**
     * The regular expression referred to an invalid collating element (one not
     * defined in the current locale for string collation).
     */
    ErrCollate,
    /**
     * The regular expression referred to an invalid character class name.
     */
    ErrCharType,
    /**
     * The regular expression ended with \c \\.
     */
    ErrEscape,
    /**
     * There was an invalid number in the \c \\digit construct.
     */
    ErrSubReg,
    /**
     * There were unbalanced square brackets in the regular expression.
     */
    ErrBrackets,
    /**
     * An extended regular expression had unbalanced parentheses, or a basic
     * regular expression had unbalanced \c \\( and \c \\).
     */
    ErrParentheses,
    /**
     * The regular expression had unbalanced \c \\{ and \c \\}.
     */
    ErrBraces,
    /**
     * One of the endpoints in a range expression was invalid.
     */
    ErrRange,
    /**
     * Out of memory.
     */
    ErrSpace,
    /**
     * Unknown error.
     */
    ErrUnknown
  };
  /**
   * Flags for regular expression matching.
   *
   * Each flag occupies a distinct bit so that several flags can be combined
   * with the bitwise-or operator.
   */
  enum csRegExpMatchFlags
  {
    /**
     * Ignore case when matching letters.
     */
    IgnoreCase = 1,
    /**
     * Treat a newline in string as dividing string into multiple lines, so that
     * \c $ can match before the newline and \c ^ can match after. Also, don't
     * permit \c . to match a newline, and don't permit \c [^...] to match a
     * newline.
     *
     * Otherwise, newline acts like any other ordinary character.
     */
    NewLine = 2,
    /**
     * Do not regard the beginning of the specified string as the beginning of a
     * line; more generally, don't make any assumptions about what text might
     * precede it.
     */
    NotBOL = 4,
    /**
     * Do not regard the end of the specified string as the end of a line; more
     * generally, don't make any assumptions about what text might follow it.
     */
    NotEOL = 8
  };
  119. /**
  120. * Information about (sub)expression matches.
  121. */
  122. struct CS_CSUTIL_EXPORT csRegExpMatch
  123. {
  124. /// Offset to the match from the beginning of the string to match against.
  125. size_t startOffset;
  126. /**
  127. * Offset to the end of the match from the beginning of the string to match
  128. * against.
  129. */
  130. size_t endOffset;
  131. };
  132. /**
  133. * Matcher for regular expressions.
  134. * \note Implementation note: if the platform supports it, this class wraps
  135. * the runtime libraries POSIX.2 regular expression interface; otherwise,
  136. * it uses a built-in implementation of POSIX.2 regular expression (borrowed
  137. * from glibc.) Both of them utiltize "compilation" of pattern for faster
  138. * repeated matching of the same pattern. Although compilation is transparent,
  139. * you should be aware that calls to Match() with different flags than
  140. * previous calls cause a recompilation of the pattern. Thus, to improve
  141. * performance, same \p flags should be used for subsequent calls to Match().
  142. */
  143. class CS_CSUTIL_EXPORT csRegExpMatcher
  144. {
  145. void* regex;
  146. char* pattern;
  147. int compiledFlags;
  148. csRegExpMatchError compileError;
  149. bool extendedRE;
  150. bool Compile (int flags, bool nosub);
  151. public:
  152. /**
  153. * Create a new RE matcher.
  154. * \param pattern Pattern to match against.
  155. * \param extendedRE Treat the pattern as an extended regular expression,
  156. * rather than as a basic regular expression.
  157. */
  158. csRegExpMatcher (const char* pattern, bool extendedRE = false);
  159. /// Destructor.
  160. ~csRegExpMatcher ();
  161. /**
  162. * Match a string against the pattern.
  163. * \param string String against which to attempt match.
  164. * \param flags One or more of csRegExpMatchFlags. Flags are combined using
  165. * the bitwise-or \p | operator.
  166. * \return NoError in case of success, else an error code.
  167. * \note Also check the "Implementation note" in the csRegExpMatcher
  168. * description.
  169. */
  170. csRegExpMatchError Match (const char* string, int flags = 0);
  171. /**
  172. * \copydoc Match()
  173. * \param matches Array containing the locations of individual
  174. * (sub)expression matches.
  175. */
  176. csRegExpMatchError Match (const char* string,
  177. csArray<csRegExpMatch>& matches, int flags = 0);
  178. };
  179. #endif // __CS_UTIL_REGEXP_H__