PageRenderTime 54ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/CS/tags/V1.4.0rc1/include/csutil/regexp.h

#
C Header | 226 lines | 67 code | 17 blank | 142 comment | 2 complexity | 0cb0f59b6aa40001efaebcadd6319b5c MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, LGPL-2.0
  1. /*
  2. Copyright (C) 2004 by Frank Richter
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Library General Public
  5. License as published by the Free Software Foundation; either
  6. version 2 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Library General Public License for more details.
  11. You should have received a copy of the GNU Library General Public
  12. License along with this library; if not, write to the Free
  13. Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  14. */
  15. #ifndef __CS_UTIL_REGEXP_H__
  16. #define __CS_UTIL_REGEXP_H__
  17. /**\file
  18. * Regular expressions support.
  19. */
  20. #include "csextern.h"
  21. #include "csutil/array.h"
  22. // Include system's (if available) ...
  23. #ifdef CS_HAVE_REGEX
  24. #include <regex.h>
  25. #else
  26. // ... resp. the one included with CS.
  27. #if (defined(CS_COMPILER_MSVC) || defined(CS_COMPILER_BCC)) && \
  28. !defined(__STDC__)
  29. #define __STDC__ 1
  30. #define __STDC__DEFINED
  31. #endif
  32. #include "generic/regex.h"
  33. #ifdef __STDC__DEFINED
  34. #undef __STDC__
  35. #endif
  36. #endif
  /**
   * Possible errors that can occur during matching.
   *
   * The enumerators are numbered consecutively starting at zero, with
   * #csrxNoError (success) being zero.
   */
  enum csRegExpMatchError
  {
    /// No problems during matching.
    csrxNoError = 0,
    /// The pattern didn't match the string.
    csrxNoMatch,
    /**
     * There was an invalid \c \\{...\\} construct in the regular expression.
     * A valid \c \\{...\\} construct must contain either a single number, or
     * two numbers in increasing order separated by a comma.
     */
    csrxBadBraces,
    /**
     * There was a syntax error in the regular expression.
     */
    csrxBadPattern,
    /**
     * A repetition operator such as ? or * appeared in a bad position (with
     * no preceding subexpression to act on).
     */
    csrxBadRepetition,
    /**
     * The regular expression referred to an invalid collating element (one
     * not defined in the current locale for string collation).
     */
    csrxErrCollate,
    /**
     * The regular expression referred to an invalid character class name.
     */
    csrxErrCharType,
    /**
     * The regular expression ended with \c \\.
     */
    csrxErrEscape,
    /**
     * There was an invalid number in the \c \\digit construct.
     */
    csrxErrSubReg,
    /**
     * There were unbalanced square brackets in the regular expression.
     */
    csrxErrBrackets,
    /**
     * An extended regular expression had unbalanced parentheses, or a basic
     * regular expression had unbalanced \c \\( and \c \\).
     */
    csrxErrParentheses,
    /**
     * The regular expression had unbalanced \c \\{ and \c \\}.
     */
    csrxErrBraces,
    /**
     * One of the endpoints in a range expression was invalid.
     */
    csrxErrRange,
    /**
     * Out of memory.
     */
    csrxErrSpace,
    /**
     * Unknown error.
     */
    csrxErrUnknown
  };
  /**
   * Flags for regular expression matching.
   *
   * Every flag occupies its own bit, so several flags can be combined with
   * the bitwise-or operator and passed as a plain \c int.
   */
  enum csRegExpMatchFlags
  {
    /**
     * Ignore case when matching letters.
     */
    csrxIgnoreCase = 1 << 0,
    /**
     * Treat a newline in string as dividing string into multiple lines, so
     * that \c $ can match before the newline and \c ^ can match after. Also,
     * don't permit \c . to match a newline, and don't permit \c [^...] to
     * match a newline.
     *
     * Otherwise, newline acts like any other ordinary character.
     */
    csrxNewLine = 1 << 1,
    /**
     * Do not regard the beginning of the specified string as the beginning
     * of a line; more generally, don't make any assumptions about what text
     * might precede it.
     */
    csrxNotBOL = 1 << 2,
    /**
     * Do not regard the end of the specified string as the end of a line;
     * more generally, don't make any assumptions about what text might
     * follow it.
     */
    csrxNotEOL = 1 << 3
  };
  134. /**
  135. * Information about (sub)expression matches.
  136. */
  137. struct CS_CRYSTALSPACE_EXPORT csRegExpMatch
  138. {
  139. /// Offset to the match from the beginning of the string to match against.
  140. size_t startOffset;
  141. /**
  142. * Offset to the end of the match from the beginning of the string to match
  143. * against.
  144. */
  145. size_t endOffset;
  146. };
  147. /**
  148. * Matcher for regular expressions.
  149. * \note Implementation note: if the platform supports it, this class wraps
  150. * the runtime libraries POSIX.2 regular expression interface; otherwise,
  151. * it uses a built-in implementation of POSIX.2 regular expression (borrowed
  152. * from glibc.) Both of them utiltize "compilation" of pattern for faster
  153. * repeated matching of the same pattern. Although compilation is transparent,
  154. * you should be aware that calls to Match() with different flags than
  155. * previous calls cause a recompilation of the pattern. Thus, to improve
  156. * performance, same \p flags should be used for subsequent calls to Match().
  157. */
  158. class CS_CRYSTALSPACE_EXPORT csRegExpMatcher :
  159. public CS::Memory::CustomAllocated
  160. {
  161. regex_t regex;
  162. char* pattern;
  163. int compiledFlags;
  164. bool regexpSetup : 1;
  165. bool extendedRE : 1;
  166. csRegExpMatchError compileError : 30;
  167. bool Compile (int flags, bool nosub);
  168. public:
  169. /**
  170. * Create a new RE matcher.
  171. * \param pattern Pattern to match against.
  172. * \param extendedRE Treat the pattern as an extended regular expression,
  173. * rather than as a basic regular expression.
  174. */
  175. csRegExpMatcher (const char* pattern, bool extendedRE = false);
  176. /// Copy constructor
  177. csRegExpMatcher (const csRegExpMatcher& other);
  178. /// Destructor.
  179. ~csRegExpMatcher ();
  180. /// Assignment operator
  181. csRegExpMatcher& operator= (const csRegExpMatcher &other);
  182. /**
  183. * Match a string against the pattern.
  184. * \param string String against which to attempt match.
  185. * \param flags One or more of csRegExpMatchFlags. Flags are combined using
  186. * the bitwise-or \p | operator.
  187. * \return csrxNoError in case of success, else an error code.
  188. * \note Also check the "Implementation note" in the csRegExpMatcher
  189. * description.
  190. */
  191. csRegExpMatchError Match (const char* string, int flags = 0);
  192. /**
  193. * Match a string against the pattern.
  194. * \param string String against which to attempt match.
  195. * \param matches Array receiving the locations of individual
  196. * (sub)expression matches.
  197. * \param flags One or more of csRegExpMatchFlags. Flags are combined using
  198. * the bitwise-or \p | operator.
  199. * \return csrxNoError in case of success, else an error code.
  200. * \note Also check the "Implementation note" in the csRegExpMatcher
  201. * description.
  202. */
  203. csRegExpMatchError Match (const char* string,
  204. csArray<csRegExpMatch>& matches, int flags = 0);
  205. };
  206. #endif // __CS_UTIL_REGEXP_H__