PageRenderTime 43ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2006-03-17/octave-forge/main/strings/regexp.cc

#
C++ | 139 lines | 99 code | 15 blank | 25 comment | 19 complexity | 3fb786a20640f3426bbce7229b28134e MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Author: Paul Kienzle <pkienzle@users.sf.net>
  3. This program is granted to the public domain
  4. 2002-01-15 Paul Kienzle <pkienzle@users.sf.net>
  5. * Initial revision
  6. */
  7. #include <octave/oct.h>
  // Fallback used only when OCTAVE_LOCAL_BUFFER has not already been defined.
  //
  // OCTAVE_LOCAL_BUFFER (T, buf, size) declares a std::vector<T> named
  // <buf>_vector holding `size` elements, plus a pointer `buf` to its first
  // element.  The storage is released when the vector goes out of scope.
  //
  // The macro expands to two declarations, so it can only be used where a
  // declaration sequence is valid (not as the sole body of an unbraced `if`).
  // When `size` is zero the vector is empty and indexing it would be
  // undefined, so `buf` is a null pointer in that case.
  #ifndef OCTAVE_LOCAL_BUFFER
  #include <vector>
  #define OCTAVE_LOCAL_BUFFER(T, buf, size) \
    std::vector<T> buf ## _vector (size); \
    T *buf = (buf ## _vector.empty () ? 0 : &(buf ## _vector[0]))
  #endif
  14. #ifdef __MINGW32__
  15. #define __restrict
  16. #endif
  17. #include <regex.h>
  18. DEFUN_DLD(regexp,args,nargout,"\
  19. Regular expression string matching.\n\
  20. match = regexp(pattern,string)\n\
  21. Returns the start and end indices of the matching substring, or the\n\
  22. or the empty matrix if there is none. There is one additional row\n\
  23. for each set of parentheses in the pattern indicating the start and\n\
  24. end index for the first match for that subexpression. So the\n\
  25. expression string(match(2,1):match(2,2)) will return the string\n\
  26. matched by the first set of parentheses. Note that parentheses\n\
  27. within ( exp1 | exp2 | ... ) are not counted.\n\
  28. [match s1 s2 ...] = regexp(pattern,string)\n\
  29. Returns the matching substrings in s1, s2, etc.\n\
  30. If there is no match then empty strings are returned.\n\
  31. \n\
  32. Uses the POSIX extended matching routine regcomp. See your local manpages\n\
  33. for details.\n\
  34. ")
  35. {
  36. octave_value_list retval;
  37. int nargin = args.length();
  38. if (nargin != 2)
  39. {
  40. print_usage("regexp");
  41. return retval;
  42. }
  43. std::string pattern = args(0).string_value ();
  44. if (error_state)
  45. {
  46. gripe_wrong_type_arg ("regexp", args(0));
  47. return retval;
  48. }
  49. std::string buffer = args(1).string_value ();
  50. if (error_state)
  51. {
  52. gripe_wrong_type_arg ("regexp", args(1));
  53. return retval;
  54. }
  55. regex_t compiled;
  56. int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED);
  57. if (err)
  58. {
  59. int len = regerror(err, &compiled, NULL, 0);
  60. char *errmsg = (char *)malloc(len);
  61. if (errmsg)
  62. {
  63. regerror(err, &compiled, errmsg, len);
  64. error("regexp: %s in pattern (%s)", errmsg, pattern.c_str());
  65. free(errmsg);
  66. }
  67. else
  68. {
  69. error("out of memory");
  70. }
  71. regfree(&compiled);
  72. return retval;
  73. }
  74. // allocate space for the matches
  75. int subexpr = 1;
  76. for (unsigned int i=0; i < pattern.length(); i++)
  77. {
  78. subexpr += ( pattern[i] == '(' ? 1 : 0 );
  79. }
  80. OCTAVE_LOCAL_BUFFER (regmatch_t, match, subexpr );
  81. // do the match
  82. if (regexec(&compiled, buffer.c_str(), subexpr, match, 0)==0)
  83. {
  84. // Count actual matches (this may be less than the number of
  85. // parentheses if there are parentheses inside of ( ... | ... )
  86. int matches = 0;
  87. while (matches < subexpr && match[matches].rm_so >= 0) matches++;
  88. if (nargout > matches)
  89. {
  90. error("regexp: too many return values requested");
  91. return retval;
  92. }
  93. // Allocate space for the return values
  94. if (nargout == 0) nargout = 1;
  95. retval.resize(nargout);
  96. // Copy the match indices to retval(0)
  97. Matrix indices(matches,2);
  98. for (int i=0 ; i < subexpr && match[i].rm_so >= 0; i++)
  99. indices(i,0) = match[i].rm_so+1, indices(i,1) = match[i].rm_eo;
  100. retval(0) = indices;
  101. // Copy the substrings to the output arguments
  102. for (int i=1 ; i < nargout && match[i].rm_so >= 0; i++)
  103. retval(i) = buffer.substr(match[i].rm_so,match[i].rm_eo-match[i].rm_so);
  104. }
  105. else
  106. {
  107. for (int i=nargout-1; i > 0; i--) retval(i) = "";
  108. retval(0) = Matrix(0,0);
  109. }
  110. regfree(&compiled);
  111. return retval;
  112. }
  113. /*
  114. %!assert(regexp("f(.*)uck"," firetruck "),[2,10;3,7]);
  115. %!test
  116. %! [m,b]=regexp("f(.*)uck"," firetruck ");
  117. %! assert(m,[2,10;3,7]);
  118. %! assert(b, "iretr");
  119. */