PageRenderTime 43ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2004-02-12/octave-forge/main/strings/regexp.cc

#
C++ | 138 lines | 96 code | 17 blank | 25 comment | 19 complexity | 9db5b8dab4bf67ec2a512906f168bb79 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Author: Paul Kienzle <pkienzle@users.sf.net>
  3. This program is granted to the public domain
  4. 2002-01-15 Paul Kienzle <pkienzle@users.sf.net>
  5. * Initial revision
  6. */
  7. #include <octave/oct.h>
  // Fallback definition, used only when OCTAVE_LOCAL_BUFFER has not already
  // been defined at this point.
  //
  // OCTAVE_LOCAL_BUFFER (elem_type, name, count) declares two locals in the
  // current scope: a std::vector<elem_type> called name_vector holding
  // `count` elements, and a pointer `name` to its first element.  The
  // storage is released when name_vector goes out of scope.  The macro
  // expands to two declarations, so it must be used as a statement of its
  // own (not as the unbraced body of an if/for).
  #if !defined (OCTAVE_LOCAL_BUFFER)
  #include <vector>
  #define OCTAVE_LOCAL_BUFFER(elem_type, name, count) \
    std::vector<elem_type> name ## _vector (count); \
    elem_type *name = &(name ## _vector[0])
  #endif
  14. #include <regex.h>
  15. DEFUN_DLD(regexp,args,nargout,"\
  16. Regular expression string matching.\n\
  17. match = regexp(pattern,string)\n\
  18. Returns the start and end indices of the matching substring, or the\n\
  19. or the empty matrix if there is none. There is one additional row\n\
  20. for each set of parentheses in the pattern indicating the start and\n\
  21. end index for the first match for that subexpression. So the\n\
  22. expression string(match(2,1):match(2,2)) will return the string\n\
  23. matched by the first set of parentheses. Note that parentheses\n\
  24. within ( exp1 | exp2 | ... ) are not counted.\n\
  25. [match s1 s2 ...] = regexp(pattern,string)\n\
  26. Returns the matching substrings in s1, s2, etc.\n\
  27. If there is no match then empty strings are returned.\n\
  28. \n\
  29. Uses the POSIX extended matching routine regcomp. See your local manpages\n\
  30. for details.\n\
  31. ")
  32. {
  33. octave_value_list retval;
  34. int nargin = args.length();
  35. if (nargin != 2)
  36. {
  37. print_usage("regexp");
  38. return retval;
  39. }
  40. std::string pattern = args(0).string_value ();
  41. if (error_state)
  42. {
  43. gripe_wrong_type_arg ("regexp", args(0));
  44. return retval;
  45. }
  46. std::string buffer = args(1).string_value ();
  47. if (error_state)
  48. {
  49. gripe_wrong_type_arg ("regexp", args(1));
  50. return retval;
  51. }
  52. regex_t compiled;
  53. int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED);
  54. if (err)
  55. {
  56. int len = regerror(err, &compiled, NULL, 0);
  57. char *errmsg = (char *)malloc(len);
  58. if (errmsg)
  59. {
  60. regerror(err, &compiled, errmsg, len);
  61. error("regexp: %s in pattern (%s)", errmsg, pattern.c_str());
  62. free(errmsg);
  63. }
  64. else
  65. {
  66. error("out of memory");
  67. }
  68. regfree(&compiled);
  69. return retval;
  70. }
  71. // allocate space for the matches
  72. int subexpr = 1;
  73. for (unsigned int i=0; i < pattern.length(); i++)
  74. {
  75. subexpr += ( pattern[i] == '(' ? 1 : 0 );
  76. }
  77. OCTAVE_LOCAL_BUFFER (regmatch_t, match, subexpr );
  78. // do the match
  79. if (regexec(&compiled, buffer.c_str(), subexpr, match, 0)==0)
  80. {
  81. // Count actual matches (this may be less than the number of
  82. // parentheses if there are parentheses inside of ( ... | ... )
  83. int matches = 0;
  84. while (matches < subexpr && match[matches].rm_so >= 0) matches++;
  85. if (nargout > matches)
  86. {
  87. error("regexp: too many return values requested");
  88. return retval;
  89. }
  90. // Allocate space for the return values
  91. if (nargout == 0) nargout = 1;
  92. retval.resize(nargout);
  93. // Copy the match indices to retval(0)
  94. Matrix indices(matches,2);
  95. for (int i=0 ; i < subexpr && match[i].rm_so >= 0; i++)
  96. indices(i,0) = match[i].rm_so+1, indices(i,1) = match[i].rm_eo;
  97. retval(0) = indices;
  98. // Copy the substrings to the output arguments
  99. for (int i=1 ; i < nargout && match[i].rm_so >= 0; i++)
  100. retval(i) = buffer.substr(match[i].rm_so,match[i].rm_eo-match[i].rm_so);
  101. }
  102. else
  103. {
  104. for (int i=nargout-1; i > 0; i--) retval(i) = "";
  105. retval(0) = Matrix(0,0);
  106. }
  107. regfree(&compiled);
  108. return retval;
  109. }
  110. /*
  111. %!assert(regexp("f(.*)uck"," firetruck "),[2,10;3,7]);
  112. %!test
  113. %! [m,b]=regexp("f(.*)uck"," firetruck ");
  114. %! assert(m,[2,10;3,7]);
  115. %! assert(b, "iretr");
  116. */