PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2002-04-20/octave-forge/main/strings/regexp.cc

#
C++ | 119 lines | 90 code | 13 blank | 16 comment | 19 complexity | cf7c880a24c3abd60f0f60bd2c11eb93 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
/*
Author: Paul Kienzle <pkienzle@users.sf.net>
This program is granted to the public domain
2002-01-15 Paul Kienzle <pkienzle@users.sf.net>
* Initial revision
*/
#include <octave/oct.h>
#include <regex.h>
#include <string>
#include <vector>
  9. DEFUN_DLD(regexp,args,nargout,"\
  10. Regular expression string matching.\n\
  11. match = regexp(pattern,string)\n\
  12. Returns the start and end indices of the matching substring, or the\n\
  13. or the empty matrix if there is none. There is one additional row\n\
  14. for each set of parentheses in the pattern indicating the start and\n\
  15. end index for the first match for that subexpression. So the\n\
  16. expression string(match(2,1):match(2,2)) will return the string\n\
  17. matched by the first set of parentheses. Note that parentheses\n\
  18. within ( exp1 | exp2 | ... ) are not counted.\n\
  19. [match s1 s2 ...] = regexp(pattern,string)\n\
  20. Returns the matching substrings in s1, s2, etc.\n\
  21. If there is no match then empty strings are returned.\n\
  22. \n\
  23. Uses the POSIX extended matching routine regcomp. See your local manpages\n\
  24. for details.\n\
  25. ")
  26. {
  27. octave_value_list retval;
  28. int nargin = args.length();
  29. if (nargin != 2)
  30. {
  31. print_usage("regexp");
  32. return retval;
  33. }
  34. std::string pattern = args(0).string_value ();
  35. if (error_state)
  36. {
  37. gripe_wrong_type_arg ("regexp", args(0));
  38. return retval;
  39. }
  40. std::string buffer = args(1).string_value ();
  41. if (error_state)
  42. {
  43. gripe_wrong_type_arg ("regexp", args(1));
  44. return retval;
  45. }
  46. regex_t compiled;
  47. int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED);
  48. if (err)
  49. {
  50. int len = regerror(err, &compiled, NULL, 0);
  51. char *errmsg = (char *)malloc(len);
  52. if (errmsg)
  53. {
  54. regerror(err, &compiled, errmsg, len);
  55. error("regexp: %s in pattern (%s)", errmsg, pattern.c_str());
  56. free(errmsg);
  57. }
  58. else
  59. {
  60. error("out of memory");
  61. }
  62. regfree(&compiled);
  63. return retval;
  64. }
  65. // allocate space for the matches
  66. int subexpr = 1;
  67. for (unsigned int i=0; i < pattern.length(); i++)
  68. {
  69. subexpr += ( pattern[i] == '(' ? 1 : 0 );
  70. }
  71. regmatch_t match[subexpr];
  72. // do the match
  73. if (regexec(&compiled, buffer.c_str(), subexpr, match, 0)==0)
  74. {
  75. // Count actual matches (this may be less than the number of
  76. // parentheses if there are parentheses inside of ( ... | ... )
  77. int matches = 0;
  78. while (matches < subexpr && match[matches].rm_so >= 0) matches++;
  79. if (nargout > matches)
  80. {
  81. error("regexp: too many return values requested");
  82. return retval;
  83. }
  84. // Allocate space for the return values
  85. if (nargout == 0) nargout = 1;
  86. retval.resize(nargout);
  87. // Copy the match indices to retval(0)
  88. Matrix indices(matches,2);
  89. for (int i=0 ; i < subexpr && match[i].rm_so >= 0; i++)
  90. indices(i,0) = match[i].rm_so+1, indices(i,1) = match[i].rm_eo;
  91. retval(0) = indices;
  92. // Copy the substrings to the output arguments
  93. for (int i=1 ; i < nargout && match[i].rm_so >= 0; i++)
  94. retval(i) = buffer.substr(match[i].rm_so,match[i].rm_eo-match[i].rm_so);
  95. }
  96. else
  97. {
  98. for (int i=nargout-1; i > 0; i--) retval(i) = "";
  99. retval(0) = Matrix(0,0);
  100. }
  101. regfree(&compiled);
  102. return retval;
  103. }