PageRenderTime 69ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2003-06-01/octave-forge/main/strings/regexp.cc

#
C++ | 128 lines | 96 code | 16 blank | 16 comment | 19 complexity | 9d851ddb5d284ba612bf706c6660c140 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Author: Paul Kienzle <pkienzle@users.sf.net>
  3. This program is granted to the public domain
  4. 2002-01-15 Paul Kienzle <pkienzle@users.sf.net>
  5. * Initial revision
  6. */
  7. #include <octave/oct.h>
  8. #include <memory>
  9. #ifndef OCTAVE_LOCAL_BUFFER
  10. #define OCTAVE_LOCAL_BUFFER(T, buf, size) \
  11. std::auto_ptr<T> buf ## _auto_ptr (new T [size]); \
  12. T *buf = buf ## _auto_ptr.get ()
  13. #endif
  14. #include <regex.h>
  15. DEFUN_DLD(regexp,args,nargout,"\
  16. Regular expression string matching.\n\
  17. match = regexp(pattern,string)\n\
  18. Returns the start and end indices of the matching substring, or the\n\
  19. or the empty matrix if there is none. There is one additional row\n\
  20. for each set of parentheses in the pattern indicating the start and\n\
  21. end index for the first match for that subexpression. So the\n\
  22. expression string(match(2,1):match(2,2)) will return the string\n\
  23. matched by the first set of parentheses. Note that parentheses\n\
  24. within ( exp1 | exp2 | ... ) are not counted.\n\
  25. [match s1 s2 ...] = regexp(pattern,string)\n\
  26. Returns the matching substrings in s1, s2, etc.\n\
  27. If there is no match then empty strings are returned.\n\
  28. \n\
  29. Uses the POSIX extended matching routine regcomp. See your local manpages\n\
  30. for details.\n\
  31. ")
  32. {
  33. octave_value_list retval;
  34. int nargin = args.length();
  35. if (nargin != 2)
  36. {
  37. print_usage("regexp");
  38. return retval;
  39. }
  40. std::string pattern = args(0).string_value ();
  41. if (error_state)
  42. {
  43. gripe_wrong_type_arg ("regexp", args(0));
  44. return retval;
  45. }
  46. std::string buffer = args(1).string_value ();
  47. if (error_state)
  48. {
  49. gripe_wrong_type_arg ("regexp", args(1));
  50. return retval;
  51. }
  52. regex_t compiled;
  53. int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED);
  54. if (err)
  55. {
  56. int len = regerror(err, &compiled, NULL, 0);
  57. char *errmsg = (char *)malloc(len);
  58. if (errmsg)
  59. {
  60. regerror(err, &compiled, errmsg, len);
  61. error("regexp: %s in pattern (%s)", errmsg, pattern.c_str());
  62. free(errmsg);
  63. }
  64. else
  65. {
  66. error("out of memory");
  67. }
  68. regfree(&compiled);
  69. return retval;
  70. }
  71. // allocate space for the matches
  72. int subexpr = 1;
  73. for (unsigned int i=0; i < pattern.length(); i++)
  74. {
  75. subexpr += ( pattern[i] == '(' ? 1 : 0 );
  76. }
  77. OCTAVE_LOCAL_BUFFER (regmatch_t, match, subexpr );
  78. // do the match
  79. if (regexec(&compiled, buffer.c_str(), subexpr, match, 0)==0)
  80. {
  81. // Count actual matches (this may be less than the number of
  82. // parentheses if there are parentheses inside of ( ... | ... )
  83. int matches = 0;
  84. while (matches < subexpr && match[matches].rm_so >= 0) matches++;
  85. if (nargout > matches)
  86. {
  87. error("regexp: too many return values requested");
  88. return retval;
  89. }
  90. // Allocate space for the return values
  91. if (nargout == 0) nargout = 1;
  92. retval.resize(nargout);
  93. // Copy the match indices to retval(0)
  94. Matrix indices(matches,2);
  95. for (int i=0 ; i < subexpr && match[i].rm_so >= 0; i++)
  96. indices(i,0) = match[i].rm_so+1, indices(i,1) = match[i].rm_eo;
  97. retval(0) = indices;
  98. // Copy the substrings to the output arguments
  99. for (int i=1 ; i < nargout && match[i].rm_so >= 0; i++)
  100. retval(i) = buffer.substr(match[i].rm_so,match[i].rm_eo-match[i].rm_so);
  101. }
  102. else
  103. {
  104. for (int i=nargout-1; i > 0; i--) retval(i) = "";
  105. retval(0) = Matrix(0,0);
  106. }
  107. regfree(&compiled);
  108. return retval;
  109. }