PageRenderTime 44ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/src/matcher.cpp

http://github.com/akrennmair/newsbeuter
C++ | 206 lines | 155 code | 35 blank | 16 comment | 23 complexity | d94c3ffe224f154654e8c9bbc63d8bb6 MD5 | raw file
  1. #include <matcher.h>
  2. #include <logger.h>
  3. #include <utils.h>
  4. #include <exceptions.h>
  5. #include <sys/time.h>
  6. #include <regex.h>
  7. #include <ctime>
  8. #include <cassert>
  9. #include <sstream>
  10. #include <vector>
  11. namespace newsbeuter {
  12. matchable::matchable() { }
  13. matchable::~matchable() { }
  14. matcher::matcher() { }
  15. matcher::matcher(const std::string& expr) : exp(expr) {
  16. parse(expr);
  17. }
  18. const std::string& matcher::get_expression() {
  19. return exp;
  20. }
  21. bool matcher::parse(const std::string& expr) {
  22. struct timeval tv1, tv2;
  23. gettimeofday(&tv1, nullptr);
  24. errmsg = "";
  25. bool b = p.parse_string(expr);
  26. if (!b) {
  27. errmsg = utils::wstr2str(p.get_error());
  28. }
  29. gettimeofday(&tv2, nullptr);
  30. unsigned long diff = (((tv2.tv_sec - tv1.tv_sec) * 1000000) + tv2.tv_usec) - tv1.tv_usec;
  31. LOG(level::DEBUG, "matcher::parse: parsing `%s' took %lu µs (success = %d)", expr, diff, b ? 1 : 0);
  32. return b;
  33. }
  34. bool matcher::matches(matchable* item) {
  35. /*
  36. * with this method, every class that is derived from matchable can be
  37. * matched against a filter expression that was previously passed to the
  38. * class with the parse() method.
  39. *
  40. * This makes it easy to use the matcher virtually everywhere, since C++
  41. * allows multiple inheritance (i.e. deriving from matchable can even be
  42. * used in class hierarchies), and deriving from matchable means that you
  43. * only have to implement two methods has_attribute() and get_attribute().
  44. *
  45. * The whole matching code is speed-critical, as the matching happens on a
  46. * lot of different occassions, and slow matching can be easily measured
  47. * (and felt by the user) on slow computers with a lot of items to match.
  48. */
  49. bool retval = false;
  50. if (item) {
  51. scope_measure m1("matcher::matches");
  52. retval = matches_r(p.get_root(), item);
  53. }
  54. return retval;
  55. }
  56. bool matcher::matchop_lt(expression * e, matchable * item) {
  57. if (!item->has_attribute(e->name))
  58. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  59. std::istringstream islit(e->literal);
  60. std::istringstream isatt(item->get_attribute(e->name));
  61. int ilit, iatt;
  62. islit >> ilit;
  63. isatt >> iatt;
  64. return iatt < ilit;
  65. }
  66. bool matcher::matchop_between(expression * e, matchable * item) {
  67. if (!item->has_attribute(e->name))
  68. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  69. std::vector<std::string> lit = utils::tokenize(e->literal, ":");
  70. std::istringstream isatt(item->get_attribute(e->name));
  71. int att;
  72. isatt >> att;
  73. if (lit.size() < 2)
  74. return false;
  75. std::istringstream is1(lit[0]), is2(lit[1]);
  76. int i1, i2;
  77. is1 >> i1;
  78. is2 >> i2;
  79. if (i1 > i2) {
  80. int tmp = i1;
  81. i1 = i2;
  82. i2 = tmp;
  83. }
  84. return (att >= i1 && att <= i2);
  85. }
  86. bool matcher::matchop_gt(expression * e, matchable * item) {
  87. if (!item->has_attribute(e->name))
  88. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  89. std::istringstream islit(e->literal);
  90. std::istringstream isatt(item->get_attribute(e->name));
  91. int ilit, iatt;
  92. islit >> ilit;
  93. isatt >> iatt;
  94. return iatt > ilit;
  95. }
  96. bool matcher::matchop_rxeq(expression * e, matchable * item) {
  97. if (!item->has_attribute(e->name))
  98. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  99. if (!e->regex) {
  100. e->regex = new regex_t;
  101. int err;
  102. if ((err = regcomp(e->regex, e->literal.c_str(), REG_EXTENDED | REG_ICASE | REG_NOSUB)) != 0) {
  103. char buf[1024];
  104. regerror(err, e->regex, buf, sizeof(buf));
  105. throw matcherexception(matcherexception::type::INVALID_REGEX, e->literal, buf);
  106. }
  107. }
  108. if (regexec(e->regex, item->get_attribute(e->name).c_str(), 0, nullptr, 0)==0)
  109. return true;
  110. return false;
  111. }
  112. bool matcher::matchop_cont(expression * e, matchable * item) {
  113. if (!item->has_attribute(e->name))
  114. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  115. std::vector<std::string> elements = utils::tokenize(item->get_attribute(e->name), " ");
  116. std::string literal = e->literal;
  117. for (auto elem : elements) {
  118. if (literal == elem) {
  119. return true;
  120. }
  121. }
  122. return false;
  123. }
  124. bool matcher::matchop_eq(expression * e, matchable * item) {
  125. if (!item->has_attribute(e->name)) {
  126. LOG(level::WARN, "matcher::matches_r: attribute %s not available", e->name);
  127. throw matcherexception(matcherexception::type::ATTRIB_UNAVAIL, e->name);
  128. }
  129. return (item->get_attribute(e->name)==e->literal);
  130. }
  131. bool matcher::matches_r(expression * e, matchable * item) {
  132. if (e) {
  133. switch (e->op) {
  134. /* the operator "and" and "or" simply connect two different subexpressions */
  135. case LOGOP_AND:
  136. return matches_r(e->l, item) && matches_r(e->r, item); // short-circuit evaulation in C -> short circuit evaluation in the filter language
  137. case LOGOP_OR:
  138. return matches_r(e->l, item) || matches_r(e->r, item); // same here
  139. /* while the other operator connect an attribute with a value */
  140. case MATCHOP_EQ:
  141. return matchop_eq(e, item);
  142. case MATCHOP_NE:
  143. return !matchop_eq(e, item);
  144. case MATCHOP_LT:
  145. return matchop_lt(e, item);
  146. case MATCHOP_BETWEEN:
  147. return matchop_between(e, item);
  148. case MATCHOP_GT:
  149. return matchop_gt(e, item);
  150. case MATCHOP_LE:
  151. return !matchop_gt(e, item);
  152. case MATCHOP_GE:
  153. return !matchop_lt(e, item);
  154. case MATCHOP_RXEQ:
  155. return matchop_rxeq(e, item);
  156. case MATCHOP_RXNE:
  157. return !matchop_rxeq(e, item);
  158. case MATCHOP_CONTAINS:
  159. return matchop_cont(e, item);
  160. case MATCHOP_CONTAINSNOT:
  161. return !matchop_cont(e, item);
  162. }
  163. return false;
  164. } else {
  165. return true; // shouldn't happen
  166. }
  167. }
  168. const std::string& matcher::get_parse_error() {
  169. return errmsg;
  170. }
  171. }