PageRenderTime 53ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/notmuch-0.13.2/lib/query.cc

#
C++ | 608 lines | 454 code | 115 blank | 39 comment | 60 complexity | 40f202fcbc9fe6fe6810468efb21c336 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1
  1. /* query.cc - Support for searching a notmuch database
  2. *
  3. * Copyright © 2009 Carl Worth
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see http://www.gnu.org/licenses/ .
  17. *
  18. * Author: Carl Worth <cworth@cworth.org>
  19. */
  #include "notmuch-private.h"
  #include "database-private.h"

  #include <glib.h> /* GHashTable, GPtrArray */
  #include <limits.h> /* CHAR_BIT */
  23. struct _notmuch_query {
  24. notmuch_database_t *notmuch;
  25. const char *query_string;
  26. notmuch_sort_t sort;
  27. notmuch_string_list_t *exclude_terms;
  28. notmuch_bool_t omit_excluded;
  29. };
  30. typedef struct _notmuch_mset_messages {
  31. notmuch_messages_t base;
  32. notmuch_database_t *notmuch;
  33. Xapian::MSetIterator iterator;
  34. Xapian::MSetIterator iterator_end;
  35. } notmuch_mset_messages_t;
  /* A set of document ids stored as a bitmap. */
  struct _notmuch_doc_id_set {
      /* One bit per document id, packed into unsigned int words. */
      unsigned int *bitmap;

      /* Ids greater than or equal to 'bound' are never members. */
      unsigned int bound;
  };

  /* Locate the bit for a document id: DOCIDSET_WORD gives the index of
   * the bitmap word and DOCIDSET_BIT the bit position inside it.
   *
   * The divisor has to be the number of *bits* in a word.  Dividing by
   * sizeof alone (a byte count) still works as long as the allocation
   * uses the same divisor, but it leaves all but the low few bits of
   * every word unused and inflates the bitmap accordingly. */
  #define DOCIDSET_BITS_PER_WORD (sizeof (unsigned int) * CHAR_BIT)
  #define DOCIDSET_WORD(bit) ((bit) / DOCIDSET_BITS_PER_WORD)
  #define DOCIDSET_BIT(bit) ((bit) % DOCIDSET_BITS_PER_WORD)
  42. struct visible _notmuch_threads {
  43. notmuch_query_t *query;
  44. /* The ordered list of doc ids matched by the query. */
  45. GArray *doc_ids;
  46. /* Our iterator's current position in doc_ids. */
  47. unsigned int doc_id_pos;
  48. /* The set of matched docid's that have not been assigned to a
  49. * thread. Initially, this contains every docid in doc_ids. */
  50. notmuch_doc_id_set_t match_set;
  51. };
  52. /* We need this in the message functions so forward declare. */
  53. static notmuch_bool_t
  54. _notmuch_doc_id_set_init (void *ctx,
  55. notmuch_doc_id_set_t *doc_ids,
  56. GArray *arr);
  57. static notmuch_bool_t
  58. _debug_query (void)
  59. {
  60. char *env = getenv ("NOTMUCH_DEBUG_QUERY");
  61. return (env && strcmp (env, "") != 0);
  62. }
  63. notmuch_query_t *
  64. notmuch_query_create (notmuch_database_t *notmuch,
  65. const char *query_string)
  66. {
  67. notmuch_query_t *query;
  68. if (_debug_query ())
  69. fprintf (stderr, "Query string is:\n%s\n", query_string);
  70. query = talloc (NULL, notmuch_query_t);
  71. if (unlikely (query == NULL))
  72. return NULL;
  73. query->notmuch = notmuch;
  74. query->query_string = talloc_strdup (query, query_string);
  75. query->sort = NOTMUCH_SORT_NEWEST_FIRST;
  76. query->exclude_terms = _notmuch_string_list_create (query);
  77. query->omit_excluded = TRUE;
  78. return query;
  79. }
  80. const char *
  81. notmuch_query_get_query_string (notmuch_query_t *query)
  82. {
  83. return query->query_string;
  84. }
  85. void
  86. notmuch_query_set_omit_excluded (notmuch_query_t *query, notmuch_bool_t omit_excluded)
  87. {
  88. query->omit_excluded = omit_excluded;
  89. }
  90. void
  91. notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
  92. {
  93. query->sort = sort;
  94. }
  95. notmuch_sort_t
  96. notmuch_query_get_sort (notmuch_query_t *query)
  97. {
  98. return query->sort;
  99. }
  100. void
  101. notmuch_query_add_tag_exclude (notmuch_query_t *query, const char *tag)
  102. {
  103. char *term = talloc_asprintf (query, "%s%s", _find_prefix ("tag"), tag);
  104. _notmuch_string_list_append (query->exclude_terms, term);
  105. }
  106. /* We end up having to call the destructors explicitly because we had
  107. * to use "placement new" in order to initialize C++ objects within a
  108. * block that we allocated with talloc. So C++ is making talloc
  109. * slightly less simple to use, (we wouldn't need
  110. * talloc_set_destructor at all otherwise).
  111. */
  112. static int
  113. _notmuch_messages_destructor (notmuch_mset_messages_t *messages)
  114. {
  115. messages->iterator.~MSetIterator ();
  116. messages->iterator_end.~MSetIterator ();
  117. return 0;
  118. }
  119. /* Return a query that matches messages with the excluded tags
  120. * registered with query. Any tags that explicitly appear in xquery
  121. * will not be excluded, and will be removed from the list of exclude
  122. * tags. The caller of this function has to combine the returned
  123. * query appropriately.*/
  124. static Xapian::Query
  125. _notmuch_exclude_tags (notmuch_query_t *query, Xapian::Query xquery)
  126. {
  127. Xapian::Query exclude_query = Xapian::Query::MatchNothing;
  128. for (notmuch_string_node_t *term = query->exclude_terms->head; term;
  129. term = term->next) {
  130. Xapian::TermIterator it = xquery.get_terms_begin ();
  131. Xapian::TermIterator end = xquery.get_terms_end ();
  132. for (; it != end; it++) {
  133. if ((*it).compare (term->string) == 0)
  134. break;
  135. }
  136. if (it == end)
  137. exclude_query = Xapian::Query (Xapian::Query::OP_OR,
  138. exclude_query, Xapian::Query (term->string));
  139. else
  140. term->string = talloc_strdup (query, "");
  141. }
  142. return exclude_query;
  143. }
  144. notmuch_messages_t *
  145. notmuch_query_search_messages (notmuch_query_t *query)
  146. {
  147. notmuch_database_t *notmuch = query->notmuch;
  148. const char *query_string = query->query_string;
  149. notmuch_mset_messages_t *messages;
  150. messages = talloc (query, notmuch_mset_messages_t);
  151. if (unlikely (messages == NULL))
  152. return NULL;
  153. try {
  154. messages->base.is_of_list_type = FALSE;
  155. messages->base.iterator = NULL;
  156. messages->notmuch = notmuch;
  157. new (&messages->iterator) Xapian::MSetIterator ();
  158. new (&messages->iterator_end) Xapian::MSetIterator ();
  159. talloc_set_destructor (messages, _notmuch_messages_destructor);
  160. Xapian::Enquire enquire (*notmuch->xapian_db);
  161. Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
  162. _find_prefix ("type"),
  163. "mail"));
  164. Xapian::Query string_query, final_query, exclude_query;
  165. Xapian::MSet mset;
  166. Xapian::MSetIterator iterator;
  167. unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
  168. Xapian::QueryParser::FLAG_PHRASE |
  169. Xapian::QueryParser::FLAG_LOVEHATE |
  170. Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
  171. Xapian::QueryParser::FLAG_WILDCARD |
  172. Xapian::QueryParser::FLAG_PURE_NOT);
  173. if (strcmp (query_string, "") == 0 ||
  174. strcmp (query_string, "*") == 0)
  175. {
  176. final_query = mail_query;
  177. } else {
  178. string_query = notmuch->query_parser->
  179. parse_query (query_string, flags);
  180. final_query = Xapian::Query (Xapian::Query::OP_AND,
  181. mail_query, string_query);
  182. }
  183. messages->base.excluded_doc_ids = NULL;
  184. if (query->exclude_terms) {
  185. exclude_query = _notmuch_exclude_tags (query, final_query);
  186. if (query->omit_excluded)
  187. final_query = Xapian::Query (Xapian::Query::OP_AND_NOT,
  188. final_query, exclude_query);
  189. else {
  190. exclude_query = Xapian::Query (Xapian::Query::OP_AND,
  191. exclude_query, final_query);
  192. enquire.set_weighting_scheme (Xapian::BoolWeight());
  193. enquire.set_query (exclude_query);
  194. mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
  195. GArray *excluded_doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
  196. for (iterator = mset.begin (); iterator != mset.end (); iterator++) {
  197. unsigned int doc_id = *iterator;
  198. g_array_append_val (excluded_doc_ids, doc_id);
  199. }
  200. messages->base.excluded_doc_ids = talloc (messages, _notmuch_doc_id_set);
  201. _notmuch_doc_id_set_init (query, messages->base.excluded_doc_ids,
  202. excluded_doc_ids);
  203. g_array_unref (excluded_doc_ids);
  204. }
  205. }
  206. enquire.set_weighting_scheme (Xapian::BoolWeight());
  207. switch (query->sort) {
  208. case NOTMUCH_SORT_OLDEST_FIRST:
  209. enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE);
  210. break;
  211. case NOTMUCH_SORT_NEWEST_FIRST:
  212. enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, TRUE);
  213. break;
  214. case NOTMUCH_SORT_MESSAGE_ID:
  215. enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, FALSE);
  216. break;
  217. case NOTMUCH_SORT_UNSORTED:
  218. break;
  219. }
  220. if (_debug_query ()) {
  221. fprintf (stderr, "Exclude query is:\n%s\n",
  222. exclude_query.get_description ().c_str ());
  223. fprintf (stderr, "Final query is:\n%s\n",
  224. final_query.get_description ().c_str ());
  225. }
  226. enquire.set_query (final_query);
  227. mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
  228. messages->iterator = mset.begin ();
  229. messages->iterator_end = mset.end ();
  230. return &messages->base;
  231. } catch (const Xapian::Error &error) {
  232. fprintf (stderr, "A Xapian exception occurred performing query: %s\n",
  233. error.get_msg().c_str());
  234. fprintf (stderr, "Query string was: %s\n", query->query_string);
  235. notmuch->exception_reported = TRUE;
  236. talloc_free (messages);
  237. return NULL;
  238. }
  239. }
  240. notmuch_bool_t
  241. _notmuch_mset_messages_valid (notmuch_messages_t *messages)
  242. {
  243. notmuch_mset_messages_t *mset_messages;
  244. mset_messages = (notmuch_mset_messages_t *) messages;
  245. return (mset_messages->iterator != mset_messages->iterator_end);
  246. }
  247. static Xapian::docid
  248. _notmuch_mset_messages_get_doc_id (notmuch_messages_t *messages)
  249. {
  250. notmuch_mset_messages_t *mset_messages;
  251. mset_messages = (notmuch_mset_messages_t *) messages;
  252. if (! _notmuch_mset_messages_valid (&mset_messages->base))
  253. return 0;
  254. return *mset_messages->iterator;
  255. }
  256. notmuch_message_t *
  257. _notmuch_mset_messages_get (notmuch_messages_t *messages)
  258. {
  259. notmuch_message_t *message;
  260. Xapian::docid doc_id;
  261. notmuch_private_status_t status;
  262. notmuch_mset_messages_t *mset_messages;
  263. mset_messages = (notmuch_mset_messages_t *) messages;
  264. if (! _notmuch_mset_messages_valid (&mset_messages->base))
  265. return NULL;
  266. doc_id = *mset_messages->iterator;
  267. message = _notmuch_message_create (mset_messages,
  268. mset_messages->notmuch, doc_id,
  269. &status);
  270. if (message == NULL &&
  271. status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
  272. {
  273. INTERNAL_ERROR ("a messages iterator contains a non-existent document ID.\n");
  274. }
  275. if (messages->excluded_doc_ids &&
  276. _notmuch_doc_id_set_contains (messages->excluded_doc_ids, doc_id))
  277. notmuch_message_set_flag (message, NOTMUCH_MESSAGE_FLAG_EXCLUDED, TRUE);
  278. return message;
  279. }
  280. void
  281. _notmuch_mset_messages_move_to_next (notmuch_messages_t *messages)
  282. {
  283. notmuch_mset_messages_t *mset_messages;
  284. mset_messages = (notmuch_mset_messages_t *) messages;
  285. mset_messages->iterator++;
  286. }
  287. static notmuch_bool_t
  288. _notmuch_doc_id_set_init (void *ctx,
  289. notmuch_doc_id_set_t *doc_ids,
  290. GArray *arr)
  291. {
  292. unsigned int max = 0;
  293. unsigned int *bitmap;
  294. for (unsigned int i = 0; i < arr->len; i++)
  295. max = MAX(max, g_array_index (arr, unsigned int, i));
  296. bitmap = talloc_zero_array (ctx, unsigned int, 1 + max / sizeof (*bitmap));
  297. if (bitmap == NULL)
  298. return FALSE;
  299. doc_ids->bitmap = bitmap;
  300. doc_ids->bound = max + 1;
  301. for (unsigned int i = 0; i < arr->len; i++) {
  302. unsigned int doc_id = g_array_index (arr, unsigned int, i);
  303. bitmap[DOCIDSET_WORD(doc_id)] |= 1 << DOCIDSET_BIT(doc_id);
  304. }
  305. return TRUE;
  306. }
  307. notmuch_bool_t
  308. _notmuch_doc_id_set_contains (notmuch_doc_id_set_t *doc_ids,
  309. unsigned int doc_id)
  310. {
  311. if (doc_id >= doc_ids->bound)
  312. return FALSE;
  313. return doc_ids->bitmap[DOCIDSET_WORD(doc_id)] & (1 << DOCIDSET_BIT(doc_id));
  314. }
  315. void
  316. _notmuch_doc_id_set_remove (notmuch_doc_id_set_t *doc_ids,
  317. unsigned int doc_id)
  318. {
  319. if (doc_id < doc_ids->bound)
  320. doc_ids->bitmap[DOCIDSET_WORD(doc_id)] &= ~(1 << DOCIDSET_BIT(doc_id));
  321. }
  322. /* Glib objects force use to use a talloc destructor as well, (but not
  323. * nearly as ugly as the for messages due to C++ objects). At
  324. * this point, I'd really like to have some talloc-friendly
  325. * equivalents for the few pieces of glib that I'm using. */
  326. static int
  327. _notmuch_threads_destructor (notmuch_threads_t *threads)
  328. {
  329. if (threads->doc_ids)
  330. g_array_unref (threads->doc_ids);
  331. return 0;
  332. }
  333. notmuch_threads_t *
  334. notmuch_query_search_threads (notmuch_query_t *query)
  335. {
  336. notmuch_threads_t *threads;
  337. notmuch_messages_t *messages;
  338. threads = talloc (query, notmuch_threads_t);
  339. if (threads == NULL)
  340. return NULL;
  341. threads->doc_ids = NULL;
  342. talloc_set_destructor (threads, _notmuch_threads_destructor);
  343. threads->query = query;
  344. messages = notmuch_query_search_messages (query);
  345. if (messages == NULL) {
  346. talloc_free (threads);
  347. return NULL;
  348. }
  349. threads->doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
  350. while (notmuch_messages_valid (messages)) {
  351. unsigned int doc_id = _notmuch_mset_messages_get_doc_id (messages);
  352. g_array_append_val (threads->doc_ids, doc_id);
  353. notmuch_messages_move_to_next (messages);
  354. }
  355. threads->doc_id_pos = 0;
  356. talloc_free (messages);
  357. if (! _notmuch_doc_id_set_init (threads, &threads->match_set,
  358. threads->doc_ids)) {
  359. talloc_free (threads);
  360. return NULL;
  361. }
  362. return threads;
  363. }
  364. void
  365. notmuch_query_destroy (notmuch_query_t *query)
  366. {
  367. talloc_free (query);
  368. }
  369. notmuch_bool_t
  370. notmuch_threads_valid (notmuch_threads_t *threads)
  371. {
  372. unsigned int doc_id;
  373. while (threads->doc_id_pos < threads->doc_ids->len) {
  374. doc_id = g_array_index (threads->doc_ids, unsigned int,
  375. threads->doc_id_pos);
  376. if (_notmuch_doc_id_set_contains (&threads->match_set, doc_id))
  377. break;
  378. threads->doc_id_pos++;
  379. }
  380. return threads->doc_id_pos < threads->doc_ids->len;
  381. }
  382. notmuch_thread_t *
  383. notmuch_threads_get (notmuch_threads_t *threads)
  384. {
  385. unsigned int doc_id;
  386. if (! notmuch_threads_valid (threads))
  387. return NULL;
  388. doc_id = g_array_index (threads->doc_ids, unsigned int,
  389. threads->doc_id_pos);
  390. return _notmuch_thread_create (threads->query,
  391. threads->query->notmuch,
  392. doc_id,
  393. &threads->match_set,
  394. threads->query->exclude_terms,
  395. threads->query->sort);
  396. }
  397. void
  398. notmuch_threads_move_to_next (notmuch_threads_t *threads)
  399. {
  400. threads->doc_id_pos++;
  401. }
  402. void
  403. notmuch_threads_destroy (notmuch_threads_t *threads)
  404. {
  405. talloc_free (threads);
  406. }
  407. unsigned
  408. notmuch_query_count_messages (notmuch_query_t *query)
  409. {
  410. notmuch_database_t *notmuch = query->notmuch;
  411. const char *query_string = query->query_string;
  412. Xapian::doccount count = 0;
  413. try {
  414. Xapian::Enquire enquire (*notmuch->xapian_db);
  415. Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
  416. _find_prefix ("type"),
  417. "mail"));
  418. Xapian::Query string_query, final_query, exclude_query;
  419. Xapian::MSet mset;
  420. unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
  421. Xapian::QueryParser::FLAG_PHRASE |
  422. Xapian::QueryParser::FLAG_LOVEHATE |
  423. Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
  424. Xapian::QueryParser::FLAG_WILDCARD |
  425. Xapian::QueryParser::FLAG_PURE_NOT);
  426. if (strcmp (query_string, "") == 0 ||
  427. strcmp (query_string, "*") == 0)
  428. {
  429. final_query = mail_query;
  430. } else {
  431. string_query = notmuch->query_parser->
  432. parse_query (query_string, flags);
  433. final_query = Xapian::Query (Xapian::Query::OP_AND,
  434. mail_query, string_query);
  435. }
  436. exclude_query = _notmuch_exclude_tags (query, final_query);
  437. final_query = Xapian::Query (Xapian::Query::OP_AND_NOT,
  438. final_query, exclude_query);
  439. enquire.set_weighting_scheme(Xapian::BoolWeight());
  440. enquire.set_docid_order(Xapian::Enquire::ASCENDING);
  441. if (_debug_query ()) {
  442. fprintf (stderr, "Exclude query is:\n%s\n",
  443. exclude_query.get_description ().c_str ());
  444. fprintf (stderr, "Final query is:\n%s\n",
  445. final_query.get_description ().c_str ());
  446. }
  447. enquire.set_query (final_query);
  448. mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
  449. count = mset.get_matches_estimated();
  450. } catch (const Xapian::Error &error) {
  451. fprintf (stderr, "A Xapian exception occurred: %s\n",
  452. error.get_msg().c_str());
  453. fprintf (stderr, "Query string was: %s\n", query->query_string);
  454. }
  455. return count;
  456. }
  457. unsigned
  458. notmuch_query_count_threads (notmuch_query_t *query)
  459. {
  460. notmuch_messages_t *messages;
  461. GHashTable *hash;
  462. unsigned int count;
  463. notmuch_sort_t sort;
  464. sort = query->sort;
  465. query->sort = NOTMUCH_SORT_UNSORTED;
  466. messages = notmuch_query_search_messages (query);
  467. query->sort = sort;
  468. if (messages == NULL)
  469. return 0;
  470. hash = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, NULL);
  471. if (hash == NULL) {
  472. talloc_free (messages);
  473. return 0;
  474. }
  475. while (notmuch_messages_valid (messages)) {
  476. notmuch_message_t *message = notmuch_messages_get (messages);
  477. const char *thread_id = notmuch_message_get_thread_id (message);
  478. char *thread_id_copy = talloc_strdup (messages, thread_id);
  479. if (unlikely (thread_id_copy == NULL)) {
  480. notmuch_message_destroy (message);
  481. count = 0;
  482. goto DONE;
  483. }
  484. g_hash_table_insert (hash, thread_id_copy, NULL);
  485. notmuch_message_destroy (message);
  486. notmuch_messages_move_to_next (messages);
  487. }
  488. count = g_hash_table_size (hash);
  489. DONE:
  490. g_hash_table_unref (hash);
  491. talloc_free (messages);
  492. return count;
  493. }