]> git.cworth.org Git - obsolete/notmuch-old/blob - lib/query.cc
Merge remote branch 'amdragon/search-perf-3'
[obsolete/notmuch-old] / lib / query.cc
1 /* query.cc - Support for searching a notmuch database
2  *
3  * Copyright © 2009 Carl Worth
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see http://www.gnu.org/licenses/ .
17  *
18  * Author: Carl Worth <cworth@cworth.org>
19  */
20
#include "notmuch-private.h"
#include "database-private.h"

#include <glib.h> /* GHashTable, GPtrArray */
#include <limits.h> /* CHAR_BIT */
25
26 struct _notmuch_query {
27     notmuch_database_t *notmuch;
28     const char *query_string;
29     notmuch_sort_t sort;
30 };
31
32 typedef struct _notmuch_mset_messages {
33     notmuch_messages_t base;
34     notmuch_database_t *notmuch;
35     Xapian::MSetIterator iterator;
36     Xapian::MSetIterator iterator_end;
37 } notmuch_mset_messages_t;
38
/* A set of document ids, stored as a bitmap. */
struct _notmuch_doc_id_set {
    /* Bit storage, addressed through DOCIDSET_WORD / DOCIDSET_BIT. */
    unsigned *bitmap;

    /* One more than the largest doc id seen when the set was built;
     * ids at or beyond this value are never members. */
    unsigned bound;
};
43
/* Locate the bit that represents a doc id inside the bitmap:
 * DOCIDSET_WORD is the index of the unsigned int that holds it and
 * DOCIDSET_BIT is the bit position within that word.
 *
 * Both are computed from the number of *bits* in a word (not the
 * number of bytes), so every bit of every word is put to use.
 */
#define DOCIDSET_BITS_PER_WORD (sizeof (unsigned int) * CHAR_BIT)
#define DOCIDSET_WORD(bit) ((bit) / DOCIDSET_BITS_PER_WORD)
#define DOCIDSET_BIT(bit) ((bit) % DOCIDSET_BITS_PER_WORD)
46
47 struct _notmuch_threads {
48     notmuch_query_t *query;
49
50     /* The ordered list of doc ids matched by the query. */
51     GArray *doc_ids;
52     /* Our iterator's current position in doc_ids. */
53     unsigned int doc_id_pos;
54     /* The set of matched docid's that have not been assigned to a
55      * thread. Initially, this contains every docid in doc_ids. */
56     notmuch_doc_id_set_t match_set;
57 };
58
59 notmuch_query_t *
60 notmuch_query_create (notmuch_database_t *notmuch,
61                       const char *query_string)
62 {
63     notmuch_query_t *query;
64
65 #ifdef DEBUG_QUERY
66     fprintf (stderr, "Query string is:\n%s\n", query_string);
67 #endif
68
69     query = talloc (NULL, notmuch_query_t);
70     if (unlikely (query == NULL))
71         return NULL;
72
73     query->notmuch = notmuch;
74
75     query->query_string = talloc_strdup (query, query_string);
76
77     query->sort = NOTMUCH_SORT_NEWEST_FIRST;
78
79     return query;
80 }
81
82 const char *
83 notmuch_query_get_query_string (notmuch_query_t *query)
84 {
85     return query->query_string;
86 }
87
88 void
89 notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
90 {
91     query->sort = sort;
92 }
93
94 notmuch_sort_t
95 notmuch_query_get_sort (notmuch_query_t *query)
96 {
97     return query->sort;
98 }
99
100 /* We end up having to call the destructors explicitly because we had
101  * to use "placement new" in order to initialize C++ objects within a
102  * block that we allocated with talloc. So C++ is making talloc
103  * slightly less simple to use, (we wouldn't need
104  * talloc_set_destructor at all otherwise).
105  */
106 static int
107 _notmuch_messages_destructor (notmuch_mset_messages_t *messages)
108 {
109     messages->iterator.~MSetIterator ();
110     messages->iterator_end.~MSetIterator ();
111
112     return 0;
113 }
114
115 notmuch_messages_t *
116 notmuch_query_search_messages (notmuch_query_t *query)
117 {
118     notmuch_database_t *notmuch = query->notmuch;
119     const char *query_string = query->query_string;
120     notmuch_mset_messages_t *messages;
121
122     messages = talloc (query, notmuch_mset_messages_t);
123     if (unlikely (messages == NULL))
124         return NULL;
125
126     try {
127
128         messages->base.is_of_list_type = FALSE;
129         messages->base.iterator = NULL;
130         messages->notmuch = notmuch;
131         new (&messages->iterator) Xapian::MSetIterator ();
132         new (&messages->iterator_end) Xapian::MSetIterator ();
133
134         talloc_set_destructor (messages, _notmuch_messages_destructor);
135
136         Xapian::Enquire enquire (*notmuch->xapian_db);
137         Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
138                                                    _find_prefix ("type"),
139                                                    "mail"));
140         Xapian::Query string_query, final_query;
141         Xapian::MSet mset;
142         unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
143                               Xapian::QueryParser::FLAG_PHRASE |
144                               Xapian::QueryParser::FLAG_LOVEHATE |
145                               Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
146                               Xapian::QueryParser::FLAG_WILDCARD |
147                               Xapian::QueryParser::FLAG_PURE_NOT);
148
149         if (strcmp (query_string, "") == 0 ||
150             strcmp (query_string, "*") == 0)
151         {
152             final_query = mail_query;
153         } else {
154             string_query = notmuch->query_parser->
155                 parse_query (query_string, flags);
156             final_query = Xapian::Query (Xapian::Query::OP_AND,
157                                          mail_query, string_query);
158         }
159
160         enquire.set_weighting_scheme (Xapian::BoolWeight());
161
162         switch (query->sort) {
163         case NOTMUCH_SORT_OLDEST_FIRST:
164             enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE);
165             break;
166         case NOTMUCH_SORT_NEWEST_FIRST:
167             enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, TRUE);
168             break;
169         case NOTMUCH_SORT_MESSAGE_ID:
170             enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, FALSE);
171             break;
172         case NOTMUCH_SORT_UNSORTED:
173             break;
174         }
175
176 #if DEBUG_QUERY
177         fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
178 #endif
179
180         enquire.set_query (final_query);
181
182         mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
183
184         messages->iterator = mset.begin ();
185         messages->iterator_end = mset.end ();
186
187         return &messages->base;
188
189     } catch (const Xapian::Error &error) {
190         fprintf (stderr, "A Xapian exception occurred performing query: %s\n",
191                  error.get_msg().c_str());
192         fprintf (stderr, "Query string was: %s\n", query->query_string);
193         notmuch->exception_reported = TRUE;
194         talloc_free (messages);
195         return NULL;
196     }
197 }
198
199 notmuch_bool_t
200 _notmuch_mset_messages_valid (notmuch_messages_t *messages)
201 {
202     notmuch_mset_messages_t *mset_messages;
203
204     mset_messages = (notmuch_mset_messages_t *) messages;
205
206     return (mset_messages->iterator != mset_messages->iterator_end);
207 }
208
209 static Xapian::docid
210 _notmuch_mset_messages_get_doc_id (notmuch_messages_t *messages)
211 {
212     notmuch_mset_messages_t *mset_messages;
213
214     mset_messages = (notmuch_mset_messages_t *) messages;
215
216     if (! _notmuch_mset_messages_valid (&mset_messages->base))
217         return 0;
218
219     return *mset_messages->iterator;
220 }
221
222 notmuch_message_t *
223 _notmuch_mset_messages_get (notmuch_messages_t *messages)
224 {
225     notmuch_message_t *message;
226     Xapian::docid doc_id;
227     notmuch_private_status_t status;
228     notmuch_mset_messages_t *mset_messages;
229
230     mset_messages = (notmuch_mset_messages_t *) messages;
231
232     if (! _notmuch_mset_messages_valid (&mset_messages->base))
233         return NULL;
234
235     doc_id = *mset_messages->iterator;
236
237     message = _notmuch_message_create (mset_messages,
238                                        mset_messages->notmuch, doc_id,
239                                        &status);
240
241     if (message == NULL &&
242        status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
243     {
244         INTERNAL_ERROR ("a messages iterator contains a non-existent document ID.\n");
245     }
246
247     return message;
248 }
249
250 void
251 _notmuch_mset_messages_move_to_next (notmuch_messages_t *messages)
252 {
253     notmuch_mset_messages_t *mset_messages;
254
255     mset_messages = (notmuch_mset_messages_t *) messages;
256
257     mset_messages->iterator++;
258 }
259
260 static notmuch_bool_t
261 _notmuch_doc_id_set_init (void *ctx,
262                           notmuch_doc_id_set_t *doc_ids,
263                           GArray *arr)
264 {
265     unsigned int max = 0;
266     unsigned int *bitmap;
267
268     for (unsigned int i = 0; i < arr->len; i++)
269         max = MAX(max, g_array_index (arr, unsigned int, i));
270     bitmap = talloc_zero_array (ctx, unsigned int, 1 + max / sizeof (*bitmap));
271
272     if (bitmap == NULL)
273         return FALSE;
274
275     doc_ids->bitmap = bitmap;
276     doc_ids->bound = max + 1;
277
278     for (unsigned int i = 0; i < arr->len; i++) {
279         unsigned int doc_id = g_array_index (arr, unsigned int, i);
280         bitmap[DOCIDSET_WORD(doc_id)] |= 1 << DOCIDSET_BIT(doc_id);
281     }
282
283     return TRUE;
284 }
285
286 notmuch_bool_t
287 _notmuch_doc_id_set_contains (notmuch_doc_id_set_t *doc_ids,
288                               unsigned int doc_id)
289 {
290     if (doc_id >= doc_ids->bound)
291         return FALSE;
292     return doc_ids->bitmap[DOCIDSET_WORD(doc_id)] & (1 << DOCIDSET_BIT(doc_id));
293 }
294
295 void
296 _notmuch_doc_id_set_remove (notmuch_doc_id_set_t *doc_ids,
297                             unsigned int doc_id)
298 {
299     if (doc_id < doc_ids->bound)
300         doc_ids->bitmap[DOCIDSET_WORD(doc_id)] &= ~(1 << DOCIDSET_BIT(doc_id));
301 }
302
303 /* Glib objects force use to use a talloc destructor as well, (but not
304  * nearly as ugly as the for messages due to C++ objects). At
305  * this point, I'd really like to have some talloc-friendly
306  * equivalents for the few pieces of glib that I'm using. */
307 static int
308 _notmuch_threads_destructor (notmuch_threads_t *threads)
309 {
310     if (threads->doc_ids)
311         g_array_unref (threads->doc_ids);
312
313     return 0;
314 }
315
316 notmuch_threads_t *
317 notmuch_query_search_threads (notmuch_query_t *query)
318 {
319     notmuch_threads_t *threads;
320     notmuch_messages_t *messages;
321
322     threads = talloc (query, notmuch_threads_t);
323     if (threads == NULL)
324         return NULL;
325     threads->doc_ids = NULL;
326     talloc_set_destructor (threads, _notmuch_threads_destructor);
327
328     threads->query = query;
329
330     messages = notmuch_query_search_messages (query);
331     if (messages == NULL) {
332             talloc_free (threads);
333             return NULL;
334     }
335
336     threads->doc_ids = g_array_new (FALSE, FALSE, sizeof (unsigned int));
337     while (notmuch_messages_valid (messages)) {
338         unsigned int doc_id = _notmuch_mset_messages_get_doc_id (messages);
339         g_array_append_val (threads->doc_ids, doc_id);
340         notmuch_messages_move_to_next (messages);
341     }
342     threads->doc_id_pos = 0;
343
344     talloc_free (messages);
345
346     if (! _notmuch_doc_id_set_init (threads, &threads->match_set,
347                                     threads->doc_ids)) {
348         talloc_free (threads);
349         return NULL;
350     }
351
352     return threads;
353 }
354
355 void
356 notmuch_query_destroy (notmuch_query_t *query)
357 {
358     talloc_free (query);
359 }
360
361 notmuch_bool_t
362 notmuch_threads_valid (notmuch_threads_t *threads)
363 {
364     unsigned int doc_id;
365
366     while (threads->doc_id_pos < threads->doc_ids->len) {
367         doc_id = g_array_index (threads->doc_ids, unsigned int,
368                                 threads->doc_id_pos);
369         if (_notmuch_doc_id_set_contains (&threads->match_set, doc_id))
370             break;
371
372         threads->doc_id_pos++;
373     }
374
375     return threads->doc_id_pos < threads->doc_ids->len;
376 }
377
378 notmuch_thread_t *
379 notmuch_threads_get (notmuch_threads_t *threads)
380 {
381     unsigned int doc_id;
382
383     if (! notmuch_threads_valid (threads))
384         return NULL;
385
386     doc_id = g_array_index (threads->doc_ids, unsigned int,
387                             threads->doc_id_pos);
388     return _notmuch_thread_create (threads->query,
389                                    threads->query->notmuch,
390                                    doc_id,
391                                    &threads->match_set,
392                                    threads->query->sort);
393 }
394
395 void
396 notmuch_threads_move_to_next (notmuch_threads_t *threads)
397 {
398     threads->doc_id_pos++;
399 }
400
401 void
402 notmuch_threads_destroy (notmuch_threads_t *threads)
403 {
404     talloc_free (threads);
405 }
406
407 unsigned
408 notmuch_query_count_messages (notmuch_query_t *query)
409 {
410     notmuch_database_t *notmuch = query->notmuch;
411     const char *query_string = query->query_string;
412     Xapian::doccount count = 0;
413
414     try {
415         Xapian::Enquire enquire (*notmuch->xapian_db);
416         Xapian::Query mail_query (talloc_asprintf (query, "%s%s",
417                                                    _find_prefix ("type"),
418                                                    "mail"));
419         Xapian::Query string_query, final_query;
420         Xapian::MSet mset;
421         unsigned int flags = (Xapian::QueryParser::FLAG_BOOLEAN |
422                               Xapian::QueryParser::FLAG_PHRASE |
423                               Xapian::QueryParser::FLAG_LOVEHATE |
424                               Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
425                               Xapian::QueryParser::FLAG_WILDCARD |
426                               Xapian::QueryParser::FLAG_PURE_NOT);
427
428         if (strcmp (query_string, "") == 0 ||
429             strcmp (query_string, "*") == 0)
430         {
431             final_query = mail_query;
432         } else {
433             string_query = notmuch->query_parser->
434                 parse_query (query_string, flags);
435             final_query = Xapian::Query (Xapian::Query::OP_AND,
436                                          mail_query, string_query);
437         }
438
439         enquire.set_weighting_scheme(Xapian::BoolWeight());
440         enquire.set_docid_order(Xapian::Enquire::ASCENDING);
441
442 #if DEBUG_QUERY
443         fprintf (stderr, "Final query is:\n%s\n", final_query.get_description().c_str());
444 #endif
445
446         enquire.set_query (final_query);
447
448         mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
449
450         count = mset.get_matches_estimated();
451
452     } catch (const Xapian::Error &error) {
453         fprintf (stderr, "A Xapian exception occurred: %s\n",
454                  error.get_msg().c_str());
455         fprintf (stderr, "Query string was: %s\n", query->query_string);
456     }
457
458     return count;
459 }