1 /* database.cc - The database interfaces of the notmuch mail library
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
21 #include "database-private.h"
27 #include <glib.h> /* g_strdup_printf, g_free, GPtrArray, GHashTable */
// Translate a notmuch_status_t code into a fixed English description.
// Every visible branch returns a string literal, so the caller must not
// free or modify the result.
32 notmuch_status_to_string (notmuch_status_t status)
35 case NOTMUCH_STATUS_SUCCESS:
36 return "No error occurred";
37 case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
38 return "A Xapian exception occurred";
39 case NOTMUCH_STATUS_FILE_ERROR:
40 return "Something went wrong trying to read or write a file";
41 case NOTMUCH_STATUS_FILE_NOT_EMAIL:
42 return "File is not an email";
43 case NOTMUCH_STATUS_NULL_POINTER:
44 return "Erroneous NULL pointer";
45 case NOTMUCH_STATUS_TAG_TOO_LONG:
46 return "Tag value is too long";
// The sentinel NOTMUCH_STATUS_LAST_STATUS gets the generic message.
// NOTE(review): the line just before this case is not shown in this
// listing; presumably a default label shares this return -- confirm.
48 case NOTMUCH_STATUS_LAST_STATUS:
49 return "Unknown error status value";
53 /* XXX: We should drop this function and convert all callers to call
54 * _notmuch_message_add_term instead. */
// Build the term made of the prefix registered for prefix_name followed
// directly by value, for use with 'doc'.
// NOTE(review): the statement that actually adds the term to 'doc' and
// the release of 'term' are on lines not shown in this listing.
56 add_term (Xapian::Document doc,
57 const char *prefix_name,
// Map the symbolic prefix name to the prefix string used in terms.
66 prefix = _find_prefix (prefix_name);
// g_strdup_printf returns a newly allocated string.
68 term = g_strdup_printf ("%s%s", prefix, value);
// Only a term of at most NOTMUCH_TERM_MAX bytes satisfies this check.
70 if (strlen (term) <= NOTMUCH_TERM_MAX)
// Compute the posting-list range for the term built from prefix_name
// and value, storing its start in *begin and its end in *end.
77 find_doc_ids (notmuch_database_t *notmuch,
78 const char *prefix_name,
80 Xapian::PostingIterator *begin,
81 Xapian::PostingIterator *end)
// NOTE(review): 'i' is not used on any line visible in this listing.
83 Xapian::PostingIterator i;
// The term is the looked-up prefix immediately followed by value.
86 term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value);
88 *begin = notmuch->xapian_db->postlist_begin (term);
90 *end = notmuch->xapian_db->postlist_end (term);
// Resolve the term built from prefix_name and value to a document id.
// Visible results are NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND and
// NOTMUCH_PRIVATE_STATUS_SUCCESS.
// NOTE(review): the test that selects between them and the store
// through the doc_id out-parameter are on lines not shown here.
95 static notmuch_private_status_t
96 find_unique_doc_id (notmuch_database_t *notmuch,
97 const char *prefix_name,
101 Xapian::PostingIterator i, end;
// Fetch the posting range [i, end) for the prefixed term.
103 find_doc_ids (notmuch, prefix_name, value, &i, &end);
107 return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND;
110 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
// Fetch the document stored under doc_id from notmuch->xapian_db.
// NOTE(review): no exception handling is visible here; confirm that
// callers cope with a failing get_document.
114 static Xapian::Document
115 find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
117 return notmuch->xapian_db->get_document (doc_id);
// Look up the document matching the term built from prefix_name and
// value, returning the document through *document and its id through
// *doc_id.
120 static notmuch_private_status_t
121 find_unique_document (notmuch_database_t *notmuch,
122 const char *prefix_name,
124 Xapian::Document *document,
125 unsigned int *doc_id)
127 notmuch_private_status_t status;
// The id lookup writes straight into the caller's doc_id.
129 status = find_unique_doc_id (notmuch, prefix_name, value, doc_id);
// An empty document is stored here.
// NOTE(review): presumably this is the lookup-failure path, guarded by
// a status check on a line not shown -- confirm.
132 *document = Xapian::Document ();
// Load the document for the id produced by the lookup above.
136 *document = find_document_for_doc_id (notmuch, *doc_id);
137 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
// Insert thread ids read from doc's NOTMUCH_VALUE_THREAD value into the
// thread_ids hash table as keys (the associated values are NULL).
// NOTE(review): the loop structure around the comma handling is on
// lines not shown in this listing.
141 insert_thread_id (GHashTable *thread_ids, Xapian::Document doc)
144 const char *value, *id, *comma;
146 value_string = doc.get_value (NOTMUCH_VALUE_THREAD);
// 'value' aliases value_string's buffer; it is not a separate copy.
147 value = value_string.c_str();
// Nothing is inserted when the stored value is empty.
148 if (strlen (value)) {
// An id runs up to the next ',' ...
151 comma = strchr (id, ',');
// ... or up to the end of the string.
// NOTE(review): presumably under a no-comma-found check not shown.
153 comma = id + strlen (id);
// The key is a freshly allocated strndup copy of just this id.
154 g_hash_table_insert (thread_ids,
155 strndup (id, comma - id), NULL);
// Find the document carrying a "msgid" term for message_id and wrap it
// in a message object created by _notmuch_message_create.
164 notmuch_database_find_message (notmuch_database_t *notmuch,
165 const char *message_id)
167 notmuch_private_status_t status;
170 status = find_unique_doc_id (notmuch, "msgid", message_id, &doc_id);
// NOTE(review): the statement controlled by this check is not shown in
// this listing; presumably it returns early when nothing matched.
172 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
// The database handle is passed twice.
// NOTE(review): the first argument is presumably the talloc owner of
// the new message -- confirm against _notmuch_message_create.
175 return _notmuch_message_create (notmuch, notmuch, doc_id);
178 /* Return one or more thread_ids, (as a GPtrArray of strings), for the
179 * given message based on looking into the database for any messages
180 * referenced in parents, and also for any messages in the database
181 * referencing message_id.
183 * Caller should free all strings in the array and the array itself,
184 * (g_ptr_array_free) when done. */
186 find_thread_ids (notmuch_database_t *notmuch,
188 const char *message_id)
190 Xapian::PostingIterator child, children_end;
191 Xapian::Document doc;
192 GHashTable *thread_ids;
195 const char *parent_message_id;
// The hash table is used as a set: ids go in as keys with NULL values,
// so an id reached by several routes is kept only once.
198 thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal,
// Pass 1: documents carrying a "ref" term equal to message_id, i.e.
// messages already in the database that reference this one.
201 find_doc_ids (notmuch, "ref", message_id, &child, &children_end);
202 for ( ; child != children_end; child++) {
203 doc = find_document_for_doc_id (notmuch, *child);
204 insert_thread_id (thread_ids, doc);
// Pass 2: thread ids of each parent message looked up in the database.
207 for (i = 0; i < parents->len; i++) {
208 notmuch_message_t *parent;
209 notmuch_thread_ids_t *ids;
211 parent_message_id = (char *) g_ptr_array_index (parents, i);
212 parent = notmuch_database_find_message (notmuch, parent_message_id);
// NOTE(review): lines between the lookup and this loop are not shown;
// confirm that a parent that was not found is skipped before use.
216 for (ids = notmuch_message_get_thread_ids (parent);
217 notmuch_thread_ids_has_more (ids);
218 notmuch_thread_ids_advance (ids))
222 id = notmuch_thread_ids_get (ids);
// strdup gives the table its own copy of the id as key.
223 g_hash_table_insert (thread_ids, strdup (id), NULL);
226 notmuch_message_destroy (parent);
// Move the collected keys into the array that is handed back.
229 result = g_ptr_array_new ();
231 keys = g_hash_table_get_keys (thread_ids);
232 for (l = keys; l; l = l->next) {
233 char *id = (char *) l->data;
234 g_ptr_array_add (result, id);
238 /* We're done with the hash table, but we've taken the pointers to
239 * the allocated strings and put them into our result array, so
240 * tell the hash not to free them on its way out. */
241 g_hash_table_steal_all (thread_ids);
242 g_hash_table_unref (thread_ids);
247 /* Advance 'str' past any whitespace or RFC 822 comments. A comment is
248 * a (potentially nested) parenthesized sequence with '\' used to
249 * escape any character (including parentheses).
251 * If the sequence to be skipped continues to the end of the string,
252 * then 'str' will be left pointing at the final terminating '\0'
256 skip_space_and_comments (const char **str)
// Outer loop: keep going while the next character is whitespace or
// opens a parenthesized comment.
// NOTE(review): if 's' is a plain char pointer, isspace is handed a
// possibly negative value for bytes above 0x7f; consider a cast to
// unsigned char.
261 while (*s && (isspace (*s) || *s == '(')) {
262 while (*s && isspace (*s))
// Inner loop: runs until the nesting count reaches zero or the string
// ends.
267 while (*s && nesting) {
283 /* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
284 * comments, and the '<' and '>' delimiters.
286 * If not NULL, then *next will be made to point to the first character
287 * not parsed, (possibly pointing to the final '\0' terminator).
289 * Returns a newly allocated string which the caller should free()
292 * Returns NULL if there is any error parsing the message-id. */
294 parse_message_id (const char *message_id, const char **next)
// NOTE(review): the statement controlled by this check is not shown;
// per the header comment a NULL result is the error return.
299 if (message_id == NULL)
304 skip_space_and_comments (&s);
306 /* Skip any unstructured text as well. */
307 while (*s && *s != '<')
318 skip_space_and_comments (&s);
// Scan forward to the closing '>' or to the end of the string.
321 while (*end && *end != '>')
330 if (end > s && *end == '>')
// Copy the inclusive range from s through end.
335 result = strndup (s, end - s + 1);
// NOTE(review): in the loop below, after memmove shifts the tail left
// by one, the loop's r++ steps over the character that just moved into
// position r, so the second of two adjacent blanks appears to survive
// -- confirm and, if so, re-test the same position after a removal.
337 /* Finally, collapse any whitespace that is within the message-id
343 for (r = result, len = strlen (r); *r; r++, len--)
344 if (*r == ' ' || *r == '\t')
345 memmove (r, r+1, len);
351 /* Parse a References header value, putting a copy of each referenced
352 * message-id into 'array'. */
354 parse_references (GPtrArray *array,
// The second argument is parse_message_id's 'next' out-parameter, so
// refs is moved to the first character that was not parsed.
363 ref = parse_message_id (refs, &refs);
// The array stores the newly allocated string pointer itself; whoever
// owns the array is responsible for freeing it later.
366 g_ptr_array_add (array, ref);
// Return a newly allocated default mail path: the value of the
// NOTMUCH_BASE environment variable when it is set, otherwise the
// value of HOME with "/mail" appended.
// NOTE(review): the two branches allocate with different functions
// (strdup vs. g_strdup_printf), so no single release call is obviously
// right for both -- confirm what callers use.
// NOTE(review): getenv ("HOME") is passed to "%s" unchecked; getenv
// returns NULL when the variable is unset.
371 notmuch_database_default_path (void)
373 if (getenv ("NOTMUCH_BASE"))
374 return strdup (getenv ("NOTMUCH_BASE"));
376 return g_strdup_printf ("%s/mail", getenv ("HOME"));
// Create a notmuch database under 'path': check that path is a
// directory, create its ".notmuch" subdirectory, then hand over to
// notmuch_database_open. Failures are reported on stderr.
380 notmuch_database_create (const char *path)
382 notmuch_database_t *notmuch = NULL;
383 char *notmuch_path = NULL;
386 char *local_path = NULL;
// Fall back to the default path; local_path keeps the allocation.
// NOTE(review): presumably guarded by a path == NULL check on a line
// not shown in this listing.
389 path = local_path = notmuch_database_default_path ();
// The target must already exist ...
391 err = stat (path, &st);
393 fprintf (stderr, "Error: Cannot create database at %s: %s.\n",
394 path, strerror (errno));
// ... and must be a directory.
398 if (! S_ISDIR (st.st_mode)) {
399 fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n",
404 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
// Requested mode is 0755.
406 err = mkdir (notmuch_path, 0755);
409 fprintf (stderr, "Error: Cannot create directory %s: %s.\n",
410 notmuch_path, strerror (errno));
// notmuch_database_open in this file opens Xapian with
// DB_CREATE_OR_OPEN, so it also creates the index on first use.
414 notmuch = notmuch_database_open (path);
// Open the notmuch database rooted at 'path': require the ".notmuch"
// directory to exist, then open (or create) the Xapian database in its
// "xapian" subdirectory and set up a query parser bound to it.
426 notmuch_database_open (const char *path)
428 notmuch_database_t *notmuch = NULL;
429 char *notmuch_path = NULL, *xapian_path = NULL;
432 char *local_path = NULL;
// Fall back to the default path; local_path keeps the allocation.
// NOTE(review): presumably guarded by a path == NULL check on a line
// not shown in this listing.
435 path = local_path = notmuch_database_default_path ();
437 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
// A failing stat on the .notmuch directory is reported on stderr.
439 err = stat (notmuch_path, &st);
441 fprintf (stderr, "Error opening database at %s: %s\n",
442 notmuch_path, strerror (errno));
446 xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian");
// The handle is allocated with a NULL talloc parent; the path copy is
// allocated as a child of the handle.
// NOTE(review): the talloc result is dereferenced on the next line
// with no NULL check visible in this listing.
448 notmuch = talloc (NULL, notmuch_database_t);
449 notmuch->path = talloc_strdup (notmuch, path);
452 notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
453 Xapian::DB_CREATE_OR_OPEN);
454 notmuch->query_parser = new Xapian::QueryParser;
// Query terms are combined with AND unless the query says otherwise.
455 notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
456 notmuch->query_parser->set_database (*notmuch->xapian_db);
// Xapian failures are reported on stderr rather than propagated.
457 } catch (const Xapian::Error &error) {
458 fprintf (stderr, "A Xapian exception occurred: %s\n",
459 error.get_msg().c_str());
// Tear down a handle produced by notmuch_database_open: delete the two
// objects created there with new, then free the talloc-allocated
// handle itself. The handle must not be used afterwards.
474 notmuch_database_close (notmuch_database_t *notmuch)
476 delete notmuch->query_parser;
477 delete notmuch->xapian_db;
478 talloc_free (notmuch);
// Return the path string stored in the database handle. The pointer
// returned is the handle's own field, not a copy.
482 notmuch_database_get_path (notmuch_database_t *notmuch)
484 return notmuch->path;
// Index the mail file 'filename' into the database: determine a
// message id, obtain the message object for it, link it to threads
// through its References and In-Reply-To headers, record the date, and
// sync the message.
// Status values assigned on visible lines: NOTMUCH_STATUS_SUCCESS (the
// initial value), NOTMUCH_STATUS_FILE_ERROR,
// NOTMUCH_STATUS_FILE_NOT_EMAIL and NOTMUCH_STATUS_XAPIAN_EXCEPTION.
488 notmuch_database_add_message (notmuch_database_t *notmuch,
489 const char *filename)
491 notmuch_message_file_t *message_file;
492 notmuch_message_t *message;
493 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
495 GPtrArray *parents, *thread_ids;
497 const char *refs, *in_reply_to, *date, *header;
498 const char *from, *to, *subject, *old_filename;
// A file that cannot be opened is reported as a file error.
503 message_file = notmuch_message_file_open (filename);
504 if (message_file == NULL) {
505 ret = NOTMUCH_STATUS_FILE_ERROR;
509 notmuch_message_file_restrict_headers (message_file,
520 /* The first order of business is to find/create a message ID. */
522 header = notmuch_message_file_get_header (message_file, "message-id");
// NULL for 'next': where parsing stopped is of no interest here.
// NOTE(review): message_id is produced by three different allocators
// on the paths below (strndup inside parse_message_id, xstrdup,
// g_strdup_printf); confirm the eventual release call suits all three.
524 message_id = parse_message_id (header, NULL);
525 /* So the header value isn't RFC-compliant, but it's
526 * better than no message-id at all. */
527 if (message_id == NULL)
528 message_id = xstrdup (header);
530 /* No message-id at all, let's generate one by taking a
531 * hash over the file's contents. */
532 char *sha1 = notmuch_sha1_of_file (filename);
534 /* If that failed too, something is really wrong. Give up. */
536 ret = NOTMUCH_STATUS_FILE_ERROR;
// Synthetic ids carry a fixed "notmuch-sha1-" prefix.
540 message_id = g_strdup_printf ("notmuch-sha1-%s", sha1);
544 /* Now that we have a message ID, we get a message object,
545 * (which may or may not reference an existing document in the
548 /* Use NULL for owner since we want to free this locally. */
550 /* XXX: This call can fail by either out-of-memory or an
551 * "impossible" Xapian exception. We should rewrite it to
552 * allow us to propagate the error status. */
553 message = _notmuch_message_create_for_message_id (NULL, notmuch,
555 if (message == NULL) {
556 fprintf (stderr, "Internal error. This shouldn't happen.\n\n");
557 fprintf (stderr, "I mean, it's possible you ran out of memory, but then this code path is still an internal error since it should have detected that and propagated the status value up the stack.\n");
561 /* Has a message previously been added with the same ID? */
562 old_filename = notmuch_message_get_filename (message);
// A non-empty stored filename means the id is already in the database.
563 if (old_filename && strlen (old_filename)) {
564 /* XXX: This is too noisy to actually print, and what do we
565 * really expect the user to do? Go manually delete a
566 * redundant message or merge two similar messages?
567 * Instead we should handle this transparently.
569 * What we likely want to move to is adding both filenames
570 * to the database so that subsequent indexing will pick up
571 * terms from both files.
575 "Note: Attempting to add a message with a duplicate message ID:\n"
576 "Old: %s\n" "New: %s\n",
577 old_filename, filename);
578 fprintf (stderr, "The old filename will be used, but any new terms\n"
579 "from the new message will added to the database.\n");
// Record the file and mark the document as mail.
// NOTE(review): presumably the branch for a message not seen before;
// the lines that establish this are not shown in this listing.
582 _notmuch_message_set_filename (message, filename);
583 _notmuch_message_add_term (message, "type", "mail");
586 /* Next, find the thread(s) to which this message belongs. */
587 parents = g_ptr_array_new ();
// Ids from both headers are collected into the one 'parents' array.
589 refs = notmuch_message_file_get_header (message_file, "references");
590 parse_references (parents, refs);
592 in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
593 parse_references (parents, in_reply_to);
// Each parent id becomes a "ref" term on this message.
595 for (i = 0; i < parents->len; i++)
596 _notmuch_message_add_term (message, "ref",
597 (char *) g_ptr_array_index (parents, i));
599 thread_ids = find_thread_ids (notmuch, parents, message_id);
// Release the parent id strings, then the array itself.
603 for (i = 0; i < parents->len; i++)
604 g_free (g_ptr_array_index (parents, i));
605 g_ptr_array_free (parents, TRUE);
// With existing thread ids, attach each one to the message and build a
// comma-separated list of them in 'thread_id'.
607 if (thread_ids->len) {
612 for (i = 0; i < thread_ids->len; i++) {
613 id = (char *) thread_ids->pdata[i];
614 _notmuch_message_add_thread_id (message, id);
616 thread_id = g_string_new (id);
618 g_string_append_printf (thread_id, ",%s", id);
622 g_string_free (thread_id, TRUE);
// NOTE(review): presumably the no-existing-thread branch; the else
// line itself is not shown in this listing.
624 _notmuch_message_ensure_thread_id (message);
627 g_ptr_array_free (thread_ids, TRUE);
629 date = notmuch_message_file_get_header (message_file, "date");
630 _notmuch_message_set_date (message, date);
632 from = notmuch_message_file_get_header (message_file, "from");
633 subject = notmuch_message_file_get_header (message_file, "subject");
634 to = notmuch_message_file_get_header (message_file, "to");
// NOTE(review): the condition guarding this assignment is not shown;
// presumably it inspects the from/subject/to headers fetched above.
640 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
643 _notmuch_message_sync (message);
// Xapian failures are reported on stderr and mapped to a status code.
645 } catch (const Xapian::Error &error) {
646 fprintf (stderr, "A Xapian exception occurred: %s.\n",
647 error.get_msg().c_str());
648 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
// Release the message object and the message file.
654 notmuch_message_destroy (message);
656 notmuch_message_file_close (message_file);