1 /* database.cc - The database interfaces of the notmuch mail library
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
21 #include "database-private.h"
27 #include <glib.h> /* g_strdup_printf, g_free, GPtrArray, GHashTable */
31 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
38 /* Here's the current schema for our database:
40 * We currently have two different types of documents: mail and timestamps.
44 * A mail document is associated with a particular email message file
45 * on disk. It is indexed with the following prefixed terms:
47 * Single terms of given prefix:
51 * id: Unique ID of mail, (from Message-ID header or generated
52 * as "notmuch-sha1-<sha1_sum_of_entire_file>").
54 * Multiple terms of given prefix:
56 * ref: The message IDs from all In-Reply-To and References
57 * headers in the message.
59 * tag: Any tags associated with this message by the user.
61 * thread: The thread ID of all threads to which the mail belongs
63 * A mail document also has two values:
65 * TIMESTAMP: The time_t value corresponding to the message's
68 * MESSAGE_ID: The unique ID of the mail message (see "id" above)
72 * A timestamp document is used by a client of the notmuch library to
73 * maintain data necessary to allow for efficient polling of mail
74 * directories. The notmuch library does no interpretation of
75 * timestamps, but merely allows the user to store and retrieve
76 * timestamps as name/value pairs.
78 * The timestamp document is indexed with a single prefixed term:
80 * timestamp: The user's key value (likely a directory name)
82 * and has a single value:
84 * TIMESTAMP: The time_t value from the user.
87 /* With these prefix values we follow the conventions published here:
89 * http://xapian.org/docs/omega/termprefixes.html
91 * as much as makes sense. Note that I took some liberty in matching
92 * the reserved prefix values to notmuch concepts, (for example, 'G'
93 * is documented as "newsGroup (or similar entity - e.g. a web forum
94 * name)", for which I think the thread is the closest analogue in
95 * notmuch. This in spite of the fact that we will eventually be
96 * storing mailing-list messages where 'G' for "mailing list name"
97 * might be even a closer analogue. I'm treating the single-character
98 * prefixes preferentially for core notmuch concepts (which will be
99 * nearly universal to all mail messages).
/* Table mapping prefix names to term prefixes.  _find_prefix searches
 * this table first.  The visible code in notmuch_database_open does
 * not register these entries with the query parser. */
102 prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
105 { "ref", "XREFERENCE" },
106 { "timestamp", "XTIMESTAMP" },
/* Table mapping prefix names to term prefixes.  _find_prefix searches
 * this table second, and notmuch_database_open registers each entry
 * with the query parser via add_boolean_prefix. */
109 prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
/* Look up the term prefix string registered under 'name'.
 *
 * BOOLEAN_PREFIX_INTERNAL is searched first, then
 * BOOLEAN_PREFIX_EXTERNAL; the 'prefix' member of the first entry
 * whose 'name' compares equal (strcmp) is returned.
 *
 * If neither table contains 'name', an internal-error message is
 * printed to stderr.  NOTE(review): what happens after that message
 * is not visible here -- confirm, since callers in this file pass the
 * result straight to "%s" formatting and strlen(). */
115 _find_prefix (const char *name)
    /* First pass: the internal table. */
119 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_INTERNAL); i++)
120 if (strcmp (name, BOOLEAN_PREFIX_INTERNAL[i].name) == 0)
121 return BOOLEAN_PREFIX_INTERNAL[i].prefix;
    /* Second pass: the external table. */
123 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++)
124 if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0)
125 return BOOLEAN_PREFIX_EXTERNAL[i].prefix;
    /* No table knows this name. */
127 fprintf (stderr, "Internal error: No prefix exists for '%s'\n", name);
/* Return a human-readable description of 'status'.
 *
 * Every returned value is a string literal, so the caller must not
 * modify or free it.  NOTMUCH_STATUS_LAST_STATUS maps to the generic
 * "Unknown error status value" text. */
134 notmuch_status_to_string (notmuch_status_t status)
137 case NOTMUCH_STATUS_SUCCESS:
138 return "No error occurred";
139 case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
140 return "A Xapian exception occurred";
141 case NOTMUCH_STATUS_FILE_ERROR:
142 return "Something went wrong trying to read or write a file";
143 case NOTMUCH_STATUS_FILE_NOT_EMAIL:
144 return "File is not an email";
145 case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
146 return "Message ID is identical to a message in database";
147 case NOTMUCH_STATUS_NULL_POINTER:
148 return "Erroneous NULL pointer";
149 case NOTMUCH_STATUS_TAG_TOO_LONG:
150 return "Tag value is too long (exceeds NOTMUCH_TAG_MAX)";
    /* Sentinel value; shares the fallback message. */
152 case NOTMUCH_STATUS_LAST_STATUS:
153 return "Unknown error status value";
157 /* XXX: We should drop this function and convert all callers to call
158 * _notmuch_message_add_term instead. */
/* Build a term for 'doc' by concatenating the prefix registered under
 * 'prefix_name' with a value string.
 *
 * The term is allocated with g_strdup_printf, and its length is
 * compared against NOTMUCH_TERM_MAX before use.  NOTE(review): the
 * statement guarded by that comparison and the release of 'term' are
 * not visible here -- confirm the term is g_free'd on every path. */
160 add_term (Xapian::Document doc,
161 const char *prefix_name,
170 prefix = _find_prefix (prefix_name);
172 term = g_strdup_printf ("%s%s", prefix, value);
    /* Only terms within the length limit pass this check. */
174 if (strlen (term) <= NOTMUCH_TERM_MAX)
/* Find all documents indexed under the term formed from the prefix
 * registered as 'prefix_name' followed by 'value'.
 *
 * On return, *begin and *end delimit the posting list for that term
 * in notmuch->xapian_db.
 *
 * NOTE(review): 'term' comes from g_strdup_printf; its release is not
 * visible here.  The local iterator 'i' is not used by any visible
 * line -- confirm whether it can be dropped. */
181 find_doc_ids (notmuch_database_t *notmuch,
182 const char *prefix_name,
184 Xapian::PostingIterator *begin,
185 Xapian::PostingIterator *end)
187 Xapian::PostingIterator i;
190 term = g_strdup_printf ("%s%s", _find_prefix (prefix_name), value);
192 *begin = notmuch->xapian_db->postlist_begin (term);
194 *end = notmuch->xapian_db->postlist_end (term);
/* Look up a document ID for the term 'prefix_name' + 'value', using
 * the posting list produced by find_doc_ids.
 *
 * Returns NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND or
 * NOTMUCH_PRIVATE_STATUS_SUCCESS.  NOTE(review): the condition that
 * selects between them and the store to *doc_id are not visible here;
 * presumably an empty posting list means "not found" and otherwise the
 * first posting is written to *doc_id -- confirm. */
199 static notmuch_private_status_t
200 find_unique_doc_id (notmuch_database_t *notmuch,
201 const char *prefix_name,
203 unsigned int *doc_id)
205 Xapian::PostingIterator i, end;
207 find_doc_ids (notmuch, prefix_name, value, &i, &end);
211 return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND;
214 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
/* Fetch the document with ID 'doc_id' from notmuch->xapian_db and
 * return it by value.  Any exception raised by get_document is not
 * caught here and propagates to the caller. */
218 static Xapian::Document
219 find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
221 return notmuch->xapian_db->get_document (doc_id);
/* Look up the document for the term 'prefix_name' + 'value'.
 *
 * The document ID is resolved with find_unique_doc_id and stored in
 * *doc_id.  *document receives either a default-constructed
 * Xapian::Document or the document fetched for *doc_id.
 *
 * NOTE(review): the branch condition and the return on the
 * empty-document path are not visible here; presumably the empty
 * document is used when the lookup status is "not found" and that
 * status is returned -- confirm. */
224 static notmuch_private_status_t
225 find_unique_document (notmuch_database_t *notmuch,
226 const char *prefix_name,
228 Xapian::Document *document,
229 unsigned int *doc_id)
231 notmuch_private_status_t status;
233 status = find_unique_doc_id (notmuch, prefix_name, value, doc_id);
    /* Hand back an empty document. */
236 *document = Xapian::Document ();
    /* Hand back the stored document. */
240 *document = find_document_for_doc_id (notmuch, *doc_id);
241 return NOTMUCH_PRIVATE_STATUS_SUCCESS;
244 /* XXX: Should rewrite this to accept a notmuch_message_t* instead of
245 * a Xapian::Document and then we could just use
246 * notmuch_message_get_thread_ids instead of duplicating its logic
/* Insert thread IDs found among the terms of 'doc' into the
 * 'thread_ids' hash table.
 *
 * The "thread" prefix is asserted to be exactly one character long.
 * The term iterator is positioned at the first term not less than
 * that prefix.  Each key inserted is a strdup'ed copy of a term with
 * its first (prefix) character removed; the value is NULL.
 *
 * NOTE(review): the enclosing loop and its exits are not visible here;
 * presumably iteration stops at the end of the term list or at the
 * first term not carrying the thread prefix -- confirm. */
249 insert_thread_id (GHashTable *thread_ids, Xapian::Document doc)
252 Xapian::TermIterator i;
253 const char *prefix_str = _find_prefix ("thread");
    /* The single-character comparison below relies on this. */
256 assert (strlen (prefix_str) == 1);
258 prefix = *prefix_str;
260 i = doc.termlist_begin ();
261 i.skip_to (prefix_str);
264 if (i == doc.termlist_end ())
267 if (value_string.empty () || value_string[0] != prefix)
    /* Key is the term text after the one-character prefix. */
269 g_hash_table_insert (thread_ids,
270 strdup (value_string.c_str () + 1), NULL);
/* Find the message whose "id" term matches 'message_id'.
 *
 * When a document ID is found, a message object is created for it with
 * _notmuch_message_create, passing 'notmuch' as both of its first two
 * arguments.
 *
 * NOTE(review): the value returned when the lookup reports
 * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND is not visible here
 * (presumably NULL) -- confirm. */
276 notmuch_database_find_message (notmuch_database_t *notmuch,
277 const char *message_id)
279 notmuch_private_status_t status;
282 status = find_unique_doc_id (notmuch, "id", message_id, &doc_id);
284 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
287 return _notmuch_message_create (notmuch, notmuch, doc_id);
290 /* Return one or more thread_ids, (as a GPtrArray of strings), for the
291 * given message based on looking into the database for any messages
292 * referenced in parents, and also for any messages in the database
293 * referencing message_id.
295 * Caller should free all strings in the array and the array itself,
296 * (g_ptr_array_free) when done. */
/* Implementation outline, as far as the visible code shows:
 *
 *   1. Thread IDs are gathered as keys of a string-keyed hash table,
 *      so duplicates collapse.
 *   2. Children: every document with a "ref" term equal to message_id
 *      contributes its thread IDs via insert_thread_id.
 *   3. Parents: each message ID in 'parents' is looked up with
 *      notmuch_database_find_message and contributes the IDs reported
 *      by notmuch_message_get_thread_ids.
 *   4. The keys are moved into a new GPtrArray and stolen from the
 *      table, so destroying the table does not free them.
 *
 * The keys are strdup'ed copies.  NOTE(review): handling of a parent
 * that is not in the database, and the release of the 'keys' list,
 * are not visible here -- confirm both. */
298 find_thread_ids (notmuch_database_t *notmuch,
300 const char *message_id)
302 Xapian::PostingIterator child, children_end;
303 Xapian::Document doc;
304 GHashTable *thread_ids;
307 const char *parent_message_id;
310 thread_ids = g_hash_table_new_full (g_str_hash, g_str_equal,
    /* Children: documents that reference message_id. */
313 find_doc_ids (notmuch, "ref", message_id, &child, &children_end);
314 for ( ; child != children_end; child++) {
315 doc = find_document_for_doc_id (notmuch, *child);
316 insert_thread_id (thread_ids, doc);
    /* Parents: messages that message_id references. */
319 for (i = 0; i < parents->len; i++) {
320 notmuch_message_t *parent;
321 notmuch_thread_ids_t *ids;
323 parent_message_id = (char *) g_ptr_array_index (parents, i);
324 parent = notmuch_database_find_message (notmuch, parent_message_id);
328 for (ids = notmuch_message_get_thread_ids (parent);
329 notmuch_thread_ids_has_more (ids);
330 notmuch_thread_ids_advance (ids))
334 id = notmuch_thread_ids_get (ids);
    /* The table stores its own copy of each ID. */
335 g_hash_table_insert (thread_ids, strdup (id), NULL);
338 notmuch_message_destroy (parent);
    /* Move the unique IDs into the result array. */
341 result = g_ptr_array_new ();
343 keys = g_hash_table_get_keys (thread_ids);
344 for (l = keys; l; l = l->next) {
345 char *id = (char *) l->data;
346 g_ptr_array_add (result, id);
350 /* We're done with the hash table, but we've taken the pointers to
351 * the allocated strings and put them into our result array, so
352 * tell the hash not to free them on its way out. */
353 g_hash_table_steal_all (thread_ids);
354 g_hash_table_unref (thread_ids);
359 /* Advance 'str' past any whitespace or RFC 822 comments. A comment is
360 * a (potentially nested) parenthesized sequence with '\' used to
361 * escape any character (including parentheses).
363 * If the sequence to be skipped continues to the end of the string,
364 * then 'str' will be left pointing at the final terminating '\0'
/* The outer loop runs while the current character is whitespace or an
 * opening parenthesis.  Each pass first consumes a run of whitespace;
 * a further inner loop runs while input remains and 'nesting' is
 * non-zero.
 *
 * NOTE(review): isspace() is called on a plain char; where char is
 * signed and the byte is >= 0x80 the argument is negative, which is
 * undefined behaviour -- consider casting to unsigned char. */
368 skip_space_and_comments (const char **str)
373 while (*s && (isspace (*s) || *s == '(')) {
374 while (*s && isspace (*s))
379 while (*s && nesting) {
395 /* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
396 * comments, and the '<' and '>' delimiters.
398 * If not NULL, then *next will be made to point to the first character
399 * not parsed, (possibly pointing to the final '\0' terminator).
401 * Returns a newly allocated string which the caller should free()
404 * Returns NULL if there is any error parsing the message-id. */
/* The result is allocated with strndup, so it must be released with
 * free(), not a talloc function.
 *
 * NOTE(review): the closing whitespace-collapse loop advances 'r'
 * after every memmove, so the character shifted into the vacated
 * position is never examined.  A run of two blanks inside the
 * message-id therefore leaves one blank behind -- confirm whether
 * that is intended. */
406 parse_message_id (const char *message_id, const char **next)
411 if (message_id == NULL)
416 skip_space_and_comments (&s);
418 /* Skip any unstructured text as well. */
419 while (*s && *s != '<')
430 skip_space_and_comments (&s);
    /* Scan forward to the closing delimiter or the end of the string. */
433 while (*end && *end != '>')
442 if (end > s && *end == '>')
447 result = strndup (s, end - s + 1);
449 /* Finally, collapse any whitespace that is within the message-id
455 for (r = result, len = strlen (r); *r; r++, len--)
456 if (*r == ' ' || *r == '\t')
457 memmove (r, r+1, len);
463 /* Parse a References header value, putting a copy of each referenced
464 * message-id into 'array'. */
/* Each message-id is produced by parse_message_id, which also advances
 * 'refs' past the text it consumed.  The strings added to 'array' are
 * therefore newly allocated and owned by whoever owns the array.
 *
 * NOTE(review): the loop condition and the handling of a NULL result
 * from parse_message_id are not visible here -- confirm NULL is never
 * added to the array. */
466 parse_references (GPtrArray *array,
475 ref = parse_message_id (refs, &refs);
478 g_ptr_array_add (array, ref);
/* Return a newly allocated string holding the default database path.
 *
 * If the NOTMUCH_BASE environment variable is set, a copy of its value
 * is returned.  Otherwise the result is "$HOME/mail".
 *
 * NOTE(review): the two branches use different allocators (strdup vs.
 * g_strdup_printf), so the caller cannot know whether free() or
 * g_free() is the matching release.  HOME is also not checked for
 * NULL before being formatted with "%s". */
483 notmuch_database_default_path (void)
485 if (getenv ("NOTMUCH_BASE"))
486 return strdup (getenv ("NOTMUCH_BASE"));
488 return g_strdup_printf ("%s/mail", getenv ("HOME"));
/* Create a new notmuch database under the directory 'path' and open
 * it.
 *
 * 'path' must exist (stat) and be a directory.  A ".notmuch"
 * sub-directory is created inside it with mode 0755, and the database
 * is then opened with notmuch_database_open (path).  Failures are
 * reported on stderr.
 *
 * NOTE(review): the condition under which the default path is
 * substituted (presumably path == NULL), the cleanup of 'notmuch_path'
 * and 'local_path', and the value returned on failure (presumably
 * NULL) are not visible here -- confirm. */
492 notmuch_database_create (const char *path)
494 notmuch_database_t *notmuch = NULL;
495 char *notmuch_path = NULL;
498 char *local_path = NULL;
    /* Fall back to the default location; 'local_path' keeps ownership. */
501 path = local_path = notmuch_database_default_path ();
503 err = stat (path, &st);
505 fprintf (stderr, "Error: Cannot create database at %s: %s.\n",
506 path, strerror (errno));
510 if (! S_ISDIR (st.st_mode)) {
511 fprintf (stderr, "Error: Cannot create database at %s: Not a directory.\n",
516 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
518 err = mkdir (notmuch_path, 0755);
521 fprintf (stderr, "Error: Cannot create directory %s: %s.\n",
522 notmuch_path, strerror (errno));
    /* The directory exists now; opening creates the Xapian database. */
526 notmuch = notmuch_database_open (path);
/* Open the notmuch database stored under the directory 'path'.
 *
 * "<path>/.notmuch" must exist (stat).  The Xapian database at
 * "<path>/.notmuch/xapian" is opened writable with DB_CREATE_OR_OPEN,
 * so it is created if missing.  A query parser is set up with OP_AND
 * as its default operator, bound to that database, and given every
 * BOOLEAN_PREFIX_EXTERNAL entry as a boolean prefix.
 *
 * The returned object is a talloc allocation with no parent;
 * notmuch->path is a talloc copy of 'path' owned by it.  Release it
 * with notmuch_database_close.
 *
 * NOTE(review): the result of talloc() is dereferenced on the next
 * visible line without a NULL check.  The condition for substituting
 * the default path, the cleanup of the temporary path strings, and the
 * return value after a Xapian exception are not visible here --
 * confirm. */
538 notmuch_database_open (const char *path)
540 notmuch_database_t *notmuch = NULL;
541 char *notmuch_path = NULL, *xapian_path = NULL;
544 char *local_path = NULL;
548 path = local_path = notmuch_database_default_path ();
550 notmuch_path = g_strdup_printf ("%s/%s", path, ".notmuch");
552 err = stat (notmuch_path, &st);
554 fprintf (stderr, "Error opening database at %s: %s\n",
555 notmuch_path, strerror (errno));
559 xapian_path = g_strdup_printf ("%s/%s", notmuch_path, "xapian");
561 notmuch = talloc (NULL, notmuch_database_t);
562 notmuch->path = talloc_strdup (notmuch, path);
565 notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
566 Xapian::DB_CREATE_OR_OPEN);
567 notmuch->query_parser = new Xapian::QueryParser;
568 notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
569 notmuch->query_parser->set_database (*notmuch->xapian_db);
    /* Expose the external prefixes to user queries. */
571 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
572 prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
573 notmuch->query_parser->add_boolean_prefix (prefix->name,
576 } catch (const Xapian::Error &error) {
577 fprintf (stderr, "A Xapian exception occurred: %s\n",
578 error.get_msg().c_str());
/* Close the database and release everything owned by 'notmuch'.
 *
 * The query parser and the Xapian database are deleted first because
 * they were allocated with C++ new rather than talloc.  talloc_free
 * then releases the notmuch object.  'notmuch' must not be used
 * afterwards. */
593 notmuch_database_close (notmuch_database_t *notmuch)
595 delete notmuch->query_parser;
596 delete notmuch->xapian_db;
597 talloc_free (notmuch);
/* Return the path stored in 'notmuch'.  The returned pointer is the
 * object's own 'path' member, not a copy, so the caller must not free
 * it. */
601 notmuch_database_get_path (notmuch_database_t *notmuch)
603 return notmuch->path;
/* Look up the timestamp document stored under 'db_key'.
 *
 * This is find_unique_document with the "timestamp" prefix: *doc and
 * *doc_id are filled in by that call and its status is returned
 * unchanged. */
606 notmuch_private_status_t
607 find_timestamp_document (notmuch_database_t *notmuch, const char *db_key,
608 Xapian::Document *doc, unsigned int *doc_id)
610 return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id);
613 /* We allow the user to use arbitrarily long keys for timestamps,
614 * (they're for filesystem paths after all, which have no limit we
615 * know about). But we have a term-length limit. So if we exceed that,
616 * we'll use the SHA-1 of the user's key as the actual key for
617 * constructing a database term.
619 * Caution: This function returns a newly allocated string which the
620 * caller should free() when finished.
/* Derive the database key for the user-supplied 'key'.
 *
 * The full term would be the "timestamp" prefix followed by 'key'.  If
 * that combined length exceeds NOTMUCH_TERM_MAX, the SHA-1 of 'key'
 * (notmuch_sha1_of_string) is returned instead.
 *
 * NOTE(review): the return for keys within the limit is not visible
 * here (presumably a copy of 'key') -- confirm.  'term_len' is an int
 * holding the sum of two size_t values. */
623 timestamp_db_key (const char *key)
625 int term_len = strlen (_find_prefix ("timestamp")) + strlen (key);
627 if (term_len > NOTMUCH_TERM_MAX)
628 return notmuch_sha1_of_string (key);
/* Store 'timestamp' in the database under the user-supplied 'key'.
 *
 * The key is first mapped through timestamp_db_key.  The value is
 * written to the NOTMUCH_VALUE_TIMESTAMP slot using
 * Xapian::sortable_serialise.  If no timestamp document exists for the
 * key, a new one is added; an existing one is replaced in place by its
 * document ID.
 *
 * Returns NOTMUCH_STATUS_SUCCESS, or NOTMUCH_STATUS_XAPIAN_EXCEPTION
 * if Xapian throws (the message is also printed to stderr).
 *
 * NOTE(review): 'term' is allocated with talloc_asprintf (NULL, ...)
 * and 'db_key' by timestamp_db_key; neither release is visible here --
 * confirm both are freed, including on the exception path. */
634 notmuch_database_set_timestamp (notmuch_database_t *notmuch,
635 const char *key, time_t timestamp)
637 Xapian::Document doc;
639 notmuch_private_status_t status;
640 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
643 db_key = timestamp_db_key (key);
646 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
648 doc.add_value (NOTMUCH_VALUE_TIMESTAMP,
649 Xapian::sortable_serialise (timestamp));
    /* No document yet: build the term for a new one. */
651 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
652 char *term = talloc_asprintf (NULL, "%s%s",
653 _find_prefix ("timestamp"), db_key);
657 notmuch->xapian_db->add_document (doc);
    /* Existing document: overwrite it. */
659 notmuch->xapian_db->replace_document (doc_id, doc);
662 } catch (Xapian::Error &error) {
663 fprintf (stderr, "A Xapian exception occurred: %s.\n",
664 error.get_msg().c_str());
665 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
/* Retrieve the timestamp stored under the user-supplied 'key'.
 *
 * The key is mapped through timestamp_db_key and the matching
 * timestamp document is looked up.  When one is found, the result is
 * the NOTMUCH_VALUE_TIMESTAMP slot decoded with
 * Xapian::sortable_unserialise.
 *
 * NOTE(review): the value returned when no document exists or when
 * Xapian throws (presumably 0), and the release of 'db_key', are not
 * visible here -- confirm. */
675 notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key)
677 Xapian::Document doc;
679 notmuch_private_status_t status;
683 db_key = timestamp_db_key (key);
686 status = find_timestamp_document (notmuch, db_key, &doc, &doc_id);
688 if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
691 ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP));
692 } catch (Xapian::Error &error) {
/* Add the mail file 'filename' to the database.
 *
 * Steps, as far as the visible code shows:
 *
 *   1. Open the file as a message file.
 *   2. Determine a message ID: the parsed Message-ID header, else the
 *      raw header text, else "notmuch-sha1-<sha1 of the file>".
 *   3. Obtain a message object for that ID.  A non-empty stored
 *      filename means the ID is already present.
 *   4. Record the filename and the "type"/"mail" term.
 *   5. Collect parent IDs from the References and In-Reply-To headers,
 *      add a "ref" term for each, and resolve thread IDs with
 *      find_thread_ids.
 *   6. Attach the thread IDs found, or call
 *      _notmuch_message_ensure_thread_id when there are none.
 *   7. Record the date, read From/Subject/To, and sync the message.
 *
 * Returns NOTMUCH_STATUS_SUCCESS or one of NOTMUCH_STATUS_FILE_ERROR,
 * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID, NOTMUCH_STATUS_FILE_NOT_EMAIL,
 * NOTMUCH_STATUS_XAPIAN_EXCEPTION.
 *
 * NOTE(review): the strings in 'parents' come from parse_message_id,
 * which documents free() as the matching release, yet they are
 * released below with g_free -- confirm the allocators are compatible
 * on all supported GLib versions.  The strings inside 'thread_ids'
 * are documented by find_thread_ids as caller-owned; only the array
 * itself is visibly freed here -- confirm they are not leaked. */
704 notmuch_database_add_message (notmuch_database_t *notmuch,
705 const char *filename)
707 notmuch_message_file_t *message_file;
708 notmuch_message_t *message;
709 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
711 GPtrArray *parents, *thread_ids;
713 const char *refs, *in_reply_to, *date, *header;
714 const char *from, *to, *subject, *old_filename;
719 message_file = notmuch_message_file_open (filename);
720 if (message_file == NULL) {
721 ret = NOTMUCH_STATUS_FILE_ERROR;
725 notmuch_message_file_restrict_headers (message_file,
736 /* The first order of business is to find/create a message ID. */
738 header = notmuch_message_file_get_header (message_file, "message-id");
740 message_id = parse_message_id (header, NULL);
741 /* So the header value isn't RFC-compliant, but it's
742 * better than no message-id at all. */
743 if (message_id == NULL)
744 message_id = xstrdup (header);
746 /* No message-id at all, let's generate one by taking a
747 * hash over the file's contents. */
748 char *sha1 = notmuch_sha1_of_file (filename);
750 /* If that failed too, something is really wrong. Give up. */
752 ret = NOTMUCH_STATUS_FILE_ERROR;
756 message_id = g_strdup_printf ("notmuch-sha1-%s", sha1);
760 /* Now that we have a message ID, we get a message object,
761 * (which may or may not reference an existing document in the
764 /* Use NULL for owner since we want to free this locally. */
766 /* XXX: This call can fail by either out-of-memory or an
767 * "impossible" Xapian exception. We should rewrite it to
768 * allow us to propagate the error status. */
769 message = _notmuch_message_create_for_message_id (NULL, notmuch,
771 if (message == NULL) {
772 fprintf (stderr, "Internal error. This shouldn't happen.\n\n");
773 fprintf (stderr, "I mean, it's possible you ran out of memory, but then this code path is still an internal error since it should have detected that and propagated the status value up the stack.\n");
777 /* Has a message previously been added with the same ID? */
778 old_filename = notmuch_message_get_filename (message);
779 if (old_filename && strlen (old_filename)) {
780 ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
783 _notmuch_message_set_filename (message, filename);
784 _notmuch_message_add_term (message, "type", "mail");
787 /* Next, find the thread(s) to which this message belongs. */
788 parents = g_ptr_array_new ();
790 refs = notmuch_message_file_get_header (message_file, "references");
791 parse_references (parents, refs);
793 in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
794 parse_references (parents, in_reply_to);
    /* Index every parent so later messages can find this one. */
796 for (i = 0; i < parents->len; i++)
797 _notmuch_message_add_term (message, "ref",
798 (char *) g_ptr_array_index (parents, i));
800 thread_ids = find_thread_ids (notmuch, parents, message_id);
    /* The parent IDs are no longer needed. */
804 for (i = 0; i < parents->len; i++)
805 g_free (g_ptr_array_index (parents, i));
806 g_ptr_array_free (parents, TRUE);
808 if (thread_ids->len) {
    /* Attach each thread ID and build a comma-separated list of them. */
813 for (i = 0; i < thread_ids->len; i++) {
814 id = (char *) thread_ids->pdata[i];
815 _notmuch_message_add_thread_id (message, id);
817 thread_id = g_string_new (id);
819 g_string_append_printf (thread_id, ",%s", id);
823 g_string_free (thread_id, TRUE);
    /* No related thread was found. */
825 _notmuch_message_ensure_thread_id (message);
828 g_ptr_array_free (thread_ids, TRUE);
830 date = notmuch_message_file_get_header (message_file, "date");
831 _notmuch_message_set_date (message, date);
833 from = notmuch_message_file_get_header (message_file, "from");
834 subject = notmuch_message_file_get_header (message_file, "subject");
835 to = notmuch_message_file_get_header (message_file, "to");
841 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
844 _notmuch_message_sync (message);
846 } catch (const Xapian::Error &error) {
847 fprintf (stderr, "A Xapian exception occurred: %s.\n",
848 error.get_msg().c_str());
849 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
    /* Cleanup: release the message object and the open file. */
855 notmuch_message_destroy (message);
857 notmuch_message_file_close (message_file);