#include <sys/time.h>
#include <signal.h>
-#include <xapian.h>
#include <glib.h> /* g_free, GPtrArray, GHashTable */
/* Here's the current schema for our database (for NOTMUCH_DATABASE_VERSION):
*
- * We currently have two different types of documents: mail and directory.
+ * We currently have two different types of documents (mail and
+ * directory) and also some metadata.
*
* Mail document
* -------------
*
* type: mail
*
- * id: Unique ID of mail, (from Message-ID header or generated
- * as "notmuch-sha1-<sha1_sum_of_entire_file>.
+ * id: Unique ID of mail. This is from the Message-ID header
+ * if present and not too long (see NOTMUCH_MESSAGE_ID_MAX).
+ * If it's present and too long, then we use
+ * "notmuch-sha1-<sha1_sum_of_message_id>".
+ * If this header is not present, we use
+ * "notmuch-sha1-<sha1_sum_of_entire_file>".
*
* thread: The ID of the thread to which the mail belongs
*
*
* Multiple terms of given prefix:
*
- * reference: All message IDs from In-Reply-To and Re ferences
+ * reference: All message IDs from In-Reply-To and References
* headers in the message.
*
* tag: Any tags associated with this message by the user.
*
* In addition, terms from the content of the message are added with
* "from", "to", "attachment", and "subject" prefixes for use by the
- * user in searching. But the database doesn't really care itself
- * about any of these.
+ * user in searching. Similarly, terms from the path of the mail
+ * message are added with a "folder" prefix. But the database doesn't
+ * really care itself about any of these.
*
* The data portion of a mail document is empty.
*
*
* The data portion of a directory document contains the path of the
* directory (relative to the database path).
+ *
+ * Database metadata
+ * -----------------
+ * Xapian allows us to store arbitrary name-value pairs as
+ * "metadata". We currently use the following metadata names with the
+ * given meanings:
+ *
+ * version The database schema version, (which is distinct
+ * from both the notmuch package version (see
+ * notmuch --version) and the libnotmuch library
+ * version). The version is stored as a base-10
+ * ASCII integer. The initial database version
+ * was 1, (though a schema existed before that
+ * where no "version" database value existed at
+ * all). Successive versions are allocated as
+ * changes are made to the database (such as by
+ * indexing new fields).
+ *
+ * last_thread_id The last thread ID generated. This is stored
+ * as a 16-byte hexadecimal ASCII representation
+ * of a 64-bit unsigned integer. The first ID
+ * generated is 1 and the value will be
+ * incremented for each thread ID.
+ *
+ * thread_id_* A pre-allocated thread ID for a particular
+ * message. This is actually an arbitrarily large
+ * family of metadata names. Any particular name is
+ * formed by concatenating "thread_id_" with a message
+ * ID (or the SHA1 sum of a message ID if it is very
+ * long---see description of 'id' in the mail
+ * document). The value stored is a thread ID.
+ *
+ * These thread ID metadata values are stored
+ * whenever a message references a parent message
+ * that does not yet exist in the database. A
+ * thread ID will be allocated and stored, and if
+ * the message is later added, the stored thread
+ * ID will be used (and the metadata value will
+ * be cleared).
+ *
+ * Even before a message is added, its
+ * pre-allocated thread ID is useful so that all
+ * descendant messages that reference this common
+ * parent can be recognized as belonging to the
+ * same thread.
*/
/* With these prefix values we follow the conventions published here:
* nearly universal to all mail messages).
*/
-prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
+static prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
{ "type", "T" },
{ "reference", "XREFERENCE" },
{ "replyto", "XREPLYTO" },
{ "directory-direntry", "XDDIRENTRY" },
};
-prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
+static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
{ "thread", "G" },
{ "tag", "K" },
{ "is", "K" },
{ "id", "Q" }
};
-prefix_t PROBABILISTIC_PREFIX[]= {
+static prefix_t PROBABILISTIC_PREFIX[]= {
{ "from", "XFROM" },
{ "to", "XTO" },
{ "attachment", "XATTACHMENT" },
- { "subject", "XSUBJECT"}
+ { "subject", "XSUBJECT"},
+ { "folder", "XFOLDER"}
};
int
return notmuch->xapian_db->get_document (doc_id);
}
+/* Build the compressed stand-in for 'message_id', which has the form:
+ *
+ *    notmuch-sha1-<sha1_sum_of_message_id>
+ *
+ * The result is produced with talloc_asprintf using 'ctx' as the
+ * talloc context.
+ */
+static char *
+_message_id_compressed (void *ctx, const char *message_id)
+{
+    char *digest = notmuch_sha1_of_string (message_id);
+    char *result = talloc_asprintf (ctx, "notmuch-sha1-%s", digest);
+
+    /* The digest has been formatted into 'result'; release it with
+     * free() before returning. */
+    free (digest);
+
+    return result;
+}
+
notmuch_message_t *
notmuch_database_find_message (notmuch_database_t *notmuch,
const char *message_id)
notmuch_private_status_t status;
unsigned int doc_id;
- status = _notmuch_database_find_unique_doc_id (notmuch, "id",
- message_id, &doc_id);
+ if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
+ message_id = _message_id_compressed (notmuch, message_id);
- if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
- return NULL;
+ try {
+ status = _notmuch_database_find_unique_doc_id (notmuch, "id",
+ message_id, &doc_id);
+
+ if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND)
+ return NULL;
- return _notmuch_message_create (notmuch, notmuch, doc_id, NULL);
+ return _notmuch_message_create (notmuch, notmuch, doc_id, NULL);
+ } catch (const Xapian::Error &error) {
+ fprintf (stderr, "A Xapian exception occurred finding message: %s.\n",
+ error.get_msg().c_str());
+ notmuch->exception_reported = TRUE;
+ return NULL;
+ }
}
/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
}
/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
- * comments, and the '<' and '>' delimeters.
+ * comments, and the '<' and '>' delimiters.
*
* If not NULL, then *next will be made to point to the first character
* not parsed, (possibly pointing to the final '\0' terminator.
}
}
+ notmuch->last_doc_id = notmuch->xapian_db->get_lastdocid ();
last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id");
if (last_thread_id.empty ()) {
notmuch->last_thread_id = 0;
/* Given a legal 'path' for the database, return the relative path.
*
- * The return value will be a pointer to the originl path contents,
+ * The return value will be a pointer to the original path contents,
* and will be either the original string (if 'path' was relative) or
* a portion of the string (if path was absolute and begins with the
* database path).
{
notmuch_status_t status;
- return _notmuch_directory_create (notmuch, path, &status);
+ try {
+ return _notmuch_directory_create (notmuch, path, &status);
+ } catch (const Xapian::Error &error) {
+ fprintf (stderr, "A Xapian exception occurred getting directory: %s.\n",
+ error.get_msg().c_str());
+ notmuch->exception_reported = TRUE;
+ return NULL;
+ }
+}
+
+/* Hand out the next unused document ID.
+ *
+ * The returned ID is guaranteed to be:
+ *
+ *   1. Absent from the Xapian database (no document carries it)
+ *
+ *   2. Different from every ID this function returned before
+ *
+ *   3. The smallest integer for which (1) and (2) both hold
+ *
+ * When the pool of document IDs is exhausted, (the counter wraps
+ * around to 0), these guarantees cannot be met and an internal error
+ * is triggered instead.
+ */
+unsigned int
+_notmuch_database_generate_doc_id (notmuch_database_t *notmuch)
+{
+    assert (notmuch->last_doc_id >= notmuch->xapian_db->get_lastdocid ());
+
+    /* A value of 0 after the increment means the counter wrapped. */
+    if (++notmuch->last_doc_id == 0)
+        INTERNAL_ERROR ("Xapian document IDs are exhausted.\n");
+
+    return notmuch->last_doc_id;
+}
+
+/* Allocate the next thread ID for the database.
+ *
+ * Increments notmuch->last_thread_id, formats it as 16 hexadecimal
+ * digits, and records that string under the "last_thread_id" metadata
+ * name.
+ *
+ * The returned pointer refers to a static buffer that is overwritten
+ * by the next call, so a caller that needs to keep the ID must copy
+ * it.
+ */
+static const char *
+_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
+{
+    /* 16 bytes (+ terminator) for hexadecimal representation of
+     * a 64-bit integer. */
+    static char thread_id[17];
+    Xapian::WritableDatabase *db;
+
+    db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+    notmuch->last_thread_id++;
+
+    /* Bound the write by the buffer size rather than relying on the
+     * format width alone. */
+    snprintf (thread_id, sizeof (thread_id), "%016" PRIx64,
+              notmuch->last_thread_id);
+
+    db->set_metadata ("last_thread_id", thread_id);
+
+    return thread_id;
+}
+
+/* Build the metadata name under which a pre-allocated thread ID for
+ * 'message_id' is stored: NOTMUCH_METADATA_THREAD_ID_PREFIX followed
+ * by the message ID, or by its "notmuch-sha1-..." compressed form when
+ * the ID is longer than NOTMUCH_MESSAGE_ID_MAX.
+ *
+ * The key is produced with talloc_asprintf using 'ctx' as the talloc
+ * context.
+ */
+static char *
+_get_metadata_thread_id_key (void *ctx, const char *message_id)
+{
+    char *compressed = NULL;
+    char *key;
+
+    if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) {
+        compressed = _message_id_compressed (ctx, message_id);
+        message_id = compressed;
+    }
+
+    key = talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
+                           message_id);
+
+    /* The compressed ID was only needed to format the key; release it
+     * now instead of leaving it attached to 'ctx'. */
+    if (compressed)
+        talloc_free (compressed);
+
+    return key;
}
/* Find the thread ID to which the message with 'message_id' belongs.
*
- * Returns NULL if no message with message ID 'message_id' is in the
- * database.
+ * Always returns a newly talloced string belonging to 'ctx'.
*
- * Otherwise, returns a newly talloced string belonging to 'ctx'.
+ * Note: If there is no message in the database with the given
+ * 'message_id' then a new thread_id will be allocated for this
+ * message and stored in the database metadata, (where this same
+ * thread ID can be looked up if the message is added to the database
+ * later).
*/
static const char *
_resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
const char *message_id)
{
notmuch_message_t *message;
- const char *ret = NULL;
+ string thread_id_string;
+ const char *thread_id;
+ char *metadata_key;
+ Xapian::WritableDatabase *db;
message = notmuch_database_find_message (notmuch, message_id);
- if (message == NULL)
- goto DONE;
- ret = talloc_steal (ctx, notmuch_message_get_thread_id (message));
+ if (message) {
+ thread_id = talloc_steal (ctx, notmuch_message_get_thread_id (message));
- DONE:
- if (message)
notmuch_message_destroy (message);
- return ret;
+ return thread_id;
+ }
+
+ /* Message has not been seen yet.
+ *
+ * We may have seen a reference to it already, in which case, we
+ * can return the thread ID stored in the metadata. Otherwise, we
+ * generate a new thread ID and store it there.
+ */
+ db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+ metadata_key = _get_metadata_thread_id_key (ctx, message_id);
+ thread_id_string = notmuch->xapian_db->get_metadata (metadata_key);
+
+ if (thread_id_string.empty()) {
+ thread_id = _notmuch_database_generate_thread_id (notmuch);
+ db->set_metadata (metadata_key, thread_id);
+ } else {
+ thread_id = thread_id_string.c_str();
+ }
+
+ talloc_free (metadata_key);
+
+ return talloc_strdup (ctx, thread_id);
}
static notmuch_status_t
const char *parent_thread_id;
parent_message_id = (char *) l->data;
+
+ _notmuch_message_add_term (message, "reference",
+ parent_message_id);
+
parent_thread_id = _resolve_message_id_to_thread_id (notmuch,
message,
parent_message_id);
- if (parent_thread_id == NULL) {
- _notmuch_message_add_term (message, "reference",
- parent_message_id);
- } else {
- if (*thread_id == NULL) {
- *thread_id = talloc_strdup (message, parent_thread_id);
- _notmuch_message_add_term (message, "thread", *thread_id);
- } else if (strcmp (*thread_id, parent_thread_id)) {
- ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
- if (ret)
- goto DONE;
- }
+ if (*thread_id == NULL) {
+ *thread_id = talloc_strdup (message, parent_thread_id);
+ _notmuch_message_add_term (message, "thread", *thread_id);
+ } else if (strcmp (*thread_id, parent_thread_id)) {
+ ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
+ if (ret)
+ goto DONE;
}
}
return ret;
}
-static const char *
-_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
-{
- /* 16 bytes (+ terminator) for hexadecimal representation of
- * a 64-bit integer. */
- static char thread_id[17];
- Xapian::WritableDatabase *db;
-
- db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
-
- notmuch->last_thread_id++;
-
- sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id);
-
- db->set_metadata ("last_thread_id", thread_id);
-
- return thread_id;
-}
-
/* Given a (mostly empty) 'message' and its corresponding
* 'message_file' link it to existing threads in the database.
*
- * We first look at 'message_file' and its link-relevant headers
- * (References and In-Reply-To) for message IDs. We also look in the
- * database for existing message that reference 'message'. In either
- * case, we will assign to the current message the first thread_id
+ * The first check is in the metadata of the database to see if we
+ * have pre-allocated a thread_id in advance for this message, (which
+ * would have happened if a message was previously added that
+ * referenced this one).
+ *
+ * Second, we look at 'message_file' and its link-relevant headers
+ * (References and In-Reply-To) for message IDs.
+ *
+ * Finally, we look in the database for existing messages that
+ * reference 'message'.
+ *
+ * In all cases, we assign to the current message the first thread_id
* found (through either parent or child). We will also merge any
* existing, distinct threads where this message belongs to both,
- * (which is not uncommon when mesages are processed out of order).
+ * (which is not uncommon when messages are processed out of order).
*
- * Finally, if not thread ID has been found through parent or child,
- * we call _notmuch_message_generate_thread_id to generate a new
- * generates a new thread ID if the message doesn't connect to any
- * existing threads.
+ * Finally, if no thread ID has been found through parent or child, we
+ * call _notmuch_database_generate_thread_id to generate a new thread
+ * ID. This should only happen for new, top-level messages, (no
+ * References or In-Reply-To header in this message, and no previously
+ * added message refers to this message).
*/
static notmuch_status_t
_notmuch_database_link_message (notmuch_database_t *notmuch,
notmuch_message_file_t *message_file)
{
notmuch_status_t status;
- const char *thread_id = NULL;
+ const char *message_id, *thread_id = NULL;
+ char *metadata_key;
+ string stored_id;
+
+ message_id = notmuch_message_get_message_id (message);
+ metadata_key = _get_metadata_thread_id_key (message, message_id);
+
+ /* Check if we have already seen related messages to this one.
+ * If we have then use the thread_id that we stored at that time.
+ */
+ stored_id = notmuch->xapian_db->get_metadata (metadata_key);
+ if (! stored_id.empty()) {
+ Xapian::WritableDatabase *db;
+
+ db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+ /* Clear the metadata for this message ID. We don't need it
+ * anymore. */
+ db->set_metadata (metadata_key, "");
+ thread_id = stored_id.c_str();
+
+ _notmuch_message_add_term (message, "thread", thread_id);
+ }
+ talloc_free (metadata_key);
status = _notmuch_database_link_message_to_parents (notmuch, message,
message_file,
if (message_id == NULL)
message_id = talloc_strdup (message_file, header);
- /* Reject a Message ID that's too long. */
- if (message_id && strlen (message_id) + 1 > NOTMUCH_TERM_MAX) {
+ /* If a message ID is too long, substitute its sha1 instead. */
+ if (message_id && strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) {
+ char *compressed = _message_id_compressed (message_file,
+ message_id);
talloc_free (message_id);
- message_id = NULL;
+ message_id = compressed;
}
}
_notmuch_message_sync (message);
} catch (const Xapian::Error &error) {
fprintf (stderr, "A Xapian exception occurred adding message: %s.\n",
- error.get_description().c_str());
+ error.get_msg().c_str());
notmuch->exception_reported = TRUE;
ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
goto DONE;
DONE:
if (message) {
- if (ret == NOTMUCH_STATUS_SUCCESS && message_ret)
+ if ((ret == NOTMUCH_STATUS_SUCCESS ||
+ ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret)
*message_ret = message;
else
notmuch_message_destroy (message);
const char *filename)
{
Xapian::WritableDatabase *db;
- void *local = talloc_new (notmuch);
+ void *local;
const char *prefix = _find_prefix ("file-direntry");
char *direntry, *term;
Xapian::PostingIterator i, end;
if (status)
return status;
+ local = talloc_new (notmuch);
+
db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
- status = _notmuch_database_filename_to_direntry (local, notmuch,
- filename, &direntry);
- if (status)
- return status;
+ try {
- term = talloc_asprintf (notmuch, "%s%s", prefix, direntry);
+ status = _notmuch_database_filename_to_direntry (local, notmuch,
+ filename, &direntry);
+ if (status)
+ return status;
- find_doc_ids_for_term (notmuch, term, &i, &end);
+ term = talloc_asprintf (local, "%s%s", prefix, direntry);
- for ( ; i != end; i++) {
- Xapian::TermIterator j;
+ find_doc_ids_for_term (notmuch, term, &i, &end);
- document = find_document_for_doc_id (notmuch, *i);
+ for ( ; i != end; i++) {
+ Xapian::TermIterator j;
+ notmuch_message_t *message;
+ notmuch_private_status_t private_status;
- document.remove_term (term);
+ message = _notmuch_message_create (local, notmuch,
+ *i, &private_status);
+ if (message == NULL)
+ return COERCE_STATUS (private_status,
+ "Inconsistent document ID in datbase.");
- j = document.termlist_begin ();
- j.skip_to (prefix);
+ _notmuch_message_remove_filename (message, filename);
+ _notmuch_message_sync (message);
- /* Was this the last file-direntry in the message? */
- if (j == document.termlist_end () ||
- strncmp ((*j).c_str (), prefix, strlen (prefix)))
- {
- db->delete_document (document.get_docid ());
- status = NOTMUCH_STATUS_SUCCESS;
- } else {
- db->replace_document (document.get_docid (), document);
- status = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+ /* Take care to find document after sync'ing filename removal. */
+ document = find_document_for_doc_id (notmuch, *i);
+ j = document.termlist_begin ();
+ j.skip_to (prefix);
+
+ /* Was this the last file-direntry in the message? */
+ if (j == document.termlist_end () ||
+ strncmp ((*j).c_str (), prefix, strlen (prefix)))
+ {
+ db->delete_document (document.get_docid ());
+ status = NOTMUCH_STATUS_SUCCESS;
+ } else {
+ status = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+ }
}
+ } catch (const Xapian::Error &error) {
+ fprintf (stderr, "Error: A Xapian exception occurred removing message: %s\n",
+ error.get_msg().c_str());
+ notmuch->exception_reported = TRUE;
+ status = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
}
talloc_free (local);
return status;
}
-notmuch_tags_t *
-_notmuch_convert_tags (void *ctx, Xapian::TermIterator &i,
- Xapian::TermIterator &end)
+/* Collect the terms between 'i' and 'end' that start with 'prefix'.
+ *
+ * The iterator 'i' is first advanced with skip_to (prefix) and then
+ * stepped forward; iteration stops at 'end' or at the first term that
+ * does not begin with 'prefix'. Each matching term is appended to the
+ * list with the prefix stripped. Note that 'i' is passed by reference
+ * and is left wherever iteration stopped.
+ *
+ * Returns a string list created with 'ctx', or NULL if the list could
+ * not be created.
+ */
+notmuch_string_list_t *
+_notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i,
+                                         Xapian::TermIterator &end,
+                                         const char *prefix)
{
- const char *prefix = _find_prefix ("tag");
- notmuch_tags_t *tags;
- std::string tag;
-
- /* Currently this iteration is written with the assumption that
- * "tag" has a single-character prefix. */
- assert (strlen (prefix) == 1);
+    /* size_t matches both strlen's return type and strncmp's length
+     * parameter, avoiding a signed/unsigned round trip. */
+    size_t prefix_len = strlen (prefix);
+    notmuch_string_list_t *list;
- tags = _notmuch_tags_create (ctx);
- if (unlikely (tags == NULL))
+    list = _notmuch_string_list_create (ctx);
+    if (unlikely (list == NULL))
return NULL;
- i.skip_to (prefix);
-
- while (i != end) {
- tag = *i;
-
- if (tag.empty () || tag[0] != *prefix)
+    for (i.skip_to (prefix); i != end; i++) {
+        /* Terminate loop at first term without desired prefix. */
+        if (strncmp ((*i).c_str (), prefix, prefix_len))
break;
- _notmuch_tags_add_tag (tags, tag.c_str () + 1);
-
- i++;
+        _notmuch_string_list_append (list, (*i).c_str () + prefix_len);
}
- _notmuch_tags_prepare_iterator (tags);
-
- return tags;
+    return list;
}
notmuch_tags_t *
notmuch_database_get_all_tags (notmuch_database_t *db)
{
Xapian::TermIterator i, end;
- i = db->xapian_db->allterms_begin();
- end = db->xapian_db->allterms_end();
- return _notmuch_convert_tags(db, i, end);
+ notmuch_string_list_t *tags;
+
+ try {
+ i = db->xapian_db->allterms_begin();
+ end = db->xapian_db->allterms_end();
+ tags = _notmuch_database_get_terms_with_prefix (db, i, end,
+ _find_prefix ("tag"));
+ _notmuch_string_list_sort (tags);
+ return _notmuch_tags_create (db, tags);
+ } catch (const Xapian::Error &error) {
+ fprintf (stderr, "A Xapian exception occurred getting tags: %s.\n",
+ error.get_msg().c_str());
+ db->exception_reported = TRUE;
+ return NULL;
+ }
}