* Mail document
* -------------
* A mail document is associated with a particular email message file
- * on disk. It is indexed with the following prefixed terms:
+ * on disk. It is indexed with the following prefixed terms which the
+ * database uses to construct threads, etc.:
*
* Single terms of given prefix:
*
*
* thread: The ID of the thread to which the mail belongs
*
+ * replyto: The ID from the In-Reply-To header of the mail (if any).
+ *
* Multiple terms of given prefix:
*
- * ref: All unresolved message IDs from In-Reply-To and
- * References headers in the message. (Once a referenced
- * message is added to the database and the thread IDs
- * are linked the corresponding "ref" term is dropped
- * from the message document.)
+ * reference: All message IDs from In-Reply-To and References
+ * headers in the message.
*
- * tag: Any tags associated with this message by the user.
+ * tag: Any tags associated with this message by the user.
*
* A mail document also has two values:
*
*
* MESSAGE_ID: The unique ID of the mail message (see "id" above)
*
+ * In addition, terms from the content of the message are added with
+ * "from", "to", "attachment", and "subject" prefixes for use by the
+ * user in searching. But the database doesn't really care itself
+ * about any of these.
+ *
* Timestamp document
* ------------------
* A timestamp document is used by a client of the notmuch library to
prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
{ "type", "T" },
- { "ref", "XREFERENCE" },
+ { "reference", "XREFERENCE" },
{ "replyto", "XREPLYTO" },
{ "timestamp", "XTIMESTAMP" },
};
* Returns a newly talloc'ed string belonging to 'ctx'.
*
* Returns NULL if there is any error parsing the message-id. */
-char *
+static char *
_parse_message_id (void *ctx, const char *message_id, const char **next)
{
const char *s, *end;
}
/* Parse a References header value, putting a (talloc'ed under 'ctx')
- * copy of each referenced message-id into 'hash'. */
+ * copy of each referenced message-id into 'hash'.
+ *
+ * We explicitly avoid including any reference identical to
+ * 'message_id' in the result (to avoid mass confusion when a single
+ * message references itself cyclically---and yes, mail messages are
+ * not infrequent in the wild that do this---don't ask me why).
+*/
static void
parse_references (void *ctx,
+ const char *message_id,
GHashTable *hash,
const char *refs)
{
while (*refs) {
ref = _parse_message_id (ctx, refs, &refs);
- if (ref)
+ if (ref && strcmp (ref, message_id))
g_hash_table_insert (hash, ref, NULL);
}
}
const char **thread_id)
{
GHashTable *parents = NULL;
- const char *refs, *in_reply_to;
+ const char *refs, *in_reply_to, *in_reply_to_message_id;
GList *l, *keys = NULL;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
_my_talloc_free_for_g_hash, NULL);
refs = notmuch_message_file_get_header (message_file, "references");
- parse_references (message, parents, refs);
+ parse_references (message, notmuch_message_get_message_id (message),
+ parents, refs);
in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
- parse_references (message, parents, in_reply_to);
- _notmuch_message_add_term (message, "replyto",
- _parse_message_id (message, in_reply_to, NULL));
+ parse_references (message, notmuch_message_get_message_id (message),
+ parents, in_reply_to);
+
+ /* Carefully avoid adding any self-referential in-reply-to term. */
+ in_reply_to_message_id = _parse_message_id (message, in_reply_to, NULL);
+ if (in_reply_to_message_id &&
+ strcmp (in_reply_to_message_id,
+ notmuch_message_get_message_id (message)))
+ {
+ _notmuch_message_add_term (message, "replyto",
+ _parse_message_id (message, in_reply_to, NULL));
+ }
keys = g_hash_table_get_keys (parents);
for (l = keys; l; l = l->next) {
parent_message_id);
if (parent_thread_id == NULL) {
- _notmuch_message_add_term (message, "ref", parent_message_id);
+ _notmuch_message_add_term (message, "reference",
+ parent_message_id);
} else {
if (*thread_id == NULL) {
*thread_id = talloc_strdup (message, parent_thread_id);
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
notmuch_private_status_t private_status;
- find_doc_ids (notmuch, "ref", message_id, &child, &children_end);
+ find_doc_ids (notmuch, "reference", message_id, &child, &children_end);
for ( ; child != children_end; child++) {
*thread_id = talloc_strdup (message, child_thread_id);
_notmuch_message_add_term (message, "thread", *thread_id);
} else if (strcmp (*thread_id, child_thread_id)) {
- _notmuch_message_remove_term (child_message, "ref",
+ _notmuch_message_remove_term (child_message, "reference",
message_id);
_notmuch_message_sync (child_message);
ret = _merge_threads (notmuch, *thread_id, child_thread_id);
/* Given a (mostly empty) 'message' and its corresponding
* 'message_file' link it to existing threads in the database.
*
- * We first looke at 'message_file' and its link-relevant headers
+ * We first look at 'message_file' and its link-relevant headers
* (References and In-Reply-To) for message IDs. We also look in the
* database for existing messages that reference 'message'.
*
* (which may or may not reference an existing document in the
* database). */
- /* Use NULL for owner since we want to free this locally. */
- message = _notmuch_message_create_for_message_id (NULL,
- notmuch,
+ message = _notmuch_message_create_for_message_id (notmuch,
message_id,
&private_status);