1 /* message.c - Utility functions for parsing an email message for notmuch.
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see https://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
23 #include "notmuch-private.h"
25 #include <gmime/gmime.h>
27 #include <glib.h> /* GHashTable */
/* State kept for one message file while it is opened and parsed.
 * Besides the members shown, the functions in this file also use
 * message->file (the stdio stream), message->filename and
 * message->headers (the hash table the cache comment below refers to).
 * NOTE(review): confirm the declarations of those three members. */
29 struct _notmuch_message_file {
34 /* Cache for decoded headers */
/* Parsed message object; assigned by _notmuch_message_file_parse(). */
37 GMimeMessage *message;
/* talloc destructor, registered in _notmuch_message_file_open_ctx().
 * Releases the resources talloc does not manage itself: the glib
 * header cache, the GMime message object and the stdio stream.
 * NOTE(review): each of these members can still be NULL when this runs
 * (headers and message are only set while parsing; file is NULL when
 * fopen() failed) -- confirm every release is guarded by a NULL check. */
41 _notmuch_message_file_destructor (notmuch_message_file_t *message)
44 g_hash_table_destroy (message->headers);
47 g_object_unref (message->message);
50 fclose (message->file);
55 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
56 * the talloc owner. */
/* 'notmuch' is only used to log an open failure. The object is
 * zero-initialised, gets its own talloc copy of 'filename', and the
 * file is opened for reading; it is not parsed here.
 * Presumably returns the new object, or NULL on failure -- the return
 * statements are not part of this excerpt, confirm. */
57 notmuch_message_file_t *
58 _notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
59 void *ctx, const char *filename)
61 notmuch_message_file_t *message;
63 message = talloc_zero (ctx, notmuch_message_file_t);
64 if (unlikely (message == NULL))
67 /* Only needed for error messages during parsing. */
68 message->filename = talloc_strdup (message, filename);
69 if (message->filename == NULL)
/* From here on, freeing 'message' runs the destructor, which closes
 * the stream and drops the GMime/glib objects. */
72 talloc_set_destructor (message, _notmuch_message_file_destructor);
74 message->file = fopen (filename, "r");
75 if (message->file == NULL)
/* Failure path: log the error and release the partially built object.
 * NOTE(review): errno is only meaningful here for the fopen() failure;
 * if the talloc_strdup() failure also ends up here the logged reason
 * may be unrelated -- confirm. */
81 _notmuch_database_log (notmuch, "Error opening %s: %s\n",
82 filename, strerror (errno));
83 _notmuch_message_file_close (message);
/* Convenience wrapper around _notmuch_message_file_open_ctx() that
 * passes NULL as the talloc context for the new object; the result of
 * that call is returned unchanged. */
88 notmuch_message_file_t *
89 _notmuch_message_file_open (notmuch_database_t *notmuch,
92 return _notmuch_message_file_open_ctx (notmuch, NULL, filename);
/* Return the filename recorded when the message file was opened.
 * The string is a talloc child of 'message_file' (see the
 * talloc_strdup() in _notmuch_message_file_open_ctx()), so the caller
 * must not free it and must not use it after the file is closed. */
96 _notmuch_message_file_get_filename (notmuch_message_file_t *message_file)
98 return message_file->filename;
/* Destroy a message file. talloc_free() invokes the destructor that
 * was registered at open time, which in turn releases the header
 * cache, the GMime message and the stdio stream. */
102 _notmuch_message_file_close (notmuch_message_file_t *message)
104 talloc_free (message);
/* Decide whether 'file' looks like an mbox: read sizeof (from_buf)
 * bytes from the current stream position and compare the first five
 * with the mbox separator prefix "From ".
 * NOTE(review): the read advances the stream, and the caller hands the
 * same FILE to the GMime parser afterwards, so the position has to be
 * restored before returning -- that step is not part of this excerpt,
 * confirm. Also confirm from_buf holds at least five bytes. */
108 _is_mbox (FILE *file)
114 if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
115 strncmp (from_buf, "From ", 5) == 0)
/* Parse the message file with GMime the first time it is needed.
 *
 * If message->message is already set the call returns
 * NOTMUCH_STATUS_SUCCESS immediately, so repeated calls are cheap.
 * Otherwise a fresh header cache is created and a GMime parser is run
 * over message->file.
 *
 * Status values assigned below:
 *   NOTMUCH_STATUS_OUT_OF_MEMORY  - the header cache could not be created
 *   NOTMUCH_STATUS_FILE_NOT_EMAIL - GMime could not construct a message,
 *                                   or the file is an mbox and the parser
 *                                   is not at end of stream after the
 *                                   first message
 */
124 _notmuch_message_file_parse (notmuch_message_file_t *message)
128 notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
129 static int initialized = 0;
132 if (message->message)
133 return NOTMUCH_STATUS_SUCCESS;
/* Sniff for an mbox "From " line before GMime sees the stream. */
135 is_mbox = _is_mbox (message->file);
/* GMime library initialisation; presumably performed only once, tracked
 * by the static 'initialized' flag -- confirm the guard. */
138 g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
/* Header cache, keyed through strcase_hash/strcase_equal (presumably
 * case-insensitive on the header name -- confirm). */
142 message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
144 if (! message->headers)
145 return NOTMUCH_STATUS_OUT_OF_MEMORY;
147 stream = g_mime_stream_file_new (message->file);
149 /* We'll own and fclose the FILE* ourselves. */
150 g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), false);
152 parser = g_mime_parser_new_with_stream (stream);
/* Only scan for mbox "From " separators when the file looked like an mbox. */
153 g_mime_parser_set_scan_from (parser, is_mbox);
155 message->message = g_mime_parser_construct_message (parser);
156 if (! message->message) {
157 status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
161 if (is_mbox && ! g_mime_parser_eos (parser)) {
163 * This is a multi-message mbox. (For historical reasons, we
164 * do support single-message mboxes.)
166 status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
/* Common exit: drop our references to the stream and the parser. */
170 g_object_unref (stream);
171 g_object_unref (parser);
/* Failure cleanup: discard the cache and any parsed message and rewind
 * the file, leaving message->headers and message->message NULL.
 * NOTE(review): presumably executed only when status is not
 * NOTMUCH_STATUS_SUCCESS -- the guarding condition is not part of this
 * excerpt, confirm. */
174 g_hash_table_destroy (message->headers);
175 message->headers = NULL;
177 if (message->message) {
178 g_object_unref (message->message);
179 message->message = NULL;
182 rewind (message->file);
/* Parse the file if that has not happened yet and hand the GMime
 * message back through *mime_message.
 *
 * No reference is added before the pointer is stored, so the object
 * stays owned by 'message': the caller must not unref it and must not
 * use it after the message file has been closed.
 * Presumably the parse status is returned early on failure, leaving
 * *mime_message untouched -- that check is not part of this excerpt,
 * confirm. */
189 _notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
190 GMimeMessage **mime_message)
192 notmuch_status_t status;
194 status = _notmuch_message_file_parse (message);
198 *mime_message = message->message;
200 return NOTMUCH_STATUS_SUCCESS;
204 * Get all instances of a header decoded and concatenated.
206 * The result must be freed using g_free().
208 * Return NULL on errors, empty string for non-existing headers.
/* Helper for _notmuch_message_file_get_combined_header(): decode one
 * raw header value and append it to the accumulated string 'combined',
 * separated by a single space. Callers store the return value back
 * into their accumulator.
 * NOTE(review): the treatment of a NULL 'combined' (first value), of a
 * failed decode, and the release of the superseded strings are not
 * part of this excerpt -- confirm. */
212 _extend_header (char *combined, const char *value) {
215 decoded = g_mime_utils_header_decode_text (value);
225 char *tmp = g_strdup_printf ("%s %s", combined, decoded);
241 #if (GMIME_MAJOR_VERSION < 3)
/* GMime 2.x variant.
 *
 * Walk the header list of the parsed message with a GMimeHeaderIter
 * and feed the value of every header whose name equals 'header'
 * (compared with strcasecmp) to _extend_header(), accumulating the
 * result in 'combined'. When nothing was accumulated an empty
 * g_strdup()ed string is produced instead. The iterator is released
 * with g_mime_header_iter_free() on the way out.
 *
 * Requires message->message to be set, i.e. the file must already
 * have been parsed. */
243 _notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
246 GMimeHeaderList *headers;
247 GMimeHeaderIter *iter;
248 char *combined = NULL;
250 headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
254 iter = g_mime_header_iter_new ();
/* Position the iterator on the list's first header. */
258 if (! g_mime_header_list_get_iter (headers, iter))
/* Skip headers with a different name. */
263 if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
266 /* Note that GMime retains ownership of value... */
267 value = g_mime_header_iter_get_value (iter);
269 combined = _extend_header (combined, value);
270 } while (g_mime_header_iter_next (iter));
272 /* Return empty string for non-existing headers. */
274 combined = g_strdup ("");
277 g_mime_header_iter_free (iter);
/* GMime 3.x variant.
 *
 * Visit the header list of the parsed message by index and feed the
 * value of every header whose name equals 'header' (compared with
 * strcasecmp) to _extend_header(), accumulating the result in
 * 'combined'. When nothing was accumulated an empty g_strdup()ed
 * string is produced instead.
 *
 * Requires message->message to be set, i.e. the file must already
 * have been parsed. */
283 _notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
286 char *combined = NULL;
287 GMimeHeaderList *headers;
289 headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
294 for (int i=0; i < g_mime_header_list_get_count (headers); i++) {
296 GMimeHeader *g_header = g_mime_header_list_get_header_at (headers, i);
/* Skip headers with a different name. */
298 if (strcasecmp (g_mime_header_get_name (g_header), header) != 0)
301 /* GMime retains ownership of value, we hope */
302 value = g_mime_header_get_value (g_header);
304 combined = _extend_header (combined, value);
307 /* Return empty string for non-existing headers. */
309 combined = g_strdup ("");
/* Return the decoded value of 'header' for this message.
 *
 * The file is parsed on demand. Decoded values are cached in
 * message->headers, so a repeated lookup of the same header is served
 * from the cache. The cache owns the strings it stores; the caller
 * must not free the result.
 *
 * "received" yields all instances of the header concatenated. Any
 * other header yields the decoded text of the value reported by
 * g_mime_object_get_header(); an empty string is cached for the case
 * handled by the g_strdup ("") branch (presumably a missing header --
 * the condition is not part of this excerpt, confirm).
 *
 * Presumably returns NULL when parsing or decoding fails -- confirm. */
316 _notmuch_message_file_get_header (notmuch_message_file_t *message,
/* A non-zero status means the file could not be parsed. */
322 if (_notmuch_message_file_parse (message))
325 /* If we have a cached decoded value, use it. */
326 value = g_hash_table_lookup (message->headers, header);
330 if (strcasecmp (header, "received") == 0) {
332 * The Received: header is special. We concatenate all
333 * instances of the header as we use this when analyzing the
334 * path the mail has taken from sender to recipient.
336 decoded = _notmuch_message_file_get_combined_header (message, header);
338 value = g_mime_object_get_header (GMIME_OBJECT (message->message),
341 decoded = g_mime_utils_header_decode_text (value);
343 decoded = g_strdup ("");
349 /* Cache the decoded value. We also own the strings. */
350 g_hash_table_insert (message->headers, xstrdup (header), decoded);
/* Fetch the headers needed to index a message and settle on its
 * message ID.
 *
 * Steps, in order:
 *   1. Parse the file; the status of that call is kept in 'ret'.
 *   2. Look up From, Subject, To and Date. If From, Subject and To are
 *      all NULL or empty, 'ret' becomes NOTMUCH_STATUS_FILE_NOT_EMAIL.
 *   3. Take the message ID from a non-empty Message-ID header: first
 *      via _notmuch_message_id_parse(), falling back to a talloc copy
 *      of the raw header text when that yields NULL.
 *   4. If there is still no ID, build "notmuch-sha1-<hash>" from
 *      _notmuch_sha1_of_file() on the message's filename; 'ret'
 *      becomes NOTMUCH_STATUS_FILE_ERROR on the failure path there.
 *   5. On NOTMUCH_STATUS_SUCCESS store the results through the output
 *      pointers.
 *
 * The header strings come from _notmuch_message_file_get_header() and
 * therefore belong to the header cache of 'message_file'; the fallback
 * and generated message IDs are talloc children of 'message_file'.
 * NOTE(review): whether NULL output pointers are tolerated, and what
 * the outputs hold on failure, is not part of this excerpt -- confirm. */
356 _notmuch_message_file_get_headers (notmuch_message_file_t *message_file,
357 const char **from_out,
358 const char **subject_out,
360 const char **date_out,
361 char **message_id_out)
363 notmuch_status_t ret;
365 const char *from, *to, *subject, *date;
366 char *message_id = NULL;
368 /* Parse message up front to get better error status. */
369 ret = _notmuch_message_file_parse (message_file);
373 /* Before we do any real work, (especially before doing a
374 * potential SHA-1 computation on the entire file's contents),
375 * let's make sure that what we're looking at looks like an
376 * actual email message.
378 from = _notmuch_message_file_get_header (message_file, "from");
379 subject = _notmuch_message_file_get_header (message_file, "subject");
380 to = _notmuch_message_file_get_header (message_file, "to");
381 date = _notmuch_message_file_get_header (message_file, "date");
383 if ((from == NULL || *from == '\0') &&
384 (subject == NULL || *subject == '\0') &&
385 (to == NULL || *to == '\0')) {
386 ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
390 /* Now that we're sure it's mail, the first order of business
391 * is to find a message ID (or else create one ourselves).
393 header = _notmuch_message_file_get_header (message_file, "message-id");
394 if (header && *header != '\0') {
395 message_id = _notmuch_message_id_parse (message_file, header, NULL);
397 /* So the header value isn't RFC-compliant, but it's
398 * better than no message-id at all.
400 if (message_id == NULL)
401 message_id = talloc_strdup (message_file, header);
404 if (message_id == NULL ) {
405 /* No message-id at all, let's generate one by taking a
406 * hash over the file's contents.
408 char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file));
410 /* If that failed too, something is really wrong. Give up. */
412 ret = NOTMUCH_STATUS_FILE_ERROR;
416 message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1);
/* Results are only handed to the caller when everything succeeded. */
420 if (ret == NOTMUCH_STATUS_SUCCESS) {
424 *subject_out = subject;
430 *message_id_out = message_id;