* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program. If not, see http://www.gnu.org/licenses/ .
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
*
* Author: Carl Worth <cworth@cworth.org>
*/
#include <glib.h> /* GHashTable */
-typedef struct {
- char *str;
- size_t size;
- size_t len;
-} header_value_closure_t;
-
/* Per-file state shared by the functions in this file: the open
 * stream, the path it was opened from, and the lazily built parse
 * results. */
struct _notmuch_message_file {
- /* File object */
- FILE *file;
+ /* open stream to (possibly gzipped) file */
+ GMimeStream *stream;
+ char *filename; /* path that was opened; talloc-allocated on this object */
- /* Header storage */
- int restrict_headers;
+ /* Cache for decoded headers: maps header name to decoded value.
+  * Created by _notmuch_message_file_parse. */
GHashTable *headers;
- int broken_headers;
- int good_headers;
- size_t header_size; /* Length of full message header in bytes. */
-
- /* Parsing state */
- char *line;
- size_t line_size;
- header_value_closure_t value;
- int parsing_started;
- int parsing_finished;
+ GMimeMessage *message; /* parsed message; set by _notmuch_message_file_parse */
};
-static int
-strcase_equal (const void *a, const void *b)
-{
- return strcasecmp (a, b) == 0;
-}
-
-static unsigned int
-strcase_hash (const void *ptr)
-{
- const char *s = ptr;
-
- /* This is the djb2 hash. */
- unsigned int hash = 5381;
- while (s && *s) {
- hash = ((hash << 5) + hash) + tolower (*s);
- s++;
- }
-
- return hash;
-}
-
/* Destructor registered with talloc_set_destructor in
 * _notmuch_message_file_open_ctx. Releases whichever of the header
 * cache, parsed message and stream have been created, and always
 * returns 0. */
static int
_notmuch_message_file_destructor (notmuch_message_file_t *message)
{
- if (message->line)
- free (message->line);
-
- if (message->value.size)
- free (message->value.str);
-
if (message->headers)
g_hash_table_destroy (message->headers);
- if (message->file)
- fclose (message->file);
+ if (message->message)
+ g_object_unref (message->message);
+
+ if (message->stream)
+ g_object_unref (message->stream);
return 0;
}
/* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
* the talloc owner. */
+/* A relative 'filename' is resolved against the configured mail root
+ * (NOTMUCH_CONFIG_MAIL_ROOT). An absolute 'filename' is accepted only
+ * when it is the mail root itself or lies below it.
+ *
+ * Returns the new object, or NULL on failure. Failures are logged via
+ * _notmuch_database_log when the path is rejected or errno is set. */
notmuch_message_file_t *
-_notmuch_message_file_open_ctx (void *ctx, const char *filename)
+_notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
+                                void *ctx, const char *filename)
{
    notmuch_message_file_t *message;

    /* Allocate zero-filled so that the destructor and the FAIL path
     * only ever see NULL or valid members. */
    message = talloc_zero (ctx, notmuch_message_file_t);
    if (unlikely (message == NULL))
        return NULL;
- talloc_set_destructor (message, _notmuch_message_file_destructor);
+    const char *prefix = notmuch_config_get (notmuch, NOTMUCH_CONFIG_MAIL_ROOT);
- message->file = fopen (filename, "r");
- if (message->file == NULL)
+    if (prefix == NULL)
        goto FAIL;
- message->headers = g_hash_table_new_full (strcase_hash,
- strcase_equal,
- free,
- free);
+    if (*filename == '/') {
+        size_t prefix_len = strlen (prefix);
+
+        /* A bare string-prefix match would also accept siblings of
+         * the mail root such as "<root>-other/...", so additionally
+         * require the match to end on a path component boundary. */
+        if (strncmp (filename, prefix, prefix_len) != 0 ||
+            (prefix_len > 0 && prefix[prefix_len - 1] != '/' &&
+             filename[prefix_len] != '/' && filename[prefix_len] != '\0')) {
+            _notmuch_database_log (notmuch, "Error opening %s: path outside mail root\n",
+                                   filename);
+            /* Already reported; keep FAIL from logging a second time. */
+            errno = 0;
+            goto FAIL;
+        }
+        message->filename = talloc_strdup (message, filename);
+    } else {
+        message->filename = talloc_asprintf (message, "%s/%s", prefix, filename);
+    }
- message->parsing_started = 0;
- message->parsing_finished = 0;
+    if (message->filename == NULL)
+        goto FAIL;
+
+    talloc_set_destructor (message, _notmuch_message_file_destructor);
+
+    message->stream = g_mime_stream_gzfile_open (message->filename);
+    if (message->stream == NULL)
+        goto FAIL;
    return message;
  FAIL:
- fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
- notmuch_message_file_close (message);
+    if (errno)
+        _notmuch_database_log (notmuch, "Error opening %s: %s\n",
+                               filename, strerror (errno));
+    _notmuch_message_file_close (message);
    return NULL;
}
/* Convenience wrapper around _notmuch_message_file_open_ctx that
 * passes NULL as the talloc context. Returns whatever that function
 * returns. */
notmuch_message_file_t *
-notmuch_message_file_open (const char *filename)
+_notmuch_message_file_open (notmuch_database_t *notmuch,
+ const char *filename)
{
- return _notmuch_message_file_open_ctx (NULL, filename);
+ return _notmuch_message_file_open_ctx (notmuch, NULL, filename);
+}
+
/* Return the path stored in 'message_file' by
 * _notmuch_message_file_open_ctx. The string is talloc-allocated on
 * the message file object, so the caller must not free it. */
+const char *
+_notmuch_message_file_get_filename (notmuch_message_file_t *message_file)
+{
+ return message_file->filename;
}
/* Release 'message' with talloc_free. NOTE(review): this presumably
 * runs the destructor registered in _notmuch_message_file_open_ctx,
 * which drops the stream, parsed message and header cache. */
void
-notmuch_message_file_close (notmuch_message_file_t *message)
+_notmuch_message_file_close (notmuch_message_file_t *message)
{
talloc_free (message);
}
-void
-notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
- va_list va_headers)
/* Report whether 'stream' starts with the five bytes "From ". The
 * stream is reset afterwards regardless of the outcome. */
+static bool
+_is_mbox (GMimeStream *stream)
{
- char *header;
+ char from_buf[5];
+ bool ret = false;
- if (message->parsing_started)
- INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
+ /* Is this mbox? Only a full five-byte read that matches "From "
+  * counts; a short read or read error leaves ret false. */
+ if (g_mime_stream_read (stream, from_buf, sizeof (from_buf)) == sizeof (from_buf) &&
+ strncmp (from_buf, "From ", 5) == 0)
+ ret = true;
- while (1) {
- header = va_arg (va_headers, char*);
- if (header == NULL)
- break;
- g_hash_table_insert (message->headers,
- xstrdup (header), NULL);
- }
+ g_mime_stream_reset (stream);
- message->restrict_headers = 1;
+ return ret;
}
-void
-notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
/* Parse the stream of 'message' into message->message and create the
 * header cache. If a parsed message is already present this returns
 * NOTMUCH_STATUS_SUCCESS immediately.
 *
 * Returns NOTMUCH_STATUS_OUT_OF_MEMORY if the header cache cannot be
 * created, and NOTMUCH_STATUS_FILE_NOT_EMAIL if the parser produces no
 * message or the file is an mbox holding more than one message. On
 * any failure after the parser was created, the header cache and any
 * constructed message are released again. */
+notmuch_status_t
+_notmuch_message_file_parse (notmuch_message_file_t *message)
{
- va_list va_headers;
+ GMimeParser *parser;
+ notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+ bool is_mbox;
- va_start (va_headers, message);
+ if (message->message)
+ return NOTMUCH_STATUS_SUCCESS;
- notmuch_message_file_restrict_headersv (message, va_headers);
-}
+ is_mbox = _is_mbox (message->stream);
-static void
-copy_header_unfolding (header_value_closure_t *value,
- const char *chunk)
-{
- char *last;
+ _notmuch_init ();
- if (chunk == NULL)
- return;
+ message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
+ free, g_free); /* keys released with free, values with g_free */
+ if (! message->headers)
+ return NOTMUCH_STATUS_OUT_OF_MEMORY;
- while (*chunk == ' ' || *chunk == '\t')
- chunk++;
+ parser = g_mime_parser_new_with_stream (message->stream);
+ g_mime_parser_set_scan_from (parser, is_mbox);
- if (value->len + 1 + strlen (chunk) + 1 > value->size) {
- unsigned int new_size = value->size;
- if (value->size == 0)
- new_size = strlen (chunk) + 1;
- else
- while (value->len + 1 + strlen (chunk) + 1 > new_size)
- new_size *= 2;
- value->str = xrealloc (value->str, new_size);
- value->size = new_size;
+ message->message = g_mime_parser_construct_message (parser, NULL);
+ if (! message->message) {
+ status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+ goto DONE;
}
- last = value->str + value->len;
- if (value->len) {
- *last = ' ';
- last++;
- value->len++;
+ if (is_mbox && ! g_mime_parser_eos (parser)) {
+ /*
+ * This is a multi-message mbox. (For historical reasons, we
+ * do support single-message mboxes.)
+ */
+ status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
}
- strcpy (last, chunk);
- value->len += strlen (chunk);
+ DONE:
+ g_mime_stream_reset (message->stream);
+ g_object_unref (parser);
+
+ if (status) {
+ g_hash_table_destroy (message->headers);
+ message->headers = NULL;
+
+ if (message->message) {
+ g_object_unref (message->message);
+ message->message = NULL;
+ }
- last = value->str + value->len - 1;
- if (*last == '\n') {
- *last = '\0';
- value->len--;
}
+
+ return status;
}
-/* As a special-case, a value of NULL for header_desired will force
- * the entire header to be parsed if it is not parsed already. This is
- * used by the _notmuch_message_file_get_headers_end function. */
-const char *
-notmuch_message_file_get_header (notmuch_message_file_t *message,
- const char *header_desired)
/* Parse 'message' if necessary and store its GMimeMessage in
 * '*mime_message'. No reference is added here; the object stays owned
 * by 'message'. On a parse failure the status is returned and
 * '*mime_message' is left untouched. */
+notmuch_status_t
+_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
+ GMimeMessage **mime_message)
{
- int contains;
- char *header, *decoded_value;
- const char *s, *colon;
- int match;
- static int initialized = 0;
-
- if (! initialized) {
- g_mime_init (0);
- initialized = 1;
- }
+ notmuch_status_t status;
- message->parsing_started = 1;
-
- if (header_desired == NULL)
- contains = 0;
- else
- contains = g_hash_table_lookup_extended (message->headers,
- header_desired, NULL,
- (gpointer *) &decoded_value);
-
- if (contains && decoded_value)
- return decoded_value;
-
- if (message->parsing_finished)
- return "";
-
-#define NEXT_HEADER_LINE(closure) \
- while (1) { \
- ssize_t bytes_read = getline (&message->line, \
- &message->line_size, \
- message->file); \
- if (bytes_read == -1) { \
- message->parsing_finished = 1; \
- break; \
- } \
- if (*message->line == '\n') { \
- message->parsing_finished = 1; \
- break; \
- } \
- if (closure && \
- (*message->line == ' ' || *message->line == '\t')) \
- { \
- copy_header_unfolding ((closure), message->line); \
- } \
- if (*message->line == ' ' || *message->line == '\t') \
- message->header_size += strlen (message->line); \
- else \
- break; \
- }
+ status = _notmuch_message_file_parse (message);
+ if (status)
+ return status;
- if (message->line == NULL)
- NEXT_HEADER_LINE (NULL);
+ *mime_message = message->message;
- while (1) {
+ return NOTMUCH_STATUS_SUCCESS;
+}
- if (message->parsing_finished)
- break;
+/*
+ * Get all instances of a header decoded and concatenated.
+ *
+ * The result must be freed using g_free().
+ *
+ * Return NULL on errors, empty string for non-existing headers.
+ *
+ * _extend_header is the per-instance helper for that job: it decodes
+ * 'value' and appends it to 'combined', separated by a single space,
+ * taking ownership of 'combined'. With a NULL 'combined' the decoded
+ * value itself is returned. If decoding or concatenation fails,
+ * 'combined' is freed and NULL is returned.
+ */
- colon = strchr (message->line, ':');
+static char *
+_extend_header (char *combined, const char *value)
+{
+ char *decoded;
- if (colon == NULL) {
- message->broken_headers++;
- /* A simple heuristic for giving up on things that just
- * don't look like mail messages. */
- if (message->broken_headers >= 10 &&
- message->good_headers < 5)
- {
- message->parsing_finished = 1;
- continue;
- }
- NEXT_HEADER_LINE (NULL);
- continue;
+ decoded = g_mime_utils_header_decode_text (NULL, value);
+ if (! decoded) {
+ if (combined) {
+ g_free (combined);
+ combined = NULL;
+ }
+ goto DONE;
+ }
+
+ if (combined) {
+ char *tmp = g_strdup_printf ("%s %s", combined, decoded);
+ g_free (decoded);
+ g_free (combined);
+ if (! tmp) {
+ combined = NULL;
+ goto DONE;
}
- message->header_size += strlen (message->line);
+ combined = tmp;
+ } else {
+ combined = decoded;
+ }
+ DONE:
+ return combined;
+}
+
+/*
+ * Decode every instance of 'header' in the parsed message and join
+ * them, in header-list order, with single spaces. Header names are
+ * compared case-insensitively.
+ *
+ * Returns a string the caller must release with g_free(): the joined
+ * value, or "" if the header does not occur. Returns NULL if the
+ * header list is unavailable or if any instance fails to decode; a
+ * failure is never papered over by restarting from later instances.
+ */
+static char *
+_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
+                                           const char *header)
+{
+    char *combined = NULL;
+    GMimeHeaderList *headers;
+    int count;
+
+    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
+    if (! headers)
+        return NULL;
- message->good_headers++;
- header = xstrndup (message->line, colon - message->line);
+    /* The list is not modified below, so query its length once. */
+    count = g_mime_header_list_get_count (headers);
+    for (int i = 0; i < count; i++) {
+        const char *value;
+        GMimeHeader *g_header = g_mime_header_list_get_header_at (headers, i);
- if (message->restrict_headers &&
- ! g_hash_table_lookup_extended (message->headers,
- header, NULL, NULL))
- {
- free (header);
- NEXT_HEADER_LINE (NULL);
+        if (strcasecmp (g_mime_header_get_name (g_header), header) != 0)
            continue;
- }
- s = colon + 1;
- while (*s == ' ' || *s == '\t')
- s++;
+        /* GMime retains ownership of value, we hope */
+        value = g_mime_header_get_value (g_header);
- message->value.len = 0;
- copy_header_unfolding (&message->value, s);
+        combined = _extend_header (combined, value);
+
+        /* _extend_header only returns NULL on failure, and has then
+         * already freed the partial result. Stop here so the error
+         * is reported instead of silently dropping earlier values. */
+        if (! combined)
+            return NULL;
+    }
- NEXT_HEADER_LINE (&message->value);
+    /* Return empty string for non-existing headers. */
+    if (! combined)
+        combined = g_strdup ("");
- if (header_desired == 0)
- match = 0;
- else
- match = (strcasecmp (header, header_desired) == 0);
+    return combined;
+}
- decoded_value = g_mime_utils_header_decode_text (message->value.str);
/* Return the decoded value of 'header' from 'message', parsing the
 * file first if necessary. The returned string is stored in the
 * message's header cache and must not be freed by the caller.
 *
 * Returns NULL if parsing or decoding fails. A header that is not
 * present yields an empty string. */
+const char *
+_notmuch_message_file_get_header (notmuch_message_file_t *message,
+ const char *header)
+{
+ const char *value;
+ char *decoded;
- g_hash_table_insert (message->headers, header, decoded_value);
+ if (_notmuch_message_file_parse (message))
+ return NULL;
- if (match)
- return decoded_value;
+ /* If we have a cached decoded value, use it. */
+ value = g_hash_table_lookup (message->headers, header);
+ if (value)
+ return value;
+
+ if (strcasecmp (header, "received") == 0) {
+ /*
+ * The Received: header is special. We concatenate all
+ * instances of the header as we use this when analyzing the
+ * path the mail has taken from sender to recipient.
+ */
+ decoded = _notmuch_message_file_get_combined_header (message, header);
+ } else {
+ value = g_mime_object_get_header (GMIME_OBJECT (message->message),
+ header);
+ if (value)
+ decoded = g_mime_utils_header_decode_text (NULL, value);
+ else
+ decoded = g_strdup ("");
}
- if (message->line)
- free (message->line);
- message->line = NULL;
+ if (! decoded)
+ return NULL;
+
+ /* Cache the decoded value. We also own the strings: the table
+  * gets its own copy of the name and takes over 'decoded'. */
+ g_hash_table_insert (message->headers, xstrdup (header), decoded);
+
+ return decoded;
+}
- if (message->value.size) {
- free (message->value.str);
- message->value.str = NULL;
- message->value.size = 0;
- message->value.len = 0;
+/*
+ * Fetch the From, Subject, To and Date headers of 'message_file' and
+ * determine its message id.
+ *
+ * The message id is taken from the Message-Id header when present
+ * (falling back to the raw header text if it cannot be parsed), and
+ * otherwise synthesised as "notmuch-sha1-<sha1 of the file>". It is
+ * talloc-allocated on 'message_file'.
+ *
+ * Each *_out pointer may be NULL. The outputs are written only when
+ * NOTMUCH_STATUS_SUCCESS is returned. Other results: the status of
+ * _notmuch_message_file_parse, NOTMUCH_STATUS_FILE_NOT_EMAIL when
+ * From, Subject and To are all missing or empty,
+ * NOTMUCH_STATUS_FILE_ERROR when the file cannot be hashed, and
+ * NOTMUCH_STATUS_OUT_OF_MEMORY when the id cannot be allocated.
+ */
+notmuch_status_t
+_notmuch_message_file_get_headers (notmuch_message_file_t *message_file,
+                                   const char **from_out,
+                                   const char **subject_out,
+                                   const char **to_out,
+                                   const char **date_out,
+                                   char **message_id_out)
+{
+    notmuch_status_t ret;
+    const char *header;
+    /* Initialised because an early "goto DONE" skips the assignments. */
+    const char *from = NULL, *to = NULL, *subject = NULL, *date = NULL;
+    char *message_id = NULL;
+
+    /* Parse message up front to get better error status. */
+    ret = _notmuch_message_file_parse (message_file);
+    if (ret)
+        goto DONE;
+
+    /* Before we do any real work, (especially before doing a
+     * potential SHA-1 computation on the entire file's contents),
+     * let's make sure that what we're looking at looks like an
+     * actual email message.
+     */
+    from = _notmuch_message_file_get_header (message_file, "from");
+    subject = _notmuch_message_file_get_header (message_file, "subject");
+    to = _notmuch_message_file_get_header (message_file, "to");
+    date = _notmuch_message_file_get_header (message_file, "date");
+
+    if ((from == NULL || *from == '\0') &&
+        (subject == NULL || *subject == '\0') &&
+        (to == NULL || *to == '\0')) {
+        ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+        goto DONE;
    }
- /* We've parsed all headers and never found the one we're looking
- * for. It's probably just not there, but let's check that we
- * didn't make a mistake preventing us from seeing it. */
- if (message->restrict_headers && header_desired &&
- ! g_hash_table_lookup_extended (message->headers,
- header_desired, NULL, NULL))
- {
- INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
- "included in call to notmuch_message_file_restrict_headers\n",
- header_desired);
+    /* Now that we're sure it's mail, the first order of business
+     * is to find a message ID (or else create one ourselves).
+     */
+    header = _notmuch_message_file_get_header (message_file, "message-id");
+    if (header && *header != '\0') {
+        message_id = _notmuch_message_id_parse (message_file, header, NULL);
+
+        /* So the header value isn't RFC-compliant, but it's
+         * better than no message-id at all.
+         */
+        if (message_id == NULL)
+            message_id = talloc_strdup (message_file, header);
    }
- return "";
+    if (message_id == NULL) {
+        /* No message-id at all, let's generate one by taking a
+         * hash over the file's contents.
+         */
+        char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file));
+
+        /* If that failed too, something is really wrong. Give up. */
+        if (sha1 == NULL) {
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        }
+
+        message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1);
+        free (sha1);
+
+        /* Never report success with a NULL message id. */
+        if (message_id == NULL) {
+            ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+            goto DONE;
+        }
+    }
+  DONE:
+    if (ret == NOTMUCH_STATUS_SUCCESS) {
+        if (from_out)
+            *from_out = from;
+        if (subject_out)
+            *subject_out = subject;
+        if (to_out)
+            *to_out = to;
+        if (date_out)
+            *date_out = date;
+        if (message_id_out)
+            *message_id_out = message_id;
+    }
+    return ret;
}