1 /* message.c - Utility functions for parsing an email message for notmuch.
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
23 #include "notmuch-private.h"
25 #include <gmime/gmime.h>
27 #include <glib.h> /* GHashTable */
33 } header_value_closure_t;
/* State kept for one message file while its headers are being read.
 * NOTE(review): only three lines of this struct survive in this listing;
 * the remaining members (file, headers, line, parsing flags, ... as used
 * by the functions below) are declared on lines not visible here. */
35 struct _notmuch_message_file {
44 size_t header_size; /* Length of full message header in bytes. */
/* Buffer that accumulates the value of the header currently being read;
 * it is filled through copy_header_unfolding (). */
49 header_value_closure_t value;
/* Case-insensitive string equality: evaluates to 1 when 'a' and 'b'
 * compare equal under strcasecmp (), and to 0 otherwise.
 * NOTE(review): presumably the key-equality callback that goes with
 * strcase_hash for the headers hash table -- the line passing it is not
 * visible in this listing; confirm. */
56 strcase_equal (const void *a, const void *b)
58 return strcasecmp (a, b) == 0;
/* Case-insensitive djb2 hash of the NUL-terminated string at 'ptr'.
 *
 * Strings that differ only in letter case produce the same hash, so
 * this is usable as the hash callback of a table whose keys are
 * compared with strcasecmp (). A NULL 'ptr' hashes like the empty
 * string, i.e. to the djb2 seed 5381.
 */
static unsigned int
strcase_hash (const void *ptr)
{
    const unsigned char *s = ptr;

    /* This is the djb2 hash. */
    unsigned int hash = 5381;

    if (s == NULL)
        return hash;

    /* Walk the bytes as unsigned char: passing tolower () a negative
     * value (a plain char >= 0x80 where char is signed) is undefined. */
    for (; *s != '\0'; s++)
        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);

    return hash;
}
/* Releases what 'message' owns: the header-value buffer, the headers
 * hash table and the open FILE. It is attached to the object with
 * talloc_set_destructor () in _notmuch_message_file_open_ctx ().
 * NOTE(review): several lines of this function are missing from the
 * listing, so any guards around the last two calls (and the return
 * value) are not visible here. */
77 _notmuch_message_file_destructor (notmuch_message_file_t *message)
/* value.str is released only once a buffer size has been recorded. */
82 if (message->value.size)
83 free (message->value.str);
86 g_hash_table_destroy (message->headers);
89 fclose (message->file);
94 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
95 * the talloc owner. */
/* Steps visible below: allocate a zeroed object under 'ctx', attach the
 * destructor, open 'filename' for reading, create the headers hash table
 * (hashed with strcase_hash) and clear both parsing flags.
 * The error path prints "Error opening <filename>: <strerror (errno)>" on
 * stderr and releases the object via notmuch_message_file_close ().
 * NOTE(review): the return/goto statements and the remaining
 * g_hash_table_new_full () arguments are on lines missing from this
 * listing; presumably NULL is returned on failure -- confirm. */
96 notmuch_message_file_t *
97 _notmuch_message_file_open_ctx (void *ctx, const char *filename)
99 notmuch_message_file_t *message;
101 message = talloc_zero (ctx, notmuch_message_file_t);
102 if (unlikely (message == NULL))
/* The destructor is installed before fopen () so that the failure path
 * can tear the object down through the normal close call. */
105 talloc_set_destructor (message, _notmuch_message_file_destructor);
107 message->file = fopen (filename, "r");
108 if (message->file == NULL)
111 message->headers = g_hash_table_new_full (strcase_hash,
116 message->parsing_started = 0;
117 message->parsing_finished = 0;
/* Failure path: errno is consumed here, right after the failed call. */
122 fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
123 notmuch_message_file_close (message);
/* Open 'filename' as a message file with no talloc parent: forwards to
 * _notmuch_message_file_open_ctx () with a NULL context and returns its
 * result unchanged. */
128 notmuch_message_file_t *
129 notmuch_message_file_open (const char *filename)
131 return _notmuch_message_file_open_ctx (NULL, filename);
/* Dispose of 'message' by handing it to talloc_free (). The destructor
 * attached in _notmuch_message_file_open_ctx () is what releases the
 * buffers, hash table and FILE. */
135 notmuch_message_file_close (notmuch_message_file_t *message)
137 talloc_free (message);
/* Restrict header storage to the names supplied through 'va_headers'.
 * Each name taken from the list (as char *) is duplicated with
 * xstrdup () and inserted into message->headers as a key with a NULL
 * value; afterwards message->restrict_headers is set to 1.
 * Calling this once parsing has started invokes INTERNAL_ERROR.
 * NOTE(review): the loop construct and its stop condition are on lines
 * missing from this listing; presumably the list is NULL-terminated --
 * confirm. */
141 notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
146 if (message->parsing_started)
147 INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
150 header = va_arg (va_headers, char*);
/* The key is a private copy; the NULL value marks "wanted, not yet seen". */
153 g_hash_table_insert (message->headers,
154 xstrdup (header), NULL);
157 message->restrict_headers = 1;
161 notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
165 va_start (va_headers, message);
167 notmuch_message_file_restrict_headersv (message, va_headers);
/* Append the text in 'chunk' to the buffer held in 'value', enlarging the
 * buffer with xrealloc () when the current capacity (value->size) is too
 * small. value->len tracks the number of characters stored.
 * NOTE(review): the second parameter's declaration, the loop bodies and
 * the statements between some of the lines below are missing from this
 * listing, so the comments here describe only what is visible. */
171 copy_header_unfolding (header_value_closure_t *value,
/* Leading spaces and tabs of the chunk are tested here (body not shown). */
179 while (*chunk == ' ' || *chunk == '\t')
/* Capacity check: stored text + 1 + chunk + 1 -- presumably one separator
 * character plus the terminating NUL; confirm against the full source. */
182 if (value->len + 1 + strlen (chunk) + 1 > value->size) {
/* NOTE(review): new_size is unsigned int but receives size_t values
 * (strlen () result, and value->size if that is a size_t); on LP64 this
 * narrows. A size_t local would avoid that. */
183 unsigned int new_size = value->size;
184 if (value->size == 0)
185 new_size = strlen (chunk) + 1;
/* NOTE(review): strlen (chunk) is re-evaluated on every iteration. */
187 while (value->len + 1 + strlen (chunk) + 1 > new_size)
189 value->str = xrealloc (value->str, new_size);
190 value->size = new_size;
/* 'last' is the write position: the end of what is stored so far. */
193 last = value->str + value->len;
/* The capacity check above is what makes this strcpy () bounded. */
200 strcpy (last, chunk);
201 value->len += strlen (chunk);
/* Point at the final stored character (what is done with it is on lines
 * missing from this listing). */
203 last = value->str + value->len - 1;
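210 /* As a special case, a value of NULL for header_desired will force
211 * the entire header to be parsed if it is not parsed already. This is
212 * used by the _notmuch_message_file_get_headers_end function.
213 * Another special case is the Received: header. For this header we
214 * want to concatenate all instances of the header instead of just
215 * hashing the first instance, as we use this when analyzing the path
216 * the mail has taken from sender to recipient.
* NOTE(review): the body below calls strcmp (header_desired, "received")
* before its own NULL test on header_desired, so the NULL special case
* described above looks unsafe as written -- confirm and move the
* comparison after the NULL check.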
219 notmuch_message_file_get_header (notmuch_message_file_t *message,
220 const char *header_desired)
223 char *header, *decoded_value, *header_sofar, *combined_header;
224 const char *s, *colon;
225 int match, newhdr, hdrsofar, is_received;
226 static int initialized = 0;
228 is_received = (strcmp(header_desired,"received") == 0);
235 message->parsing_started = 1;
237 if (header_desired == NULL)
240 contains = g_hash_table_lookup_extended (message->headers,
241 header_desired, NULL,
242 (gpointer *) &decoded_value);
244 if (contains && decoded_value)
245 return decoded_value;
247 if (message->parsing_finished)
250 #define NEXT_HEADER_LINE(closure) \
252 ssize_t bytes_read = getline (&message->line, \
253 &message->line_size, \
255 if (bytes_read == -1) { \
256 message->parsing_finished = 1; \
259 if (*message->line == '\n') { \
260 message->parsing_finished = 1; \
264 (*message->line == ' ' || *message->line == '\t')) \
266 copy_header_unfolding ((closure), message->line); \
268 if (*message->line == ' ' || *message->line == '\t') \
269 message->header_size += strlen (message->line); \
274 if (message->line == NULL)
275 NEXT_HEADER_LINE (NULL);
279 if (message->parsing_finished)
282 colon = strchr (message->line, ':');
285 message->broken_headers++;
286 /* A simple heuristic for giving up on things that just
287 * don't look like mail messages. */
288 if (message->broken_headers >= 10 &&
289 message->good_headers < 5)
291 message->parsing_finished = 1;
294 NEXT_HEADER_LINE (NULL);
298 message->header_size += strlen (message->line);
300 message->good_headers++;
302 header = xstrndup (message->line, colon - message->line);
304 if (message->restrict_headers &&
305 ! g_hash_table_lookup_extended (message->headers,
309 NEXT_HEADER_LINE (NULL);
314 while (*s == ' ' || *s == '\t')
317 message->value.len = 0;
318 copy_header_unfolding (&message->value, s);
320 NEXT_HEADER_LINE (&message->value);
322 if (header_desired == NULL)
325 match = (strcasecmp (header, header_desired) == 0);
327 decoded_value = g_mime_utils_header_decode_text (message->value.str);
328 header_sofar = (char *)g_hash_table_lookup (message->headers, header);
329 /* we treat the Received: header special - we want to concat ALL of
330 * the Received: headers we encounter.
331 * for everything else we return the first instance of a header */
332 if (strcasecmp(header, "received") == 0) {
333 if (header_sofar == NULL) {
334 /* first Received: header we encountered; just add it */
335 g_hash_table_insert (message->headers, header, decoded_value);
337 /* we need to add the header to those we already collected */
338 newhdr = strlen(decoded_value);
339 hdrsofar = strlen(header_sofar);
340 combined_header = xmalloc(hdrsofar + newhdr + 2);
341 strncpy(combined_header,header_sofar,hdrsofar);
342 *(combined_header+hdrsofar) = ' ';
343 strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1);
344 free (decoded_value);
345 g_hash_table_insert (message->headers, header, combined_header);
348 if (header_sofar == NULL) {
349 /* Only insert if we don't have a value for this header, yet. */
350 g_hash_table_insert (message->headers, header, decoded_value);
353 free (decoded_value);
354 decoded_value = header_sofar;
357 /* if we found a match we can bail - unless of course we are
358 * collecting all the Received: headers */
359 if (match && !is_received)
360 return decoded_value;
363 if (message->parsing_finished) {
364 fclose (message->file);
365 message->file = NULL;
369 free (message->line);
370 message->line = NULL;
372 if (message->value.size) {
373 free (message->value.str);
374 message->value.str = NULL;
375 message->value.size = 0;
376 message->value.len = 0;
379 /* For the Received: header we actually might end up here even
380 * though we found the header (as we force continued parsing
381 * in that case). So let's check if that's the header we were
382 * looking for and return the value that we found (if any)
385 return (char *)g_hash_table_lookup (message->headers, "received");
387 /* We've parsed all headers and never found the one we're looking
388 * for. It's probably just not there, but let's check that we
389 * didn't make a mistake preventing us from seeing it. */
390 if (message->restrict_headers && header_desired &&
391 ! g_hash_table_lookup_extended (message->headers,
392 header_desired, NULL, NULL))
394 INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
395 "included in call to notmuch_message_file_restrict_headers\n",