1 /* message.c - Utility functions for parsing an email message for notmuch.
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
23 #include "notmuch-private.h"
25 #include <glib.h> /* GHashTable */
/* NOTE(review): only the closing line of this typedef is visible in
 * this view. Code later in this file reads and writes the members
 * 'str', 'size' and 'len' through a header_value_closure_t; confirm
 * their declarations on the lines not shown. */
31 } header_value_closure_t;
/* State kept for one message file (presumably the struct behind
 * notmuch_message_file_t - confirm against the typedef).
 *
 * NOTE(review): only two members are visible in this view. Functions
 * in this file also access 'file', 'headers', 'restrict_headers',
 * 'broken_headers', 'good_headers', 'line', 'line_size',
 * 'parsing_started' and 'parsing_finished' through a
 * notmuch_message_file_t pointer; their declarations and the closing
 * brace are presumably on lines not shown here. */
33 struct _notmuch_message_file {
42 size_t header_size; /* Length of full message header in bytes. */
/* Scratch buffer in which the value of the header currently being
 * parsed is assembled by copy_header_unfolding. */
47 header_value_closure_t value;
/* Case-insensitive string equality.
 *
 * Evaluates to 1 when strcasecmp reports 'a' and 'b' as equal and to
 * 0 otherwise.
 *
 * NOTE(review): the return-type line and the braces of this function
 * are not visible in this view. */
54 strcase_equal (const void *a, const void *b)
56 return strcasecmp (a, b) == 0;
/* Case-insensitive string hash: characters are lower-cased with
 * tolower before being mixed into the hash.
 *
 * NOTE(review): the declaration of 's', the loop around the hash
 * update and the return statement are not visible in this view;
 * presumably 's' walks the characters of the string at 'ptr'. */
60 strcase_hash (const void *ptr)
64 /* This is the djb2 hash. */
65 unsigned int hash = 5381;
/* (hash << 5) + hash is hash * 33; the lower-cased character is then
 * added.
 * NOTE(review): if 's' is a plain 'char *', a negative *s passed to
 * tolower is undefined behavior - consider a cast to unsigned char. */
67 hash = ((hash << 5) + hash) + tolower (*s);
/* Destructor attached to message objects with talloc_set_destructor
 * in _notmuch_message_file_open_ctx.
 *
 * Releases the value buffer with free, the header table with
 * g_hash_table_destroy and the stream with fclose.
 *
 * NOTE(review): any guards around the last two calls, other cleanup
 * (for example of message->line) and the return statement are not
 * visible in this view. */
75 _notmuch_message_file_destructor (notmuch_message_file_t *message)
/* value.str is only freed when a non-zero buffer size was recorded. */
80 if (message->value.size)
81 free (message->value.str);
84 g_hash_table_destroy (message->headers);
87 fclose (message->file);
/* Allocates a zeroed notmuch_message_file_t under 'ctx', attaches
 * _notmuch_message_file_destructor to it, opens 'filename' for reading
 * and creates the header hash table.
 *
 * On the error path a message containing strerror (errno) is printed
 * to stderr and the object is closed again.
 *
 * NOTE(review): the return statements, the jump to and label of the
 * error path, and the remaining arguments of g_hash_table_new_full
 * are not visible in this view. */
92 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
93 * the talloc owner. */
94 notmuch_message_file_t *
95 _notmuch_message_file_open_ctx (void *ctx, const char *filename)
97 notmuch_message_file_t *message;
99 message = talloc_zero (ctx, notmuch_message_file_t);
100 if (unlikely (message == NULL))
/* The destructor is attached before the file is opened; the error
 * path below releases the object through notmuch_message_file_close. */
103 talloc_set_destructor (message, _notmuch_message_file_destructor);
105 message->file = fopen (filename, "r");
106 if (message->file == NULL)
/* Header table hashed with the case-insensitive strcase_hash. */
109 message->headers = g_hash_table_new_full (strcase_hash,
/* Explicit reset of the parsing flags (the object was already
 * allocated with talloc_zero). */
114 message->parsing_started = 0;
115 message->parsing_finished = 0;
/* Error path: report the failure and free the partially built object. */
120 fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
121 notmuch_message_file_close (message);
/* Open 'filename' as a message file with a NULL talloc context.
 *
 * Thin wrapper: returns whatever _notmuch_message_file_open_ctx
 * returns for (NULL, filename). */
126 notmuch_message_file_t *
127 notmuch_message_file_open (const char *filename)
129 return _notmuch_message_file_open_ctx (NULL, filename);
/* Close 'message' by releasing it with talloc_free.
 *
 * _notmuch_message_file_open_ctx attaches a talloc destructor to the
 * objects it creates, so freeing the object is what triggers the
 * cleanup of the file, hash table and buffers.
 *
 * NOTE(review): the return-type line and braces are not visible in
 * this view. */
133 notmuch_message_file_close (notmuch_message_file_t *message)
135 talloc_free (message);
/* Register the header names supplied in 'va_headers' as the only
 * headers of interest for 'message'.
 *
 * Each name read from the list is duplicated with xstrdup and inserted
 * into message->headers with a NULL value; afterwards
 * message->restrict_headers is set to 1. Calling this after parsing
 * has started raises INTERNAL_ERROR.
 *
 * NOTE(review): the second parameter line, the loop and its
 * termination test are not visible in this view; presumably the list
 * must be terminated by a NULL pointer - confirm. */
139 notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
144 if (message->parsing_started)
145 INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
148 header = va_arg (va_headers, char*);
/* A NULL value marks a name that is registered but not parsed yet. */
151 g_hash_table_insert (message->headers,
152 xstrdup (header), NULL);
155 message->restrict_headers = 1;
/* Variadic front end for notmuch_message_file_restrict_headersv: the
 * arguments following 'message' are handed over as a va_list.
 *
 * NOTE(review): no va_end is visible after the va_start below. The C
 * standard requires each va_start to be matched by a va_end in the
 * same function before it returns - confirm one exists on the lines
 * not shown, or add it. */
159 notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
163 va_start (va_headers, message);
165 notmuch_message_file_restrict_headersv (message, va_headers);
/* Append 'chunk' to the string accumulated in 'value', enlarging the
 * buffer with xrealloc when it is too small.
 *
 * value->str is the buffer, value->size its allocated size and
 * value->len the length of the text stored so far.
 *
 * NOTE(review): several lines of this function are not visible in
 * this view (second parameter line, the body of the whitespace loop,
 * the growth step of the resize loop, and the code that follows each
 * assignment to 'last'), so the comments below are limited to the
 * statements that can be seen. */
169 copy_header_unfolding (header_value_closure_t *value,
/* Leading spaces and tabs of the chunk are examined here (presumably
 * skipped by a loop body that is not shown). */
177 while (*chunk == ' ' || *chunk == '\t')
/* Required room: current text, one extra byte (presumably a separator
 * added on lines not shown), the chunk, and the terminating NUL. */
180 if (value->len + 1 + strlen (chunk) + 1 > value->size) {
/* NOTE(review): new_size is an unsigned int; if value->size is a
 * size_t this narrows the value - confirm the member types. */
181 unsigned int new_size = value->size;
/* First allocation: size the buffer for this chunk plus its NUL. */
182 if (value->size == 0)
183 new_size = strlen (chunk) + 1;
185 while (value->len + 1 + strlen (chunk) + 1 > new_size)
187 value->str = xrealloc (value->str, new_size);
188 value->size = new_size;
/* 'last' points at the current end of the stored text. */
191 last = value->str + value->len;
198 strcpy (last, chunk);
199 value->len += strlen (chunk);
/* 'last' now points at the final character that was copied. */
201 last = value->str + value->len - 1;
/* Look up header 'header_desired' in 'message', reading further
 * header lines from the file on demand.
 *
 * Visible behavior:
 *  - parsing_started is set;
 *  - message->headers is consulted first for an already stored value;
 *  - otherwise lines are fetched through NEXT_HEADER_LINE, each line
 *    is searched for ':', the name before the colon and a copy of the
 *    unfolded value are inserted into message->headers;
 *  - afterwards the line and value buffers are freed, and
 *    INTERNAL_ERROR is raised if headers were restricted and
 *    'header_desired' is not present in the table.
 *
 * NOTE(review): the return statements, the loop structure, several
 * conditions and parts of the NEXT_HEADER_LINE macro are not visible
 * in this view, so the exact return value and its ownership cannot be
 * stated from here. The comment below names
 * _notmuch_message_file_get_headers_end, but the only visible caller
 * passing NULL is notmuch_message_file_get_header_size. */
208 /* As a special-case, a value of NULL for header_desired will force
209 * the entire header to be parsed if it is not parsed already. This is
210 * used by the _notmuch_message_file_get_headers_end function. */
212 notmuch_message_file_get_header (notmuch_message_file_t *message,
213 const char *header_desired)
216 char *header, *value;
217 const char *s, *colon;
220 message->parsing_started = 1;
222 if (header_desired == NULL)
225 contains = g_hash_table_lookup_extended (message->headers,
226 header_desired, NULL,
227 (gpointer *) &value);
/* Names registered by notmuch_message_file_restrict_headersv are
 * stored with a NULL value, so a non-NULL value means the header has
 * already been parsed. */
229 if (contains && value)
232 if (message->parsing_finished)
/* NEXT_HEADER_LINE reads the next line with getline into
 * message->line. A read result of -1 or a line starting with a
 * newline marks parsing as finished. Lines starting with a space or
 * tab are passed to copy_header_unfolding together with 'closure' and
 * their length is added to header_size.
 * NOTE(review): some lines of the macro are not visible here. */
235 #define NEXT_HEADER_LINE(closure) \
237 ssize_t bytes_read = getline (&message->line, \
238 &message->line_size, \
240 if (bytes_read == -1) { \
241 message->parsing_finished = 1; \
244 if (*message->line == '\n') { \
245 message->parsing_finished = 1; \
249 (*message->line == ' ' || *message->line == '\t')) \
251 copy_header_unfolding ((closure), message->line); \
253 if (*message->line == ' ' || *message->line == '\t') \
254 message->header_size += strlen (message->line); \
/* No line buffered yet: fetch the first one. */
259 if (message->line == NULL)
260 NEXT_HEADER_LINE (NULL);
264 if (message->parsing_finished)
/* A header line is expected to contain a colon; the broken-header
 * accounting below presumably applies when none is found (the test is
 * not visible in this view). */
267 colon = strchr (message->line, ':');
270 message->broken_headers++;
271 /* A simple heuristic for giving up on things that just
272 * don't look like mail messages. */
273 if (message->broken_headers >= 10 &&
274 message->good_headers < 5)
276 message->parsing_finished = 1;
279 NEXT_HEADER_LINE (NULL);
283 message->header_size += strlen (message->line);
285 message->good_headers++;
/* The header name is the text in front of the colon. */
287 header = xstrndup (message->line, colon - message->line);
/* With restricted headers, a table lookup decides whether this
 * header is of interest; otherwise the next line is fetched. */
289 if (message->restrict_headers &&
290 ! g_hash_table_lookup_extended (message->headers,
294 NEXT_HEADER_LINE (NULL);
299 while (*s == ' ' || *s == '\t')
/* Restart the value buffer with the text at 's'. */
302 message->value.len = 0;
303 copy_header_unfolding (&message->value, s);
/* Passing the closure lets the macro hand continuation lines to
 * copy_header_unfolding for this value. */
305 NEXT_HEADER_LINE (&message->value);
/* Comparing the pointer with 0 is a NULL check. */
307 if (header_desired == 0)
310 match = (strcasecmp (header, header_desired) == 0);
/* Store a private copy; message->value is reused for later headers. */
312 value = xstrdup (message->value.str);
314 g_hash_table_insert (message->headers, header, value);
/* Release the line and value scratch buffers. */
321 free (message->line);
322 message->line = NULL;
324 if (message->value.size) {
325 free (message->value.str);
326 message->value.str = NULL;
327 message->value.size = 0;
328 message->value.len = 0;
331 /* We've parsed all headers and never found the one we're looking
332 * for. It's probably just not there, but let's check that we
333 * didn't make a mistake preventing us from seeing it. */
334 if (message->restrict_headers && header_desired &&
335 ! g_hash_table_lookup_extended (message->headers,
336 header_desired, NULL, NULL))
338 INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
339 "included in call to notmuch_message_file_restrict_headers\n",
/* Return message->header_size, the length of the full message header
 * in bytes.
 *
 * If parsing has not finished yet, a NULL header is requested from
 * notmuch_message_file_get_header to force the whole header to be
 * parsed. INTERNAL_ERROR is raised if parsing is still unfinished
 * after that call.
 *
 * NOTE(review): the return-type line and braces are not visible in
 * this view. */
347 notmuch_message_file_get_header_size (notmuch_message_file_t *message)
349 if (! message->parsing_finished)
350 notmuch_message_file_get_header (message, NULL);
/* Sanity check on the forced parse above. */
352 if (! message->parsing_finished)
353 INTERNAL_ERROR ("Parsing for NULL header did not force parsing to finish.\n");
355 return message->header_size;
/* Read the raw header block of 'message' into a buffer of
 * header_size + 1 bytes allocated with talloc_size under 'message',
 * and NUL-terminate it.
 *
 * The stream is rewound and header_size bytes are read from the start
 * of the file, so the position of message->file is changed by this
 * call. On a short read an error is printed to stderr and the buffer
 * is released with talloc_free.
 *
 * NOTE(review): the return statements and the end of the function are
 * not visible in this view; presumably the buffer is returned on
 * success and NULL otherwise - confirm. */
359 notmuch_message_file_get_all_headers (notmuch_message_file_t *message)
361 char *headers = NULL;
/* Forces the header to be fully parsed if it is not already. */
362 size_t header_size = notmuch_message_file_get_header_size (message);
364 if (header_size == 0)
/* One extra byte for the terminating NUL written below. */
367 headers = talloc_size (message, header_size + 1);
368 if (unlikely (headers == NULL))
371 rewind (message->file);
372 if (fread (headers, 1, header_size, message->file) != header_size) {
373 fprintf (stderr, "Error: Short read occurred trying to read message header.\n");
374 talloc_free (headers);
378 headers[header_size] = '\0';