1 /* message.c - Utility functions for parsing an email message for notmuch.
3 * Copyright © 2009 Carl Worth
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/ .
18 * Author: Carl Worth <cworth@cworth.org>
23 #include "notmuch-private.h"
25 #include <glib.h> /* GHashTable */
31 } header_value_closure_t;
/* Per-file header-parsing state. Only the 'value' member is visible
 * in this excerpt; the functions in this file also access members
 * named file, headers, line, line_size, parsing_started,
 * parsing_finished, restrict_headers, good_headers and
 * broken_headers. */
33 struct _notmuch_message_file {
/* Buffer that accumulates the value of the header currently being
 * parsed; appended to by copy_header_unfolding() and freed by the
 * destructor when its size is non-zero. */
46 header_value_closure_t value;
/* Case-insensitive string equality: yields non-zero when strcasecmp()
 * reports 'a' and 'b' as equal, and zero otherwise.
 *
 * NOTE(review): presumably the key-equality callback paired with
 * strcase_hash in the g_hash_table_new_full() call -- the remaining
 * arguments of that call are not visible here; confirm. */
53 strcase_equal (const void *a, const void *b)
55 return strcasecmp (a, b) == 0;
/* Case-insensitive string hash; passed as the hash function to
 * g_hash_table_new_full() when the headers table is created. Each
 * character that is mixed in is first folded with tolower(), so the
 * result does not depend on the case of those characters.
 *
 * NOTE(review): the declaration of 's' and the loop around the mixing
 * step are not visible here. If 's' is a plain 'char *', passing *s
 * to tolower() is undefined for negative values; consider casting to
 * unsigned char -- confirm against the full function. */
59 strcase_hash (const void *ptr)
63 /* This is the djb2 hash. */
64 unsigned int hash = 5381;
/* (hash << 5) + hash is hash * 33; the folded character is added. */
66 hash = ((hash << 5) + hash) + tolower (*s);
/* talloc destructor for a notmuch_message_file_t; registered with
 * talloc_set_destructor() in _notmuch_message_file_open_ctx. Releases
 * the resources that were not allocated through talloc: the
 * header-value buffer, the GLib headers table and the stdio stream.
 *
 * NOTE(review): the open path calls notmuch_message_file_close() on
 * failure, at which point 'headers' and/or 'file' may still be NULL
 * (the object comes from talloc_zero). Any guards around the last two
 * calls are not visible in this excerpt; confirm they exist. */
74 _notmuch_message_file_destructor (notmuch_message_file_t *message)
/* copy_header_unfolding() sets value.size whenever it (re)allocates
 * value.str, so a non-zero size means there is a buffer to free. */
79 if (message->value.size)
80 free (message->value.str);
83 g_hash_table_destroy (message->headers);
86 fclose (message->file);
91 /* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
92 * the talloc owner. */
/* The object is zero-initialised, 'filename' is opened read-only and
 * a case-insensitive headers table is created. On failure an
 * "Error opening ..." message is written to stderr and the partially
 * built object is closed.
 *
 * NOTE(review): the return statements and the label/goto structure of
 * the failure path are not visible in this excerpt; presumably NULL is
 * returned on failure -- confirm. */
93 notmuch_message_file_t *
94 _notmuch_message_file_open_ctx (void *ctx, const char *filename)
96 notmuch_message_file_t *message;
98 message = talloc_zero (ctx, notmuch_message_file_t);
99 if (unlikely (message == NULL))
/* Register the destructor before acquiring any resource, so that
 * closing the object on a later failure runs the cleanup. */
102 talloc_set_destructor (message, _notmuch_message_file_destructor);
104 message->file = fopen (filename, "r");
105 if (message->file == NULL)
/* Keys are hashed with strcase_hash, i.e. case-insensitively. */
108 message->headers = g_hash_table_new_full (strcase_hash,
113 message->parsing_started = 0;
114 message->parsing_finished = 0;
/* Failure reporting: strerror (errno) describes the most recent
 * failing call. */
119 fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
120 notmuch_message_file_close (message);
/* Open 'filename' as a message file with no talloc parent: forwards
 * to _notmuch_message_file_open_ctx with a NULL context and returns
 * whatever that call returns. */
125 notmuch_message_file_t *
126 notmuch_message_file_open (const char *filename)
128 return _notmuch_message_file_open_ctx (NULL, filename);
/* Release 'message' with talloc_free(). Because a talloc destructor
 * (_notmuch_message_file_destructor) is registered when the object is
 * opened, freeing it also runs that destructor. */
132 notmuch_message_file_close (notmuch_message_file_t *message)
134 talloc_free (message);
/* va_list form of notmuch_message_file_restrict_headers. Reads header
 * names (as 'char *') from 'va_headers', inserts a private copy of
 * each name into message->headers with a NULL value, and sets
 * message->restrict_headers.
 *
 * Calling this after parsing has started is treated as a programming
 * error and triggers INTERNAL_ERROR.
 *
 * NOTE(review): the loop around va_arg and its termination condition
 * are not visible in this excerpt; presumably the list ends with a
 * NULL sentinel -- confirm. */
138 notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
143 if (message->parsing_started)
144 INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
147 header = va_arg (va_headers, char*);
/* The key is duplicated with xstrdup, so the table does not alias the
 * caller's string. The NULL value marks a header whose value has not
 * been stored yet: notmuch_message_file_get_header only uses a looked
 * up entry when its value is non-NULL. */
150 g_hash_table_insert (message->headers,
151 xstrdup (header), NULL);
154 message->restrict_headers = 1;
/* Variadic convenience wrapper: collects the arguments that follow
 * 'message' into a va_list and forwards them to
 * notmuch_message_file_restrict_headersv.
 *
 * NOTE(review): the matching va_end() is not visible in this excerpt;
 * confirm it is present. */
158 notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
162 va_start (va_headers, message);
164 notmuch_message_file_restrict_headersv (message, va_headers);
/* Append 'chunk' to the string accumulated in 'value', growing
 * value->str with xrealloc when the current allocation is too small.
 * value->len tracks the string length and value->size the allocated
 * capacity.
 *
 * NOTE(review): several lines of this function are not visible in
 * this excerpt (the second parameter's declaration, the loop bodies,
 * the handling of a separator between chunks and any trimming of the
 * chunk's tail); the comments below describe only what is shown. */
168 copy_header_unfolding (header_value_closure_t *value,
/* Leading spaces and tabs of the chunk are matched here; presumably
 * they are skipped -- the loop body is not visible. */
176 while (*chunk == ' ' || *chunk == '\t')
/* Capacity check: existing length, plus one extra byte, plus the
 * chunk, plus one more byte (presumably a joining space and the
 * terminating NUL -- confirm). */
179 if (value->len + 1 + strlen (chunk) + 1 > value->size) {
180 unsigned int new_size = value->size;
/* An empty buffer starts out sized for this chunk and its NUL. */
181 if (value->size == 0)
182 new_size = strlen (chunk) + 1;
/* Keep enlarging new_size until the check above is satisfied; the
 * growth step itself is not visible. */
184 while (value->len + 1 + strlen (chunk) + 1 > new_size)
186 value->str = xrealloc (value->str, new_size);
187 value->size = new_size;
/* 'last' points at the current end of the accumulated string, which
 * is where the chunk is copied. */
190 last = value->str + value->len;
197 strcpy (last, chunk);
198 value->len += strlen (chunk);
/* Re-point 'last' at the final character now stored in the buffer. */
200 last = value->str + value->len - 1;
208 notmuch_message_file_get_header (notmuch_message_file_t *message,
209 const char *header_desired)
212 char *header, *value;
213 const char *s, *colon;
216 message->parsing_started = 1;
218 contains = g_hash_table_lookup_extended (message->headers,
219 header_desired, NULL,
220 (gpointer *) &value);
221 if (contains && value)
224 if (message->parsing_finished)
227 #define NEXT_HEADER_LINE(closure) \
229 ssize_t bytes_read = getline (&message->line, \
230 &message->line_size, \
232 if (bytes_read == -1) { \
233 message->parsing_finished = 1; \
236 if (*message->line == '\n') { \
237 message->parsing_finished = 1; \
241 (*message->line == ' ' || *message->line == '\t')) \
243 copy_header_unfolding ((closure), message->line); \
245 } while (*message->line == ' ' || *message->line == '\t');
247 if (message->line == NULL)
248 NEXT_HEADER_LINE (NULL);
252 if (message->parsing_finished)
255 colon = strchr (message->line, ':');
258 message->broken_headers++;
259 /* A simple heuristic for giving up on things that just
260 * don't look like mail messages. */
261 if (message->broken_headers >= 10 &&
262 message->good_headers < 5)
264 message->parsing_finished = 1;
267 NEXT_HEADER_LINE (NULL);
271 message->good_headers++;
273 header = xstrndup (message->line, colon - message->line);
275 if (message->restrict_headers &&
276 ! g_hash_table_lookup_extended (message->headers,
280 NEXT_HEADER_LINE (NULL);
285 while (*s == ' ' || *s == '\t')
288 message->value.len = 0;
289 copy_header_unfolding (&message->value, s);
291 NEXT_HEADER_LINE (&message->value);
293 match = (strcasecmp (header, header_desired) == 0);
295 value = xstrdup (message->value.str);
297 g_hash_table_insert (message->headers, header, value);
304 free (message->line);
305 message->line = NULL;
307 if (message->value.size) {
308 free (message->value.str);
309 message->value.str = NULL;
310 message->value.size = 0;
311 message->value.len = 0;
314 /* We've parsed all headers and never found the one we're looking
315 * for. It's probably just not there, but let's check that we
316 * didn't make a mistake preventing us from seeing it. */
317 if (message->restrict_headers &&
318 ! g_hash_table_lookup_extended (message->headers,
319 header_desired, NULL, NULL))
321 INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
322 "included in call to notmuch_message_file_restrict_headers\n",