2 * Copyright © 2009 Carl Worth
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see http://www.gnu.org/licenses/ .
17 * Author: Carl Worth <cworth@cworth.org>
28 #include <gmime/gmime.h>
34 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
36 /* These prefix values are specifically chosen to be compatible
37 * with sup, (http://sup.rubyforge.org), written by
38 * William Morgan <wmorgan-sup@masanjin.net>, and released
39 * under the GNU GPL v2. */
/* Table of { field name, term prefix } pairs.  find_prefix() scans this
 * table first and returns the prefix of the entry whose name matches.
 * NOTE(review): only part of the initializer list is visible in this
 * excerpt. */
47 prefix_t NORMAL_PREFIX[] = {
50 { "from_name", "FN" },
/* Second { field name, term prefix } table; find_prefix() consults it only
 * after NORMAL_PREFIX produced no match.
 * NOTE(review): only part of the initializer list is visible in this
 * excerpt. */
56 prefix_t BOOLEAN_PREFIX[] = {
58 { "from_email", "FE" },
64 { "attachment_extension", "O" },
70 /* Similarly, these value numbers are also chosen to be sup compatible. */
/* Value-slot numbers.  main() passes these as the first argument of
 * Xapian::Document::add_value() when storing the message id, the thread
 * id and the serialised date. */
74 NOTMUCH_VALUE_MESSAGE_ID = 0,
75 NOTMUCH_VALUE_THREAD = 1,
76 NOTMUCH_VALUE_DATE = 2
/* Look up the term prefix registered for the field called NAME.
 *
 * NORMAL_PREFIX is searched first and BOOLEAN_PREFIX second; the prefix of
 * the first entry whose name compares equal to NAME (strcmp) is returned.
 *
 * NOTE(review): the return type, the declaration of `i` and the value
 * returned when neither table contains NAME are not visible in this
 * excerpt -- callers such as add_term() feed the result straight into a
 * "%s" conversion, so confirm the fallback is never NULL. */
80 find_prefix (const char *name)
/* First pass: the NORMAL_PREFIX table. */
84 for (i = 0; i < ARRAY_SIZE (NORMAL_PREFIX); i++)
85 if (strcmp (name, NORMAL_PREFIX[i].name) == 0)
86 return NORMAL_PREFIX[i].prefix;
/* Second pass: the BOOLEAN_PREFIX table. */
88 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++)
89 if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0)
90 return BOOLEAN_PREFIX[i].prefix;
/* NOTE(review): no line visible in this excerpt reads or writes this
 * global; confirm whether it is still needed. */
95 int TERM_COMBINED = 0;
/* Build a prefixed term for DOC: the prefix that find_prefix() returns for
 * PREFIX_NAME is concatenated with `value`.
 *
 * NOTE(review): the declaration of `value` (presumably the third
 * parameter, since callers pass three arguments), the local declarations
 * and every statement after the g_strdup_printf() call are not visible in
 * this excerpt.  g_strdup_printf() returns a newly allocated string, so
 * confirm that `term` is handed to DOC and then released with g_free().
 * Also confirm how a NULL `value` is treated; main() passes the message
 * id, which may be absent. */
98 add_term (Xapian::Document doc,
99 const char *prefix_name,
108 prefix = find_prefix (prefix_name);
/* "<prefix><value>" in a freshly allocated buffer. */
110 term = g_strdup_printf ("%s%s", prefix, value);
/* Index `text` through TERM_GEN, using the prefix that find_prefix()
 * returns for PREFIX_NAME.
 *
 * NOTE(review): the declaration of `text` (presumably the third parameter)
 * and any guard executed before the lookup are not visible in this
 * excerpt; callers may pass a NULL string (e.g. a missing subject or
 * display name), so confirm that case is handled before index_text(). */
118 gen_terms (Xapian::TermGenerator term_gen,
119 const char *prefix_name,
127 prefix = find_prefix (prefix_name);
/* The middle argument of index_text() is the wdf increment (here 1). */
129 term_gen.index_text (text, 1, prefix);
/* Index the display name of ADDRESS (as reported by
 * internet_address_get_name()) under the prefix named PREFIX_NAME, by
 * delegating to gen_terms().
 *
 * NOTE(review): the lines between the name lookup and the gen_terms()
 * call are not visible in this excerpt; confirm whether an address
 * without a display name is filtered out there. */
133 gen_terms_address_name (Xapian::TermGenerator term_gen,
134 InternetAddress *address,
135 const char *prefix_name)
139 name = internet_address_get_name (address);
142 gen_terms (term_gen, prefix_name, name);
/* Index the display name of every entry in ADDRESSES.
 *
 * Each name is indexed three times: under ADDRESS_TYPE (callers in this
 * file pass "from_name" or "to_name"), under "name", and under "body".
 *
 * NOTE(review): main() passes the result of
 * g_mime_message_get_all_recipients() directly; confirm that ADDRESSES
 * cannot be NULL here.  The declaration of `i` is not visible in this
 * excerpt. */
146 gen_terms_address_names (Xapian::TermGenerator term_gen,
147 InternetAddressList *addresses,
148 const char *address_type)
151 InternetAddress *address;
/* The list length is re-queried on every iteration. */
153 for (i = 0; i < internet_address_list_length (addresses); i++) {
154 address = internet_address_list_get_address (addresses, i);
155 gen_terms_address_name (term_gen, address, address_type);
156 gen_terms_address_name (term_gen, address, "name");
157 gen_terms_address_name (term_gen, address, "body");
/* Add the e-mail address of ADDRESS to DOC as a term under the prefix
 * named PREFIX_NAME, by delegating to add_term().
 *
 * NOTE(review): ADDRESS is cast to a mailbox unconditionally on the
 * visible lines; confirm that group addresses cannot reach this function.
 * The declaration of `addr` and the lines between the lookup and the
 * add_term() call are not visible in this excerpt. */
162 add_term_address_addr (Xapian::Document doc,
163 InternetAddress *address,
164 const char *prefix_name)
166 InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);
169 addr = internet_address_mailbox_get_addr (mailbox);
172 add_term (doc, prefix_name, addr);
/* Add the e-mail address of every entry in ADDRESSES to DOC.
 *
 * Each address is added twice: under ADDRESS_TYPE (main() passes
 * "from_email" for the sender) and under "email".
 *
 * NOTE(review): main() passes the result of
 * g_mime_message_get_all_recipients() directly; confirm that ADDRESSES
 * cannot be NULL here.  The declaration of `i` is not visible in this
 * excerpt. */
176 add_terms_address_addrs (Xapian::Document doc,
177 InternetAddressList *addresses,
178 const char *address_type)
181 InternetAddress *address;
/* The list length is re-queried on every iteration. */
183 for (i = 0; i < internet_address_list_length (addresses); i++) {
184 address = internet_address_list_get_address (addresses, i);
185 add_term_address_addr (doc, address, address_type);
186 add_term_address_addr (doc, address, "email");
190 /* Generate terms for the body of a message, given the filename of the
191 * message and the offset at which the headers of the message end,
192 * (and hence the body begins). */
/* The file is opened through a GIOChannel, positioned at the body offset
 * and read line by line.  Lines beginning with '>' (quotes) and lines
 * beginning with "-- " (signature marker) are not indexed; other lines
 * are indexed under the "body" prefix via gen_terms().  Failures are
 * reported on stderr.
 *
 * NOTE(review): the remaining parameter (`body_offset`), the declaration
 * of `channel`, the loop construct and the bodies of several branches are
 * not visible in this excerpt.  In particular, confirm that
 *   - every call whose failure path prints error->message really receives
 *     &error,
 *   - each line returned by g_io_channel_read_line() is released with
 *     g_free() on every path, including the held-back prev_line, and
 *   - the channel is unreferenced after being closed. */
194 gen_terms_body (Xapian::TermGenerator term_gen,
195 const char * filename,
199 GIOStatus gio_status;
200 GError *error = NULL;
201 char *p, *body_line = NULL, *prev_line = NULL;
/* Open the message file for reading. */
203 channel = g_io_channel_new_file (filename, "r", &error);
204 if (channel == NULL) {
205 fprintf (stderr, "Error: %s\n", error->message);
/* Skip the headers by seeking to the start of the body. */
209 gio_status = g_io_channel_seek_position (channel, body_offset,
211 if (gio_status != G_IO_STATUS_NORMAL) {
212 fprintf (stderr, "Error: %s\n", error->message);
/* Fetch the next line; EOF and read errors are tested separately below. */
220 gio_status = g_io_channel_read_line (channel, &body_line,
222 if (gio_status == G_IO_STATUS_EOF)
224 if (gio_status != G_IO_STATUS_NORMAL) {
225 fprintf (stderr, "Error: %s\n", error->message);
/* Zero-length line (the action taken is not visible in this excerpt). */
229 if (strlen (body_line) == 0)
232 /* If the line looks like it might be introducing a quote,
233 * save it until we see if the next line begins a quote. */
/* Walk backwards from the last character over trailing whitespace. */
234 p = body_line + strlen (body_line) - 1;
235 while (p > body_line and isspace (*p))
238 prev_line = body_line;
243 /* Skip quoted lines, (and previous lines that introduced them) */
244 if (body_line[0] == '>') {
252 /* Now that we're not looking at a quote we can add the prev_line */
254 gen_terms (term_gen, "body", prev_line);
259 /* Skip signatures */
260 /* XXX: Should only do this if "near" the end of the message. */
261 if (strncmp (body_line, "-- ", 3) == 0)
/* Ordinary body line: index it. */
264 gen_terms (term_gen, "body", body_line);
/* NOTE(review): GLib documents g_io_channel_close() as deprecated in
 * favour of g_io_channel_shutdown(). */
270 g_io_channel_close (channel);
/* Program entry point: index one mail message into a Xapian database.
 *
 * Usage: <program> <path-to-xapian-database> <mail-message>
 *
 * The message file is parsed with GMime, and a Xapian::Document is
 * populated with generated text terms (sender and recipient names,
 * subject, body), exact terms (addresses, date, labels, type, message id,
 * source id, thread) and three value slots.  The document is then added
 * to the database.  Xapian exceptions are caught and reported on stderr.
 *
 * NOTE(review): many lines of this function are not visible in this
 * excerpt: the return type, several declarations (`file`, `stream`,
 * `parser`, `time`, ...), the argument-count check, the assignment of
 * `filename`, the `try` keyword, error-path returns and any GMime
 * initialisation.  The notes below cover only what the visible lines
 * show. */
275 main (int argc, char **argv)
279 GMimeMessage *message;
280 InternetAddressList *addresses;
282 const char *database_path, *filename;
285 const char *value, *from;
288 struct tm gm_time_tm;
289 char date_str[16]; /* YYYYMMDDHHMMSS + 1 for Y100k compatibility ;-) */
292 fprintf (stderr, "Usage: %s <path-to-xapian-database> <mail-message>\n",
297 database_path = argv[1];
/* Open the message file; a failure is reported together with strerror(). */
300 file = fopen (filename, "r");
302 fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
/* Wrap the FILE in a GMime stream and parse it into a message object. */
308 stream = g_mime_stream_file_new (file);
310 parser = g_mime_parser_new_with_stream (stream);
312 message = g_mime_parser_construct_message (parser);
315 Xapian::WritableDatabase db;
316 Xapian::TermGenerator term_gen;
317 Xapian::Document doc;
319 doc = Xapian::Document ();
/* The document data is the message's filename. */
321 doc.set_data (filename);
/* Open the database, creating it if it does not exist yet. */
323 db = Xapian::WritableDatabase (database_path,
324 Xapian::DB_CREATE_OR_OPEN);
/* Generated terms go into `doc` and are stemmed with the English stemmer. */
326 term_gen = Xapian::TermGenerator ();
327 term_gen.set_stemmer (Xapian::Stem ("english"));
329 term_gen.set_document (doc);
/* Sender display names. */
331 from = g_mime_message_get_sender (message);
332 addresses = internet_address_list_parse_string (from);
334 gen_terms_address_names (term_gen, addresses, "from_name");
/* Recipient display names. */
336 addresses = g_mime_message_get_all_recipients (message);
337 gen_terms_address_names (term_gen, addresses, "to_name");
/* The subject is indexed under both "subject" and "body". */
339 value = g_mime_message_get_subject (message);
340 gen_terms (term_gen, "subject", value);
341 gen_terms (term_gen, "body", value);
/* Body text, starting at the offset where the parser saw the headers end. */
343 gen_terms_body (term_gen, filename,
344 g_mime_parser_get_headers_end (parser));
/* Sender addresses.  NOTE(review): the sender string is parsed a second
 * time here, and no release of either parsed list is visible in this
 * excerpt -- confirm ownership of the lists returned by
 * internet_address_list_parse_string() and
 * g_mime_message_get_all_recipients(). */
346 from = g_mime_message_get_sender (message);
347 addresses = internet_address_list_parse_string (from);
349 add_terms_address_addrs (doc, addresses, "from_email");
/* Recipient addresses (the prefix-name argument is not visible in this
 * excerpt). */
351 add_terms_address_addrs (doc,
352 g_mime_message_get_all_recipients (message),
/* Message date: converted to broken-down UTC time and formatted as
 * YYYYMMDDHHMMSS for the "date" term. */
355 g_mime_message_get_date (message, &time, NULL);
357 gmtime_r (&time, &gm_time_tm);
359 if (strftime (date_str, sizeof (date_str),
360 "%Y%m%d%H%M%S", &gm_time_tm) == 0) {
361 fprintf (stderr, "Internal error formatting time\n");
365 add_term (doc, "date", date_str);
/* Every message receives the same fixed labels and type. */
367 add_term (doc, "label", "inbox");
368 add_term (doc, "label", "unread");
369 add_term (doc, "type", "mail");
371 value = g_mime_message_get_message_id (message);
372 add_term (doc, "msgid", value);
374 add_term (doc, "source_id", "1");
/* The message id is also used as the thread term and thread value. */
376 add_term (doc, "thread", value);
/* NOTE(review): `value` is handed to add_value() directly; confirm it
 * cannot be NULL for a message that lacks a Message-ID header. */
378 doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, value);
379 doc.add_value (NOTMUCH_VALUE_THREAD, value);
/* The date is stored through Xapian::sortable_serialise(). */
381 doc.add_value (NOTMUCH_VALUE_DATE, Xapian::sortable_serialise (time));
383 db.add_document (doc);
385 } catch (const Xapian::Error &error) {
386 cerr << "A Xapian exception occurred: " << error.get_msg () << endl;
/* Drop the references held on the GMime objects. */
390 g_object_unref (message);
391 g_object_unref (parser);
392 g_object_unref (stream);