2 * Copyright © 2009 Carl Worth
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see http://www.gnu.org/licenses/ .
17 * Author: Carl Worth <cworth@cworth.org>
28 #include <gmime/gmime.h>
/* Element count of an array: its total size in bytes divided by the
 * size of its first element. The argument must be a real array; a
 * pointer would yield a meaningless result. */
34 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
36 /* These prefix values are specifically chosen to be compatible
37 * with sup (http://sup.rubyforge.org), written by
38 * William Morgan <wmorgan-sup@masanjin.net> and released
39 * under the GNU GPL v2. */
/* First table consulted by find_prefix (). Each entry pairs a
 * human-readable prefix name with the short prefix string returned to
 * the caller. Only part of the table is visible in this excerpt. */
47 prefix_t NORMAL_PREFIX[] = {
50 { "from_name", "FN" },
/* Second table consulted by find_prefix (), searched only when the
 * name was not found in NORMAL_PREFIX. Same entry layout as above;
 * only part of the table is visible in this excerpt. */
56 prefix_t BOOLEAN_PREFIX[] = {
58 { "from_email", "FE" },
64 { "attachment_extension", "O" },
70 /* Similarly, these value numbers are also chosen to be sup compatible. */
/* Value slot numbers that main () passes as the first argument of
 * Xapian::Document::add_value (): slots 0 and 1 both receive the
 * message id, slot 2 receives the serialised message date. */
74 NOTMUCH_VALUE_MESSAGE_ID = 0,
75 NOTMUCH_VALUE_THREAD = 1,
76 NOTMUCH_VALUE_DATE = 2
/* Look up the term prefix registered under NAME.
 *
 * NORMAL_PREFIX is scanned first and BOOLEAN_PREFIX second; the prefix
 * of the first entry whose name compares equal with strcmp () is
 * returned, so a name present in both tables resolves to the
 * NORMAL_PREFIX entry.
 *
 * NOTE(review): the return type and the result for an unknown NAME are
 * not visible in this excerpt -- confirm before relying on either. */
80 find_prefix (const char *name)
/* Linear scan of the first table. */
84 for (i = 0; i < ARRAY_SIZE (NORMAL_PREFIX); i++)
85 if (strcmp (name, NORMAL_PREFIX[i].name) == 0)
86 return NORMAL_PREFIX[i].prefix;
/* Not found above: fall back to the second table. */
88 for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++)
89 if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0)
90 return BOOLEAN_PREFIX[i].prefix;
/* NOTE(review): nothing in the visible part of this file reads or
 * writes this global -- confirm whether it is still needed. */
95 int TERM_COMBINED = 0;
/* Build a term for DOC by concatenating the prefix that find_prefix ()
 * returns for PREFIX_NAME with VALUE.
 *
 * NOTE(review): the statement that actually adds the term to DOC, any
 * NULL check on VALUE, and the release of the allocated string are not
 * visible in this excerpt -- confirm they exist. */
98 add_term (Xapian::Document doc,
99 const char *prefix_name,
108 prefix = find_prefix (prefix_name);
/* g_strdup_printf () returns newly allocated memory owned by us. */
110 term = g_strdup_printf ("%s%s", prefix, value);
/* Feed TEXT to TERM_GEN so that the terms it produces carry the prefix
 * that find_prefix () returns for PREFIX_NAME.
 *
 * NOTE(review): whether a NULL TEXT is filtered out before the call is
 * not visible in this excerpt -- confirm. */
118 gen_terms (Xapian::TermGenerator term_gen,
119 const char *prefix_name,
127 prefix = find_prefix (prefix_name);
/* The literal 1 is the second argument of index_text (), which the
 * Xapian API documents as the within-document frequency increment. */
129 term_gen.index_text (text, 1, prefix);
/* Index the display name of ADDRESS, as reported by
 * internet_address_get_name (), under PREFIX_NAME via gen_terms ().
 *
 * NOTE(review): lines between fetching the name and indexing it are
 * not visible here; presumably they skip addresses without a name --
 * confirm. */
133 gen_terms_address_name (Xapian::TermGenerator term_gen,
134 InternetAddress *address,
135 const char *prefix_name)
139 name = internet_address_get_name (address);
142 gen_terms (term_gen, prefix_name, name);
/* Index the display name of every entry in ADDRESSES.
 *
 * Each name is indexed three times: once under the caller-supplied
 * ADDRESS_TYPE prefix name, once under the generic name prefix, and
 * once under the body prefix. */
146 gen_terms_address_names (Xapian::TermGenerator term_gen,
147 InternetAddressList *addresses,
148 const char *address_type)
151 InternetAddress *address;
/* The list length is re-queried on every iteration. */
153 for (i = 0; i < internet_address_list_length (addresses); i++) {
154 address = internet_address_list_get_address (addresses, i);
155 gen_terms_address_name (term_gen, address, address_type);
156 gen_terms_address_name (term_gen, address, "name");
157 gen_terms_address_name (term_gen, address, "body");
/* Add the mailbox address of ADDRESS to DOC as a term under
 * PREFIX_NAME, using add_term ().
 *
 * NOTE(review): ADDRESS is cast with INTERNET_ADDRESS_MAILBOX without
 * any visible check that it really is a mailbox (as opposed to a
 * group) -- confirm against the callers. */
162 add_term_address_addr (Xapian::Document doc,
163 InternetAddress *address,
164 const char *prefix_name)
166 InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);
169 addr = internet_address_mailbox_get_addr (mailbox);
172 add_term (doc, prefix_name, addr);
/* Add the mailbox address of every entry in ADDRESSES to DOC.
 *
 * Each address is added twice: once under the caller-supplied
 * ADDRESS_TYPE prefix name and once under the generic email prefix
 * name. */
176 add_terms_address_addrs (Xapian::Document doc,
177 InternetAddressList *addresses,
178 const char *address_type)
181 InternetAddress *address;
/* The list length is re-queried on every iteration. */
183 for (i = 0; i < internet_address_list_length (addresses); i++) {
184 address = internet_address_list_get_address (addresses, i);
185 add_term_address_addr (doc, address, address_type);
186 add_term_address_addr (doc, address, "email");
190 /* Generate terms for the body of a message, given the filename of the
191 * message and the offset at which the headers of the message end,
192 * (and hence the body begins). */
/* The file is reopened through a GIOChannel, positioned at the body
 * offset, and read line by line; each accepted line is indexed under
 * the body prefix via gen_terms ().
 *
 * NOTE(review): the loop construct, the actions taken after each test
 * below, and the cleanup of body_line are not visible in this excerpt;
 * comments on those points are hedged accordingly. */
194 gen_terms_body (Xapian::TermGenerator term_gen,
195 const char * filename,
199 GIOStatus gio_status;
200 GError *error = NULL;
201 char *body_line = NULL;
/* Open the message file read-only; on failure GLib fills in error. */
203 channel = g_io_channel_new_file (filename, "r", &error);
204 if (channel == NULL) {
205 fprintf (stderr, "Error: %s\n", error->message);
/* Skip the headers by seeking to the start of the body. */
209 gio_status = g_io_channel_seek_position (channel, body_offset,
211 if (gio_status != G_IO_STATUS_NORMAL) {
/* NOTE(review): dereferencing error here is only safe if the address
 * of error was passed to the seek call; those arguments are on a line
 * not visible in this excerpt -- confirm. */
212 fprintf (stderr, "Error: %s\n", error->message);
/* Read the next line; GLib allocates the buffer stored in body_line. */
220 gio_status = g_io_channel_read_line (channel, &body_line,
/* End of file ends the scan (the statement taken is not visible). */
222 if (gio_status == G_IO_STATUS_EOF)
224 if (gio_status != G_IO_STATUS_NORMAL) {
/* NOTE(review): same caveat as above about error being populated. */
225 fprintf (stderr, "Error: %s\n", error->message);
/* Lines starting with a greater-than sign are tested for here;
 * presumably these quoted lines are skipped -- confirm. */
229 if (body_line[0] == '>')
/* A line starting with dash, dash, space is tested for here;
 * presumably this signature delimiter ends the scan -- confirm. */
232 if (strncmp (body_line, "-- ", 3) == 0)
/* Everything else is indexed as body text. */
235 gen_terms (term_gen, "body", body_line);
/* NOTE(review): GLib documents g_io_channel_close () as deprecated in
 * favour of g_io_channel_shutdown (); no unref of the channel is
 * visible in this excerpt. */
241 g_io_channel_close (channel);
/* Command-line entry point: parse one mail message with GMime and add
 * it as a single document to a Xapian database.
 *
 * The usage text names two arguments: the path to the Xapian database
 * and the mail message file. Many lines of this function (argument
 * count check, exits, remaining declarations, the opening of the try
 * block, the return) are not visible in this excerpt. */
246 main (int argc, char **argv)
250 GMimeMessage *message;
251 InternetAddressList *addresses;
253 const char *database_path, *filename;
256 const char *value, *from;
259 struct tm gm_time_tm;
260 char date_str[16]; /* YYYYMMDDHHMMSS + 1 for Y100k compatibility ;-) */
263 fprintf (stderr, "Usage: %s <path-to-xapian-database> <mail-message>\n",
/* NOTE(review): the assignment of filename is not visible here;
 * presumably it comes from the second argument -- confirm. */
268 database_path = argv[1];
/* Open the message file; the error text below is printed when that
 * fails (the test itself is not visible). */
271 file = fopen (filename, "r");
273 fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
/* Wrap the FILE in a GMime stream, attach a parser, and parse the
 * whole message. */
279 stream = g_mime_stream_file_new (file);
281 parser = g_mime_parser_new_with_stream (stream);
283 message = g_mime_parser_construct_message (parser);
286 Xapian::WritableDatabase db;
287 Xapian::TermGenerator term_gen;
288 Xapian::Document doc;
290 doc = Xapian::Document ();
/* The document data is the message filename as given. */
292 doc.set_data (filename);
/* Open the database for writing, creating it if it does not exist. */
294 db = Xapian::WritableDatabase (database_path,
295 Xapian::DB_CREATE_OR_OPEN);
/* Terms are generated with the English stemmer and written straight
 * into doc. */
297 term_gen = Xapian::TermGenerator ();
298 term_gen.set_stemmer (Xapian::Stem ("english"));
300 term_gen.set_document (doc);
/* Index the display names of the sender ... */
302 from = g_mime_message_get_sender (message);
303 addresses = internet_address_list_parse_string (from);
305 gen_terms_address_names (term_gen, addresses, "from_name");
/* ... and of all recipients. NOTE(review): addresses is overwritten
 * here and no release of the list parsed above is visible -- confirm
 * it is not leaked. */
307 addresses = g_mime_message_get_all_recipients (message);
308 gen_terms_address_names (term_gen, addresses, "to_name");
/* The subject is indexed under both the subject and body prefixes. */
310 value = g_mime_message_get_subject (message);
311 gen_terms (term_gen, "subject", value);
312 gen_terms (term_gen, "body", value);
/* Index the body text, starting at the offset where the parser
 * reports the headers ended. */
314 gen_terms_body (term_gen, filename,
315 g_mime_parser_get_headers_end (parser));
/* NOTE(review): the sender is fetched and parsed a second time here,
 * duplicating the work done above for the name terms. */
317 from = g_mime_message_get_sender (message);
318 addresses = internet_address_list_parse_string (from);
320 add_terms_address_addrs (doc, addresses, "from_email");
/* Recipient addresses; the prefix-name argument is on a line not
 * visible in this excerpt. */
322 add_terms_address_addrs (doc,
323 g_mime_message_get_all_recipients (message),
/* Fetch the message date, convert it to broken-down UTC, and format
 * it as a fixed-width YYYYMMDDHHMMSS string for the date term. */
326 g_mime_message_get_date (message, &time, NULL);
328 gmtime_r (&time, &gm_time_tm);
330 if (strftime (date_str, sizeof (date_str),
331 "%Y%m%d%H%M%S", &gm_time_tm) == 0) {
332 fprintf (stderr, "Internal error formatting time\n");
336 add_term (doc, "date", date_str);
/* Every message added by this program gets the same fixed labels
 * and type. */
338 add_term (doc, "label", "inbox");
339 add_term (doc, "label", "unread");
340 add_term (doc, "type", "mail");
/* The message id is used as the msgid term, as the thread term, and
 * for both the message-id and thread value slots. */
342 value = g_mime_message_get_message_id (message);
343 add_term (doc, "msgid", value);
345 add_term (doc, "source_id", "1");
347 add_term (doc, "thread", value);
/* NOTE(review): value is handed to add_value () with no visible NULL
 * check; confirm the message id can never be NULL at this point. */
349 doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, value);
350 doc.add_value (NOTMUCH_VALUE_THREAD, value);
/* The date is stored with sortable_serialise () in the date slot. */
352 doc.add_value (NOTMUCH_VALUE_DATE, Xapian::sortable_serialise (time));
354 db.add_document (doc);
/* Any Xapian error raised above is reported on the error stream. */
356 } catch (const Xapian::Error &error) {
357 cerr << "A Xapian exception occurred: " << error.get_msg () << endl;
/* Drop our references to the GMime objects created earlier. */
361 g_object_unref (message);
362 g_object_unref (parser);
363 g_object_unref (stream);