From: Sebastian Poeplau Date: Tue, 7 Aug 2018 12:48:22 +0000 (+0200) Subject: lib: detect mislabeled Windows-1252 parts X-Git-Tag: debian/0.28_rc0-1~38 X-Git-Url: https://git.cworth.org/git?p=notmuch;a=commitdiff_plain;h=0f08bf71666c11c5d4ff675f4013cd741d4ed18e lib: detect mislabeled Windows-1252 parts Use GMime functionality to detect mislabeled messages and apply the correct (Windows) encoding instead. --- diff --git a/notmuch-show.c b/notmuch-show.c index 1072ea55..c3a3783a 100644 --- a/notmuch-show.c +++ b/notmuch-show.c @@ -272,6 +272,7 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out, GMimeContentType *content_type = g_mime_object_get_content_type (GMIME_OBJECT (part)); GMimeStream *stream_filter = NULL; GMimeFilter *crlf_filter = NULL; + GMimeFilter *windows_filter = NULL; GMimeDataWrapper *wrapper; const char *charset; @@ -282,13 +283,37 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out, if (stream_out == NULL) return; + charset = g_mime_object_get_content_type_parameter (part, "charset"); + charset = charset ? g_mime_charset_canon_name (charset) : NULL; + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); + if (wrapper && charset && !g_ascii_strncasecmp (charset, "iso-8859-", 9)) { + GMimeStream *null_stream = NULL; + GMimeStream *null_stream_filter = NULL; + + /* Check for mislabeled Windows encoding */ + null_stream = g_mime_stream_null_new (); + null_stream_filter = g_mime_stream_filter_new (null_stream); + windows_filter = g_mime_filter_windows_new (charset); + g_mime_stream_filter_add(GMIME_STREAM_FILTER (null_stream_filter), + windows_filter); + g_mime_data_wrapper_write_to_stream (wrapper, null_stream_filter); + charset = g_mime_filter_windows_real_charset( + (GMimeFilterWindows *) windows_filter); + + if (null_stream_filter) + g_object_unref (null_stream_filter); + if (null_stream) + g_object_unref (null_stream); + /* Keep a reference to windows_filter in order to prevent the + * charset string from deallocation. */ + } + stream_filter = g_mime_stream_filter_new (stream_out); crlf_filter = g_mime_filter_crlf_new (false, false); g_mime_stream_filter_add(GMIME_STREAM_FILTER (stream_filter), crlf_filter); g_object_unref (crlf_filter); - charset = g_mime_object_get_content_type_parameter (part, "charset"); if (charset) { GMimeFilter *charset_filter; charset_filter = g_mime_filter_charset_new (charset, "UTF-8"); @@ -313,11 +338,12 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out, } } - wrapper = g_mime_part_get_content_object (GMIME_PART (part)); if (wrapper && stream_filter) g_mime_data_wrapper_write_to_stream (wrapper, stream_filter); if (stream_filter) g_object_unref(stream_filter); + if (windows_filter) + g_object_unref (windows_filter); } static const char* diff --git a/test/T300-encoding.sh b/test/T300-encoding.sh index 4a6bfd2f..1e9d2a3d 100755 --- a/test/T300-encoding.sh +++ b/test/T300-encoding.sh @@ -45,7 +45,6 @@ output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize) test_expect_equal "$output" "thread:0000000000000005 2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)" test_begin_subtest "Mislabeled Windows-1252 encoding" -test_subtest_known_broken add_message '[content-type]="text/plain; charset=iso-8859-1"' \ "[body]=$'This text contains \x93Windows-1252\x94 character codes.'" cat < EXPECTED