1 /* date.c - Date-parsing utility for the notmuch mail system.
3 * Copyright © 2000-2009 Jeffrey Stedfast
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see http://www.gnu.org/licenses/
19 /* This code was originally written by Jeffrey Stedfast
20 * as part of his GMime library (http://spruce.sourceforge.net/gmime/)
22 * Carl Worth <cworth@cworth.org> imported it into notmuch and removed
37 #ifdef HAVE_SYS_PARAM_H
38 #include <sys/param.h> /* for MAXHOSTNAMELEN */
40 #define MAXHOSTNAMELEN 64
42 #ifdef HAVE_UTSNAME_DOMAINNAME
43 #include <sys/utsname.h> /* for uname() */
45 #include <sys/types.h>
47 #include <unistd.h> /* Unix header for getpid() */
53 #define getpid() _getpid()
61 #include "gmime-utils.h"
62 #include "gmime-table-private.h"
63 #include "gmime-parse-utils.h"
64 #include "gmime-part.h"
65 #include "gmime-charset.h"
66 #include "gmime-iconv.h"
67 #include "gmime-iconv-utils.h"
69 #ifdef ENABLE_WARNINGS
73 #endif /* ENABLE_WARNINGS */
79 * SECTION: gmime-utils
81 * @short_description: MIME utility functions
84 * Utility functions to parse, encode and decode various MIME tokens
88 extern gboolean _g_mime_enable_rfc2047_workarounds (void);
90 #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
92 /* date parser macros */
93 #define NUMERIC_CHARS "1234567890"
94 #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
95 #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
96 #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
97 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
98 #define TIME_CHARS "1234567890:"
100 #define DATE_TOKEN_NON_NUMERIC (1 << 0)
101 #define DATE_TOKEN_NON_WEEKDAY (1 << 1)
102 #define DATE_TOKEN_NON_MONTH (1 << 2)
103 #define DATE_TOKEN_NON_TIME (1 << 3)
104 #define DATE_TOKEN_HAS_COLON (1 << 4)
105 #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
106 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
107 #define DATE_TOKEN_HAS_SIGN (1 << 7)
109 static unsigned char tohex[16] = {
110 '0', '1', '2', '3', '4', '5', '6', '7',
111 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
114 static unsigned char gmime_datetok_table[256] = {
115 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
116 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
117 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
118 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
119 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
120 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
121 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
122 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
123 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
124 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
125 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
126 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
127 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
128 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
129 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
130 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
133 /* hrm, is there a library for this shit? */
140 { "EST", -500 }, /* these are all US timezones. bloody yanks */
155 static char *tm_months[] = {
156 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
157 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
160 static char *tm_days[] = {
161 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
166 * g_mime_utils_header_format_date:
167 * @date: time_t date representation
168 * @tz_offset: Timezone offset
170 * Allocates a string buffer containing the rfc822 formatted date
171 * string represented by @date and @tz_offset.
173 * Returns: a valid string representation of the date.
176 g_mime_utils_header_format_date (time_t date, int tz_offset)
180 date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
182 #if defined (HAVE_GMTIME_R)
183 gmtime_r (&date, &tm);
184 #elif defined (HAVE_GMTIME_S)
185 gmtime_s (&tm, &date);
187 memcpy (&tm, gmtime (&date), sizeof (tm));
190 return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
191 tm_days[tm.tm_wday], tm.tm_mday,
192 tm_months[tm.tm_mon],
194 tm.tm_hour, tm.tm_min, tm.tm_sec,
198 /* This is where it gets ugly... */
200 typedef struct _date_token {
201 struct _date_token *next;
207 #define date_token_free(tok) g_slice_free (date_token, tok)
208 #define date_token_new() g_slice_new (date_token)
211 datetok (const char *date)
213 date_token *tokens = NULL, *token, *tail = (date_token *) &tokens;
214 const char *start, *end;
219 /* kill leading whitespace */
220 while (*start == ' ' || *start == '\t')
226 mask = gmime_datetok_table[(unsigned char) *start];
228 /* find the end of this token */
230 while (*end && !strchr ("-/,\t\r\n ", *end))
231 mask |= gmime_datetok_table[(unsigned char) *end++];
234 token = date_token_new ();
236 token->start = start;
237 token->len = end - start;
254 decode_int (const char *in, size_t inlen)
256 register const char *inptr;
257 int sign = 1, val = 0;
266 } else if (*inptr == '+')
269 for ( ; inptr < inend; inptr++) {
270 if (!(*inptr >= '0' && *inptr <= '9'))
273 val = (val * 10) + (*inptr - '0');
283 get_days_in_month (int month, int year)
300 if (g_date_is_leap_year (year))
311 get_wday (const char *in, size_t inlen)
315 g_return_val_if_fail (in != NULL, -1);
320 for (wday = 0; wday < 7; wday++) {
321 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
325 return -1; /* unknown week day */
329 get_mday (const char *in, size_t inlen)
333 g_return_val_if_fail (in != NULL, -1);
335 mday = decode_int (in, inlen);
337 if (mday < 0 || mday > 31)
344 get_month (const char *in, size_t inlen)
348 g_return_val_if_fail (in != NULL, -1);
353 for (i = 0; i < 12; i++) {
354 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
358 return -1; /* unknown month */
362 get_year (const char *in, size_t inlen)
366 g_return_val_if_fail (in != NULL, -1);
368 if ((year = decode_int (in, inlen)) == -1)
372 year += (year < 70) ? 2000 : 1900;
381 get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
383 register const char *inptr;
384 int *val, colons = 0;
387 *hour = *min = *sec = 0;
391 for (inptr = in; inptr < inend; inptr++) {
404 } else if (!(*inptr >= '0' && *inptr <= '9'))
407 *val = (*val * 10) + (*inptr - '0');
414 get_tzone (date_token **token)
416 const char *inptr, *inend;
420 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
421 inptr = (*token)->start;
422 inlen = (*token)->len;
423 inend = inptr + inlen;
425 if (*inptr == '+' || *inptr == '-') {
426 return decode_int (inptr, inlen);
430 if (*(inend - 1) == ')')
436 for (t = 0; t < 15; t++) {
437 size_t len = strlen (tz_offsets[t].name);
442 if (!strncmp (inptr, tz_offsets[t].name, len))
443 return tz_offsets[t].offset;
452 mktime_utc (struct tm *tm)
460 #if defined (G_OS_WIN32)
462 if (tm->tm_isdst > 0) {
468 #elif defined (HAVE_TM_GMTOFF)
470 #elif defined (HAVE_TIMEZONE)
471 if (tm->tm_isdst > 0) {
472 #if defined (HAVE_ALTZONE)
474 #else /* !defined (HAVE_ALTZONE) */
475 tz = (timezone - 3600);
480 #elif defined (HAVE__TIMEZONE)
483 #error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
490 parse_rfc822_date (date_token *tokens, int *tzone)
492 int hour, min, sec, offset, n;
497 g_return_val_if_fail (tokens != NULL, (time_t) 0);
501 memset ((void *) &tm, 0, sizeof (struct tm));
503 if ((n = get_wday (token->start, token->len)) != -1) {
504 /* not all dates may have this... */
510 if (!token || (n = get_mday (token->start, token->len)) == -1)
517 if (!token || (n = get_month (token->start, token->len)) == -1)
524 if (!token || (n = get_year (token->start, token->len)) == -1)
527 tm.tm_year = n - 1900;
530 /* get the hour/min/sec */
531 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
539 /* get the timezone */
540 if (!token || (n = get_tzone (&token)) == -1) {
541 /* I guess we assume tz is GMT? */
547 t = mktime_utc (&tm);
549 /* t is now GMT of the time we want, but not offset by the timezone ... */
551 /* this should convert the time to the GMT equiv time */
552 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
561 #define date_token_mask(t) (((date_token *) t)->mask)
562 #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
563 #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
564 #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
565 #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
566 #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
567 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
568 #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
571 parse_broken_date (date_token *tokens, int *tzone)
573 gboolean got_wday, got_month, got_tzone;
574 int hour, min, sec, offset, n;
579 memset ((void *) &tm, 0, sizeof (struct tm));
580 got_wday = got_month = got_tzone = FALSE;
585 if (is_weekday (token) && !got_wday) {
586 if ((n = get_wday (token->start, token->len)) != -1) {
587 d(printf ("weekday; "));
594 if (is_month (token) && !got_month) {
595 if ((n = get_month (token->start, token->len)) != -1) {
596 d(printf ("month; "));
603 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
604 if (get_time (token->start, token->len, &hour, &min, &sec)) {
605 d(printf ("time; "));
613 if (is_tzone (token) && !got_tzone) {
614 date_token *t = token;
616 if ((n = get_tzone (&t)) != -1) {
617 d(printf ("tzone; "));
624 if (is_numeric (token)) {
625 if (token->len == 4 && !tm.tm_year) {
626 if ((n = get_year (token->start, token->len)) != -1) {
627 d(printf ("year; "));
628 tm.tm_year = n - 1900;
632 /* Note: assumes MM-DD-YY ordering if '0 < MM <= 12' holds true */
633 if (!got_month && token->next && is_numeric (token->next)) {
634 if ((n = decode_int (token->start, token->len)) > 12) {
642 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
644 d(printf ("mday; "));
647 } else if (!tm.tm_year) {
648 if ((n = get_year (token->start, token->len)) != -1) {
649 d(printf ("2-digit year; "));
650 tm.tm_year = n - 1900;
666 t = mktime_utc (&tm);
668 /* t is now GMT of the time we want, but not offset by the timezone ... */
670 /* this should convert the time to the GMT equiv time */
671 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
681 gmime_datetok_table_init (void)
685 memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
687 for (i = 0; i < 256; i++) {
688 if (!strchr (NUMERIC_CHARS, i))
689 gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
691 if (!strchr (WEEKDAY_CHARS, i))
692 gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
694 if (!strchr (MONTH_CHARS, i))
695 gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
697 if (!strchr (TIME_CHARS, i))
698 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
700 if (!strchr (TIMEZONE_ALPHA_CHARS, i))
701 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
703 if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
704 gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
706 if (((char) i) == ':')
707 gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
709 if (strchr ("+-", i))
710 gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
713 printf ("static unsigned char gmime_datetok_table[256] = {");
714 for (i = 0; i < 256; i++) {
717 printf ("%3d,", gmime_datetok_table[i]);
725 * g_mime_utils_header_decode_date:
726 * @str: input date string
727 * @tz_offset: timezone offset
729 * Decodes the rfc822 date string and saves the GMT offset into
730 * @tz_offset if non-NULL.
732 * Returns: the time_t representation of the date string specified by
733 * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
734 * of the timezone offset will be stored.
737 g_mime_utils_header_decode_date (const char *str, int *tz_offset)
739 date_token *token, *tokens;
742 if (!(tokens = datetok (str))) {
749 if (!(date = parse_rfc822_date (tokens, tz_offset)))
750 date = parse_broken_date (tokens, tz_offset);
755 tokens = tokens->next;
756 date_token_free (token);
764 * g_mime_utils_generate_message_id:
765 * @fqdn: Fully qualified domain name
767 * Generates a unique Message-Id.
769 * Returns: a unique string in an addr-spec format suitable for use as
773 g_mime_utils_generate_message_id (const char *fqdn)
775 #ifdef G_THREADS_ENABLED
776 static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
777 #define MUTEX_LOCK() g_static_mutex_lock (&mutex)
778 #define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex)
781 #define MUTEX_UNLOCK()
783 static unsigned long int count = 0;
784 const char *hostname = NULL;
789 #ifdef HAVE_UTSNAME_DOMAINNAME
794 hostname = unam.nodename;
796 if (unam.domainname[0])
797 name = g_strdup_printf ("%s.%s", hostname, unam.domainname);
798 #else /* ! HAVE_UTSNAME_DOMAINNAME */
799 char host[MAXHOSTNAMELEN + 1];
801 #ifdef HAVE_GETHOSTNAME
802 host[MAXHOSTNAMELEN] = '\0';
803 if (gethostname (host, MAXHOSTNAMELEN) == 0) {
804 #ifdef HAVE_GETDOMAINNAME
805 size_t domainlen = MAXHOSTNAMELEN;
809 domain = g_malloc (domainlen);
811 while ((rv = getdomainname (domain, domainlen)) == -1 && errno == EINVAL) {
812 domainlen += MAXHOSTNAMELEN;
813 domain = g_realloc (domain, domainlen);
816 if (rv == 0 && domain[0]) {
818 name = g_strdup_printf ("%s.%s", host, domain);
824 #endif /* HAVE_GETDOMAINNAME */
828 #endif /* HAVE_GETHOSTNAME */
830 #endif /* HAVE_UTSNAME_DOMAINNAME */
832 #ifdef HAVE_GETADDRINFO
833 if (!name && hostname[0]) {
834 /* we weren't able to get a domain name */
835 struct addrinfo hints, *res;
837 memset (&hints, 0, sizeof (hints));
838 hints.ai_flags = AI_CANONNAME;
840 if (getaddrinfo (hostname, NULL, &hints, &res) == 0) {
841 name = g_strdup (res->ai_canonname);
845 #endif /* HAVE_GETADDRINFO */
847 fqdn = name != NULL ? name : (hostname[0] ? hostname : "localhost.localdomain");
851 msgid = g_strdup_printf ("%lu.%lu.%lu@%s", (unsigned long int) time (NULL),
852 (unsigned long int) getpid (), count++, fqdn);
861 decode_addrspec (const char **in)
863 const char *word, *inptr;
870 if (!(word = decode_word (&inptr))) {
871 w(g_warning ("No local-part in addr-spec: %s", *in));
875 addrspec = g_string_new ("");
876 g_string_append_len (addrspec, word, (size_t) (inptr - word));
878 /* get the rest of the local-part */
879 decode_lwsp (&inptr);
880 while (*inptr == '.') {
881 g_string_append_c (addrspec, *inptr++);
882 if ((word = decode_word (&inptr))) {
883 g_string_append_len (addrspec, word, (size_t) (inptr - word));
884 decode_lwsp (&inptr);
886 w(g_warning ("Invalid local-part in addr-spec: %s", *in));
891 /* we should be at the '@' now... */
892 if (*inptr++ != '@') {
893 w(g_warning ("Invalid addr-spec; missing '@': %s", *in));
897 g_string_append_c (addrspec, '@');
898 if (!decode_domain (&inptr, addrspec)) {
899 w(g_warning ("No domain in addr-spec: %s", *in));
904 g_string_free (addrspec, FALSE);
912 g_string_free (addrspec, TRUE);
918 decode_msgid (const char **in)
920 const char *inptr = *in;
923 decode_lwsp (&inptr);
925 w(g_warning ("Invalid msg-id; missing '<': %s", *in));
930 decode_lwsp (&inptr);
931 if ((msgid = decode_addrspec (&inptr))) {
932 decode_lwsp (&inptr);
934 w(g_warning ("Invalid msg-id; missing '>': %s", *in));
941 w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in));
943 while (*inptr && *inptr != '>')
946 msgid = g_strndup (*in, (size_t) (inptr - *in));
955 * g_mime_utils_decode_message_id:
956 * @message_id: string containing a message-id
958 * Decodes a msg-id as defined by rfc822.
960 * Returns: the addr-spec portion of the msg-id.
963 g_mime_utils_decode_message_id (const char *message_id)
965 g_return_val_if_fail (message_id != NULL, NULL);
967 return decode_msgid (&message_id);
972 * g_mime_references_decode:
973 * @text: string containing a list of msg-ids
975 * Decodes a list of msg-ids as in the References and/or In-Reply-To
976 * headers defined in rfc822.
978 * Returns: a list of referenced msg-ids.
981 g_mime_references_decode (const char *text)
983 GMimeReferences *refs, *tail, *ref;
984 const char *word, *inptr = text;
987 g_return_val_if_fail (text != NULL, NULL);
990 tail = (GMimeReferences *) &refs;
993 decode_lwsp (&inptr);
995 /* looks like a msg-id */
996 if ((msgid = decode_msgid (&inptr))) {
997 ref = g_new (GMimeReferences, 1);
1003 w(g_warning ("Invalid References header: %s", inptr));
1006 } else if (*inptr) {
1007 /* looks like part of a phrase */
1008 if (!(word = decode_word (&inptr))) {
1009 w(g_warning ("Invalid References header: %s", inptr));
1020 * g_mime_references_append:
1021 * @refs: the address of a #GMimeReferences list
1022 * @msgid: a message-id string
1024 * Appends a reference to msgid to the list of references.
1027 g_mime_references_append (GMimeReferences **refs, const char *msgid)
1029 GMimeReferences *ref;
1031 g_return_if_fail (refs != NULL);
1032 g_return_if_fail (msgid != NULL);
1034 ref = (GMimeReferences *) refs;
1038 ref->next = g_new (GMimeReferences, 1);
1039 ref->next->msgid = g_strdup (msgid);
1040 ref->next->next = NULL;
1045 * g_mime_references_free:
1046 * @refs: a #GMimeReferences list
1048 * Frees the #GMimeReferences list.
1051 g_mime_references_free (GMimeReferences *refs)
1053 GMimeReferences *ref, *next;
1058 g_free (ref->msgid);
1066 * g_mime_references_clear:
1067 * @refs: address of a #GMimeReferences list
1069 * Clears the #GMimeReferences list and resets it to %NULL.
1072 g_mime_references_clear (GMimeReferences **refs)
1074 g_return_if_fail (refs != NULL);
1076 g_mime_references_free (*refs);
1082 * g_mime_references_get_next:
1083 * @ref: a #GMimeReferences list
1085 * Advances to the next reference node in the #GMimeReferences list.
1087 * Returns: the next reference node in the #GMimeReferences list.
1089 const GMimeReferences *
1090 g_mime_references_get_next (const GMimeReferences *ref)
1092 return ref ? ref->next : NULL;
1097 * g_mime_references_get_message_id:
1098 * @ref: a #GMimeReferences list
1100 * Gets the Message-Id reference from the #GMimeReferences node.
1102 * Returns: the Message-Id reference from the #GMimeReferences node.
1105 g_mime_references_get_message_id (const GMimeReferences *ref)
1107 return ref ? ref->msgid : NULL;
1112 is_rfc2047_token (const char *inptr, size_t len)
1114 if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0)
1120 /* skip past the charset */
1121 while (*inptr != '?' && len > 0) {
1126 if (*inptr != '?' || len < 4)
1129 if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B')
1142 header_fold (const char *in, gboolean structured)
1144 gboolean last_was_lwsp = FALSE;
1145 register const char *inptr;
1146 size_t len, outlen, i;
1153 if (len <= GMIME_FOLD_LEN + 1)
1154 return g_strdup (in);
1156 out = g_string_new ("");
1157 fieldlen = strcspn (inptr, ": \t\n");
1158 g_string_append_len (out, inptr, fieldlen);
1162 while (*inptr && *inptr != '\n') {
1163 len = strcspn (inptr, " \t\n");
1165 if (len > 1 && outlen + len > GMIME_FOLD_LEN) {
1166 if (outlen > 1 && out->len > fieldlen + 2) {
1167 if (last_was_lwsp) {
1169 out->str[out->len - 1] = '\t';
1171 g_string_insert_c (out, out->len - 1, '\n');
1173 g_string_append (out, "\n\t");
1177 if (!structured && !is_rfc2047_token (inptr, len)) {
1178 /* check for very long words, just cut them up */
1179 while (outlen + len > GMIME_FOLD_LEN) {
1180 for (i = 0; i < GMIME_FOLD_LEN - outlen; i++)
1181 g_string_append_c (out, inptr[i]);
1182 inptr += GMIME_FOLD_LEN - outlen;
1183 len -= GMIME_FOLD_LEN - outlen;
1184 g_string_append (out, "\n\t");
1188 g_string_append_len (out, inptr, len);
1192 last_was_lwsp = FALSE;
1193 } else if (len > 0) {
1194 g_string_append_len (out, inptr, len);
1197 last_was_lwsp = FALSE;
1199 last_was_lwsp = TRUE;
1200 if (*inptr == '\t') {
1201 /* tabs are a good place to fold, odds
1202 are that this is where the previous
1204 g_string_append (out, "\n\t");
1206 while (is_blank (*inptr))
1209 g_string_append_c (out, *inptr++);
1215 if (*inptr == '\n' && out->str[out->len - 1] != '\n')
1216 g_string_append_c (out, '\n');
1219 g_string_free (out, FALSE);
1226 * g_mime_utils_structured_header_fold:
1227 * @str: input string
1229 * Folds a structured header according to the rules in rfc822.
1231 * Returns: an allocated string containing the folded header.
1234 g_mime_utils_structured_header_fold (const char *str)
1236 return header_fold (str, TRUE);
1241 * g_mime_utils_unstructured_header_fold:
1242 * @str: input string
1244 * Folds an unstructured header according to the rules in rfc822.
1246 * Returns: an allocated string containing the folded header.
1249 g_mime_utils_unstructured_header_fold (const char *str)
1251 return header_fold (str, FALSE);
1256 * g_mime_utils_header_fold:
1257 * @str: input string
1259 * Folds a structured header according to the rules in rfc822.
1261 * Returns: an allocated string containing the folded header.
1264 g_mime_utils_header_fold (const char *str)
1266 return header_fold (str, TRUE);
1271 * g_mime_utils_header_printf:
1272 * @format: string format
1273 * @Varargs: arguments
1275 * Allocates a buffer containing a formatted header specified by the
1278 * Returns: an allocated string containing the folded header specified
1279 * by @format and the following arguments.
1282 g_mime_utils_header_printf (const char *format, ...)
1287 va_start (ap, format);
1288 buf = g_strdup_vprintf (format, ap);
1291 ret = header_fold (buf, TRUE);
1298 need_quotes (const char *string)
1300 gboolean quoted = FALSE;
1308 else if (*inptr == '"')
1310 else if (!quoted && (is_tspecial (*inptr) || *inptr == '.'))
1321 * g_mime_utils_quote_string:
1322 * @str: input string
1324 * Quotes @str as needed according to the rules in rfc2045.
1326 * Returns: an allocated string containing the escaped and quoted (if
1327 * needed to be) input string. The decision to quote the string is
1328 * based on whether or not the input string contains any 'tspecials'
1329 * as defined by rfc2045.
1332 g_mime_utils_quote_string (const char *str)
1339 out = g_string_new ("");
1341 if ((quote = need_quotes (str)))
1342 g_string_append_c (out, '"');
1344 for (c = str; *c; c++) {
1345 if ((*c == '"' && quote) || *c == '\\')
1346 g_string_append_c (out, '\\');
1348 g_string_append_c (out, *c);
1352 g_string_append_c (out, '"');
1355 g_string_free (out, FALSE);
1362 * g_mime_utils_unquote_string:
1363 * @str: input string
1365 * Unquotes and unescapes a string.
1368 g_mime_utils_unquote_string (char *str)
1370 /* if the string is quoted, unquote it */
1371 register char *inptr = str;
1372 int escaped = FALSE;
1379 if (*inptr == '\\') {
1385 } else if (*inptr == '"') {
1404 * g_mime_utils_text_is_8bit:
1405 * @text: text to check for 8bit chars
1408 * Determines if @text contains 8bit characters within the first @len
1411 * Returns: %TRUE if the text contains 8bit characters or %FALSE
1415 g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
1417 register const unsigned char *inptr;
1418 const unsigned char *inend;
1420 g_return_val_if_fail (text != NULL, FALSE);
1423 for (inptr = text; *inptr && inptr < inend; inptr++)
1424 if (*inptr > (unsigned char) 127)
1432 * g_mime_utils_best_encoding:
1433 * @text: text to encode
1436 * Determines the best content encoding for the first @len bytes of
1439 * Returns: a #GMimeContentEncoding that is determined to be the best
1440 * encoding type for the specified block of text. ("best" in this
1441 * particular case means smallest output size)
1443 GMimeContentEncoding
1444 g_mime_utils_best_encoding (const unsigned char *text, size_t len)
1446 const unsigned char *ch, *inend;
1450 for (ch = text; ch < inend; ch++)
1451 if (*ch > (unsigned char) 127)
1454 if ((float) count <= len * 0.17)
1455 return GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE;
1457 return GMIME_CONTENT_ENCODING_BASE64;
1463 * @cd: iconv converter
1464 * @inbuf: input text buffer to convert
1465 * @inleft: length of the input buffer
1466 * @outp: pointer to output buffer
1467 * @outlenp: pointer to output buffer length
1468 * @ninval: the number of invalid bytes in @inbuf
1470 * Converts the input buffer from one charset to another using the
1471 * @cd. On completion, @outp will point to the output buffer
1472 * containing the converted text (nul-terminated), @outlenp will be
1473 * the size of the @outp buffer (note: not the strlen() of @outp) and
1474 * @ninval will contain the number of bytes which could not be
1477 * Bytes which cannot be converted from @inbuf will appear as '?'
1478 * characters in the output buffer.
1480 * If *@outp is non-NULL, then it is assumed that it points to a
1481 * pre-allocated buffer of length *@outlenp. This is done so that the
1482 * same output buffer can be reused multiple times.
1484 * Returns: the string length of the output buffer.
1487 charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size_t *outlenp, size_t *ninval)
1489 size_t outlen, outleft, rc, n = 0;
1492 if (*outp == NULL) {
1493 outleft = outlen = (inleft * 2) + 16;
1494 outbuf = out = g_malloc (outlen + 1);
1496 outleft = outlen = *outlenp;
1497 outbuf = out = *outp;
1501 rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
1502 if (rc == (size_t) -1) {
1503 if (errno == EINVAL) {
1504 /* incomplete sequence at the end of the input buffer */
1510 /* seems that GnuWin32's libiconv 1.9 does not set errno in
1511 * the E2BIG case, so we have to fake it */
1512 if (outleft <= inleft)
1516 if (errno == E2BIG) {
1517 /* need to grow the output buffer */
1518 outlen += (inleft * 2) + 16;
1519 rc = (size_t) (outbuf - out);
1520 out = g_realloc (out, outlen + 1);
1521 outleft = outlen - rc;
1524 /* invalid byte(-sequence) in the input buffer */
1532 } while (inleft > 0);
1534 iconv (cd, NULL, NULL, &outbuf, &outleft);
1541 return (outbuf - out);
1545 #define USER_CHARSETS_INCLUDE_UTF8 (1 << 0)
1546 #define USER_CHARSETS_INCLUDE_LOCALE (1 << 1)
1550 * g_mime_utils_decode_8bit:
1551 * @text: input text in unknown 8bit/multibyte character set
1552 * @len: input text length
1554 * Attempts to convert text in an unknown 8bit/multibyte charset into
1555 * UTF-8 by finding the charset which will convert as many bytes as
1556 * possible into valid UTF-8 characters. If no exact match can be found,
1557 * it will choose the best match and convert invalid byte sequences
1558 * into question-marks (?) in the returned string buffer.
1560 * Returns: a UTF-8 string representation of @text.
1563 g_mime_utils_decode_8bit (const char *text, size_t len)
1565 const char **charsets, **user_charsets, *locale, *best;
1566 size_t outleft, outlen, min, ninval;
1567 unsigned int included = 0;
1572 g_return_val_if_fail (text != NULL, NULL);
1574 locale = g_mime_locale_charset ();
1575 if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
1576 included |= USER_CHARSETS_INCLUDE_LOCALE;
1578 if ((user_charsets = g_mime_user_charsets ())) {
1579 while (user_charsets[i])
1583 charsets = g_alloca (sizeof (char *) * (i + 3));
1586 if (user_charsets) {
1587 while (user_charsets[i]) {
1588 /* keep a record of whether or not the user-supplied
1589 * charsets include UTF-8 and/or the default fallback
1590 * charset so that we avoid doubling our efforts for
1591 * these 2 charsets. We could have used a hash table
1592 * to keep track of unique charsets, but we can
1593 * (hopefully) assume that user_charsets is a unique
1594 * list of charsets with no duplicates. */
1595 if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
1596 included |= USER_CHARSETS_INCLUDE_UTF8;
1598 if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
1599 included |= USER_CHARSETS_INCLUDE_LOCALE;
1601 charsets[i] = user_charsets[i];
1606 if (!(included & USER_CHARSETS_INCLUDE_UTF8))
1607 charsets[i++] = "UTF-8";
1609 if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
1610 charsets[i++] = locale;
1617 outleft = (len * 2) + 16;
1618 out = g_malloc (outleft + 1);
1620 for (i = 0; charsets[i]; i++) {
1621 if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
1624 outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
1626 g_mime_iconv_close (cd);
1629 return g_realloc (out, outlen + 1);
1637 /* if we get here, then none of the charsets fit the 8bit text flawlessly...
1638 * try to find the one that fit the best and use that to convert what we can,
1639 * replacing any byte we can't convert with a '?' */
1641 if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
1642 /* this shouldn't happen... but if we are here, then
1643 * it did... the only thing we can do at this point
1644 * is replace the 8bit garbage and pray */
1645 register const char *inptr = text;
1646 const char *inend = inptr + len;
1649 while (inptr < inend) {
1650 if (is_ascii (*inptr))
1651 *outbuf++ = *inptr++;
1658 return g_realloc (out, (size_t) (outbuf - out));
1661 outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
1663 g_mime_iconv_close (cd);
1665 return g_realloc (out, outlen + 1);
1669 /* this decodes rfc2047's version of quoted-printable */
1671 quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
1673 register const unsigned char *inptr;
1674 register unsigned char *outptr;
1675 const unsigned char *inend;
1676 unsigned char c, c1;
1682 while (inptr < inend) {
1685 if (inend - inptr >= 2) {
1686 c = toupper (*inptr++);
1687 c1 = toupper (*inptr++);
1688 *outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4)
1689 | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') & 0x0f);
1691 /* data was truncated */
1694 } else if (c == '_') {
1695 /* _'s are an rfc2047 shortcut for encoding spaces */
1702 return (ssize_t) (outptr - out);
1705 #define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
/* Decode a single rfc2047 encoded-word token ("=?charset?encoding?text?=").
 *
 * in:    start of the token (the leading "=?")
 * inlen: length of the whole token in bytes, including both delimiters
 *
 * On the visible return paths the result comes from g_strndup() or
 * g_mime_utils_decode_8bit(); callers test the result against NULL to
 * detect a token that could not be decoded. */
1708 rfc2047_decode_word (const char *in, size_t inlen)
/* inptr starts just past the leading "=?"; inend stops just before the trailing "?=" */
1710 const unsigned char *instart = (const unsigned char *) in;
1711 const register unsigned char *inptr = instart + 2;
1712 const unsigned char *inend = instart + inlen - 2;
1713 unsigned char *decoded;
1714 const char *charset;
1723 /* skip over the charset */
/* the '?' ending the charset must be followed by one encoding character and another '?' */
1724 if (!(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
/* base64 payload: decode into a g_alloca() buffer as large as the encoded text */
1733 len = (size_t) (inend - inptr);
1734 decoded = g_alloca (len);
1735 declen = g_mime_encoding_base64_decode_step (inptr, len, decoded, &state, &save);
/* this diagnostic belongs to the base64 branch, so it names the 'B' encoding */
1738 d(fprintf (stderr, "encountered broken 'B' encoding\n"));
/* quoted-printable payload: same buffer sizing, decoded by quoted_decode() */
1745 len = (size_t) (inend - inptr);
1746 decoded = g_alloca (len);
1747 declen = quoted_decode (inptr, len, decoded);
1750 d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
1755 d(fprintf (stderr, "unknown encoding\n"));
/* copy the charset name that follows "=?" into a NUL-terminated buffer;
 * presumably inptr now points at the encoded text so that inptr - 3 is the
 * '?' ending the charset (the statements advancing inptr are not visible) */
1759 len = (inptr - 3) - (instart + 2);
1760 charenc = g_alloca (len + 1);
1761 memcpy (charenc, in + 2, len);
1762 charenc[len] = '\0';
1765 /* rfc2231 updates rfc2047 encoded words...
1766 * The ABNF given in RFC 2047 for encoded-words is:
1767 * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1768 * This specification changes this ABNF to:
1769 * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1772 /* trim off the 'language' part if it's there... */
1773 if ((p = strchr (charset, '*')))
1776 /* slight optimization? */
1777 if (!g_ascii_strcasecmp (charset, "UTF-8")) {
1778 p = (char *) decoded;
1781 //while (!g_utf8_validate (p, len, (const char **) &p)) {
1782 // len = declen - (p - (char *) decoded);
/* UTF-8 input needs no charset conversion: return a copy of the decoded bytes */
1786 return g_strndup ((char *) decoded, declen);
/* empty charset or no usable converter: fall back to g_mime_utils_decode_8bit() */
1789 if (!charset[0] || (cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
1790 w(g_warning ("Cannot convert from %s to UTF-8, header display may "
1791 "be corrupt: %s", charset[0] ? charset : "unspecified charset",
1792 g_strerror (errno)));
1794 return g_mime_utils_decode_8bit ((char *) decoded, declen);
1798 buf = g_malloc (len + 1);
1800 charset_convert (cd, (char *) decoded, declen, &buf, &len, &ninval);
1802 g_mime_iconv_close (cd);
/* the '*' precision of a printf-style format must be an int; declen holds a
 * decoder length (quoted_decode() returns ssize_t), so convert it explicitly */
1806 g_warning ("Failed to completely convert \"%.*s\" to UTF-8, display may be "
1807 "corrupt: %s", (int) declen, decoded, g_strerror (errno));
1816 * g_mime_utils_header_decode_text:
1817 * @text: header text to decode
1819 * Decodes an rfc2047 encoded 'text' header.
1821 * Note: See g_mime_set_user_charsets() for details on how charset
1822 * conversion is handled for unencoded 8bit text and/or wrongly
1823 * specified rfc2047 encoded-word tokens.
1825 * Returns: a newly allocated UTF-8 string representing the decoded
1829 g_mime_utils_header_decode_text (const char *text)
/* Walks 'text' as alternating runs of linear whitespace and word tokens,
 * decoding rfc2047 encoded-words and accumulating the output in a GString. */
1831 gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
1832 register const char *inptr = text;
1833 gboolean encoded = FALSE;
1834 const char *lwsp, *word;
/* early exit with an empty string (the guarding condition is not visible here) */
1841 return g_strdup ("");
1843 out = g_string_sized_new (strlen (text) + 1);
1845 while (*inptr != '\0') {
/* measure the whitespace run that precedes the next word */
1847 while (is_lwsp (*inptr))
1850 nlwsp = (size_t) (inptr - lwsp);
1852 if (*inptr != '\0') {
/* workaround mode: recognise encoded-word tokens even when they contain
 * whitespace or are glued to surrounding text */
1856 if (enable_rfc2047_workarounds) {
1857 if (!strncmp (inptr, "=?", 2)) {
1860 /* skip past the charset (if one is even declared, sigh) */
1861 while (*inptr && *inptr != '?') {
1862 ascii = ascii && is_ascii (*inptr);
1866 /* sanity check encoding type */
/* strchr() also matches the terminating NUL of "BbQq", so a header that ends
 * right after the charset's '?' must be rejected explicitly; otherwise
 * inptr[2] would be read one byte past the end of the text */
1867 if (inptr[0] != '?' || inptr[1] == '\0' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
1872 /* find the end of the rfc2047 encoded word token */
1873 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
1874 ascii = ascii && is_ascii (*inptr);
1878 if (!strncmp (inptr, "?=", 2))
1882 /* stop if we encounter a possible rfc2047 encoded
1883 * token even if it's inside another word, sigh. */
1884 while (*inptr && !is_lwsp (*inptr) &&
1885 strncmp (inptr, "=?", 2) != 0) {
1886 ascii = ascii && is_ascii (*inptr);
/* presumably the non-workaround path: a word is everything up to the next whitespace */
1891 while (*inptr && !is_lwsp (*inptr)) {
1892 ascii = ascii && is_ascii (*inptr);
/* n is the byte length of the word token just scanned */
1897 n = (size_t) (inptr - word);
1898 if (is_rfc2047_encoded_word (word, n)) {
1899 if ((decoded = rfc2047_decode_word (word, n))) {
1900 /* rfc2047 states that you must ignore all
1901 * whitespace between encoded words */
1903 g_string_append_len (out, lwsp, nlwsp);
1905 g_string_append (out, decoded);
1910 /* append lwsp and invalid rfc2047 encoded-word token */
/* a single append of nlwsp + n bytes from lwsp covers the whitespace and the token together */
1911 g_string_append_len (out, lwsp, nlwsp + n);
1916 g_string_append_len (out, lwsp, nlwsp);
1918 /* append word token */
1920 /* *sigh* I hate broken mailers... */
1921 decoded = g_mime_utils_decode_8bit (word, n);
1922 g_string_append (out, decoded);
1925 g_string_append_len (out, word, n);
1931 /* appending trailing lwsp */
1932 g_string_append_len (out, lwsp, nlwsp);
/* FALSE frees only the GString wrapper and leaves the character data allocated */
1938 g_string_free (out, FALSE);
1945 * g_mime_utils_header_decode_phrase:
1946 * @phrase: header to decode
1948 * Decodes an rfc2047 encoded 'phrase' header.
1950 * Note: See g_mime_set_user_charsets() for details on how charset
1951 * conversion is handled for unencoded 8bit text and/or wrongly
1952 * specified rfc2047 encoded-word tokens.
1954 * Returns: a newly allocated UTF-8 string representing the decoded
1958 g_mime_utils_header_decode_phrase (const char *phrase)
/* Walks 'phrase' as alternating runs of linear whitespace and tokens; atom
 * tokens shaped like rfc2047 encoded-words are decoded, and the output is
 * accumulated in a GString. */
1960 register const char *inptr = phrase;
1961 gboolean encoded = FALSE;
1962 const char *lwsp, *text;
/* early exit with an empty string (the guarding condition is not visible here) */
1969 return g_strdup ("");
1971 out = g_string_sized_new (strlen (phrase) + 1);
1973 while (*inptr != '\0') {
/* measure the whitespace run that precedes the next token */
1975 while (is_lwsp (*inptr))
1978 nlwsp = (size_t) (inptr - lwsp);
/* atom path: only a run of atom characters is tested for the encoded-word shape */
1981 if (is_atom (*inptr)) {
1982 while (is_atom (*inptr))
1985 n = (size_t) (inptr - text);
1986 if (is_rfc2047_encoded_word (text, n)) {
1987 if ((decoded = rfc2047_decode_word (text, n))) {
1988 /* rfc2047 states that you must ignore all
1989 * whitespace between encoded words */
1991 g_string_append_len (out, lwsp, nlwsp);
1993 g_string_append (out, decoded);
1998 /* append lwsp and invalid rfc2047 encoded-word token */
1999 g_string_append_len (out, lwsp, nlwsp + n);
2003 /* append lwsp and atom token */
2004 g_string_append_len (out, lwsp, nlwsp + n);
/* presumably the non-atom path: emit the whitespace, then scan up to the next
 * whitespace while tracking whether every byte is ASCII */
2008 g_string_append_len (out, lwsp, nlwsp);
2011 while (*inptr && !is_lwsp (*inptr)) {
2012 ascii = ascii && is_ascii (*inptr);
2016 n = (size_t) (inptr - text);
2019 /* *sigh* I hate broken mailers... */
2020 decoded = g_mime_utils_decode_8bit (text, n);
2021 g_string_append (out, decoded);
2024 g_string_append_len (out, text, n);
/* FALSE frees only the GString wrapper and leaves the character data allocated */
2032 g_string_free (out, FALSE);
2038 /* rfc2047 version of quoted-printable */
/* Encodes the len bytes at 'in' into 'out' and returns the number of bytes
 * written. 'safemask' selects, through gmime_special_table, which bytes are
 * treated as safe. The caller supplies 'out'; no bounds check on it is
 * visible here. */
2040 quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
2042 register const unsigned char *inptr = (const unsigned char *) in;
2043 const unsigned char *inend = inptr + len;
2044 register unsigned char *outptr = out;
2047 while (inptr < inend) {
/* a literal '_' is never treated as safe here, whatever the table says */
2051 } else if (c != '_' && gmime_special_table[c] & safemask) {
/* emit the byte as two hex digits, high nibble first */
2055 *outptr++ = tohex[(c >> 4) & 0xf];
2056 *outptr++ = tohex[c & 0xf];
2060 return (outptr - out);
/* Appends one rfc2047 encoded-word of the form "=?charset?encoding?text?=" to
 * 'string', built from the len bytes at 'word'. 'safemask' is forwarded to
 * quoted_encode() on the quoted-printable path. */
2064 rfc2047_encode_word (GString *string, const char *word, size_t len,
2065 const char *charset, gushort safemask)
2067 register char *inptr, *outptr;
2068 iconv_t cd = (iconv_t) -1;
2069 unsigned char *encoded;
/* a converter from UTF-8 to 'charset' is opened only when the target is not UTF-8 itself */
2076 if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
2077 cd = g_mime_iconv_open (charset, "UTF-8");
2079 if (cd != (iconv_t) -1) {
2080 uword = g_mime_iconv_strndup (cd, (char *) word, len);
2081 g_mime_iconv_close (cd);
/* the converted text can differ in size, so its length is recomputed */
2085 len = strlen (uword);
/* g_mime_utils_best_encoding() chooses between base64 and quoted-printable */
2091 switch (g_mime_utils_best_encoding ((const unsigned char *) word, len)) {
2092 case GMIME_CONTENT_ENCODING_BASE64:
/* g_alloca() buffer sized by the encoder's length macro plus one byte for the NUL */
2093 enclen = GMIME_BASE64_ENCODE_LEN (len);
2094 encoded = g_alloca (enclen + 1);
2098 pos = g_mime_encoding_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save);
2099 encoded[pos] = '\0';
2101 /* remove \n chars as headers need to be wrapped differently */
2102 if (G_UNLIKELY ((inptr = strchr ((char *) encoded, '\n')))) {
2104 while (G_LIKELY (*inptr)) {
2105 if (G_LIKELY (*inptr != '\n'))
2115 case GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE:
2116 enclen = GMIME_QP_ENCODE_LEN (len);
2117 encoded = g_alloca (enclen + 1);
2121 pos = quoted_encode (word, len, encoded, safemask);
2122 encoded[pos] = '\0';
2128 g_assert_not_reached ();
/* 'encoding' is the single character placed between the two inner '?' delimiters */
2133 g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded);
/* Node of a singly linked list of word tokens. 'start' and 'end' delimit the
 * word inside the caller's input string (end - start is used as its length).
 * Code elsewhere in this file also reads 'type' and 'encoding' members, whose
 * declarations are not visible in this listing. */
2143 typedef struct _rfc822_word {
2144 struct _rfc822_word *next;
2145 const char *start, *end;
/* nodes are allocated and released with GLib's g_slice_new() / g_slice_free() */
2150 #define rfc822_word_free(word) g_slice_free (rfc822_word, word)
2151 #define rfc822_word_new() g_slice_new (rfc822_word)
2153 /* okay, so 'unstructured text' fields don't actually contain 'word'
2154 * tokens, but we can group stuff similarly... */
/* Splits the UTF-8 string 'in' into a linked list of rfc822_word nodes, each
 * recording its start, its type and the encoding level it requires.
 * 'phrase' enables quoted-string classification for characters below 128. */
2155 static rfc822_word *
2156 rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase)
2158 rfc822_word *words, *tail, *word;
2159 rfc822_word_t type = WORD_ATOM;
2160 const char *inptr, *start, *last;
2161 int count = 0, encoding = 0;
/* NOTE(review): treating &words as a node presumably relies on 'next' being
 * the first struct member, so that tail->next aliases 'words' */
2164 tail = (rfc822_word *) &words;
2166 last = start = inptr = in;
2167 while (inptr && *inptr) {
2168 const char *newinptr;
/* fetch the current UTF-8 character and the position of the one after it */
2171 newinptr = g_utf8_next_char (inptr);
2172 c = g_utf8_get_char (inptr);
2173 if (newinptr == NULL || !g_unichar_validate (c)) {
2174 w(g_warning ("Invalid UTF-8 sequence encountered"));
/* whitespace: a new node is allocated for the word collected so far */
2181 if (c < 256 && is_lwsp (c)) {
2183 word = rfc822_word_new ();
2185 word->start = start;
2188 word->encoding = encoding;
2200 if (phrase && c < 128) {
2201 /* phrases can have qstring words */
2203 type = MAX (type, WORD_QSTRING);
2204 } else if (c > 127 && c < 256) {
/* characters in 128..255 need at least encoding level 1 */
2206 encoding = MAX (encoding, 1);
2207 } else if (c >= 256) {
/* a word that reaches GMIME_FOLD_PREENCODED is split into a node of its own */
2212 if (count >= GMIME_FOLD_PREENCODED) {
2213 word = rfc822_word_new ();
2215 word->start = start;
2218 word->encoding = encoding;
2224 /* Note: don't reset 'type' as it
2225 * needs to be preserved when breaking
2236 word = rfc822_word_new ();
2238 word->start = start;
2241 word->encoding = encoding;
2248 printf ("rfc822 word tokens:\n");
/* the '*' precision of "%.*s" must be an int, while a pointer difference is a
 * ptrdiff_t, so it is converted explicitly */
2251 printf ("\t'%.*s'; type=%d, encoding=%d\n",
2252 (int) (word->end - word->start), word->start,
2253 word->type, word->encoding);
/* True while a merged word of 'wlen' bytes is still short enough to keep as one
 * word: below GMIME_FOLD_PREENCODED for WORD_2047 words, below
 * GMIME_FOLD_LEN - 8 for every other type. */
2262 #define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8))
/* Decides whether 'word' and its successor 'next' should be merged into one
 * word, based on both types and on the length the merged word would have.
 * The case labels of the switch are not visible in this listing; judging by
 * the type passed to MERGED_WORD_LT_FOLDLEN in each arm, the arms presumably
 * handle WORD_ATOM, WORD_QSTRING and WORD_2047 in that order (TODO confirm). */
2265 should_merge_words (rfc822_word *word, rfc822_word *next)
2267 switch (word->type) {
2269 if (next->type == WORD_2047)
/* the merged length runs from the start of 'word' to the end of 'next' */
2272 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, next->type));
2274 /* avoid merging with words that need to be rfc2047 encoded */
2275 if (next->type == WORD_2047)
2278 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_QSTRING));
2280 if (next->type == WORD_ATOM) {
2281 /* whether we merge or not is dependent upon:
2282 * 1. the number of atoms in a row after 'word'
2283 * 2. if there is another encword after the string of atoms.
2287 while (next && next->type == WORD_ATOM) {
2292 /* if all the words after the encword are atoms, don't merge */
2293 if (!next || natoms > 3)
2297 /* avoid merging with qstrings */
2298 if (next->type == WORD_QSTRING)
2301 return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_2047));
/* Merges neighbouring nodes of the word list at *wordsp in two passes. A merge
 * extends 'word' to the end of 'next', unlinks 'next' and frees it. */
2308 rfc2047_encode_merge_rfc822_words (rfc822_word **wordsp)
2310 rfc822_word *word, *next, *words = *wordsp;
2312 /* first pass: merge qstrings with adjacent qstrings and encwords with adjacent encwords */
2314 while (word && word->next) {
/* only equal, non-atom types are merged here, and only while the merged
 * length stays under the fold limit for that type */
2317 if (word->type != WORD_ATOM && word->type == next->type &&
2318 MERGED_WORD_LT_FOLDLEN (next->end - word->start, word->type)) {
2319 /* merge the words */
2320 word->encoding = MAX (word->encoding, next->encoding);
2322 word->end = next->end;
2323 word->next = next->next;
2325 rfc822_word_free (next);
2333 /* second pass: now merge atoms with the other words */
2335 while (word && word->next) {
2338 if (should_merge_words (word, next)) {
2339 /* the resulting word type is the MAX of the 2 types */
2340 word->type = MAX (word->type, next->type);
/* the merged word keeps the higher of the two encoding levels */
2342 word->encoding = MAX (word->encoding, next->encoding);
2344 word->end = next->end;
2345 word->next = next->next;
2347 rfc822_word_free (next);
/* Appends 'in' to 'out' as a double-quoted string: an opening quote, the
 * characters with every '"' and '\' preceded by a backslash, and a closing
 * quote. 'len' presumably bounds the input through 'inend', whose assignment
 * is not visible in this listing. */
2359 g_string_append_len_quoted (GString *out, const char *in, size_t len)
2361 register const char *inptr;
2364 g_string_append_c (out, '"');
2369 while (inptr < inend) {
/* a backslash is emitted first, then the character itself */
2370 if (*inptr == '"' || *inptr == '\\')
2371 g_string_append_c (out, '\\');
2373 g_string_append_c (out, *inptr);
2378 g_string_append_c (out, '"');
/* Encodes the header value 'in' according to rfc2047: the input is split into
 * words, neighbouring words are merged, and each word is written to a GString
 * as-is, as a quoted string, or as an encoded-word depending on its type.
 * 'safemask' is forwarded to rfc2047_encode_word(); its IS_PSAFE bit also
 * switches on phrase handling. */
2382 rfc2047_encode (const char *in, gushort safemask)
2384 rfc822_word *words, *word, *prev = NULL;
2385 const char **charsets, *charset;
/* when no words are produced the input is returned as a plain copy */
2393 if (!(words = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE)))
2394 return g_strdup (in);
2396 rfc2047_encode_merge_rfc822_words (&words);
2398 charsets = g_mime_user_charsets ();
2400 out = g_string_new ("");
2402 /* output words now with spaces between them */
2405 /* append correct number of spaces between words */
2406 if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
2407 /* one or both of the words are not encoded so we write the spaces out untouched */
2408 len = word->start - prev->end;
2409 g_string_append_len (out, prev->end, len);
2412 switch (word->type) {
/* case labels are not visible in this listing; this arm copies the word verbatim */
2414 g_string_append_len (out, word->start, (size_t) (word->end - word->start));
/* this arm writes a quoted string and asserts that phrase mode is active */
2417 g_assert (safemask & IS_PSAFE);
2418 g_string_append_len_quoted (out, word->start, (size_t) (word->end - word->start));
2421 if (prev && prev->type == WORD_2047) {
2422 /* include the whitespace chars between these 2 words in the
2423 resulting rfc2047 encoded word. */
2424 len = word->end - prev->end;
2427 /* encoded words need to be separated by linear whitespace */
2428 g_string_append_c (out, ' ');
2430 len = word->end - word->start;
2431 start = word->start;
/* the word's encoding level selects the charset named in the encoded-word */
2434 switch (word->encoding) {
2435 case 0: /* us-ascii */
2436 rfc2047_encode_word (out, start, len, "us-ascii", safemask);
2438 case 1: /* iso-8859-1 */
2439 rfc2047_encode_word (out, start, len, "iso-8859-1", safemask);
/* other levels: try each user charset that can encode the text, with
 * g_mime_charset_best_name() as the alternative */
2443 g_mime_charset_init (&mask);
2444 g_mime_charset_step (&mask, start, len);
2446 for (i = 0; charsets && charsets[i]; i++) {
2447 if (g_mime_charset_can_encode (&mask, charsets[i], start, len)) {
2448 charset = charsets[i];
2454 charset = g_mime_charset_best_name (&mask);
2456 rfc2047_encode_word (out, start, len, charset, safemask);
/* word nodes are released through 'prev' */
2463 rfc822_word_free (prev);
2469 rfc822_word_free (prev);
/* FALSE frees only the GString wrapper and leaves the character data allocated */
2472 g_string_free (out, FALSE);
2479 * g_mime_utils_header_encode_phrase:
2480 * @phrase: phrase to encode
2482 * Encodes a 'phrase' header according to the rules in rfc2047.
2484 * Returns: the encoded 'phrase'. Useful for encoding internet
2488 g_mime_utils_header_encode_phrase (const char *phrase)
/* IS_PSAFE is the safe-character mask for phrases; rfc2047_encode() also
 * tests this bit to enable quoted-string words */
2493 return rfc2047_encode (phrase, IS_PSAFE);
2498 * g_mime_utils_header_encode_text:
2499 * @text: text to encode
2501 * Encodes a 'text' header according to the rules in rfc2047.
2503 * Returns: the encoded header. Useful for encoding
2504 * headers like "Subject".
2507 g_mime_utils_header_encode_text (const char *text)
2512 return rfc2047_encode (text, IS_ESAFE);