2 * parse time string - user friendly date and time parser
3 * Copyright © 2012 Jani Nikula
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
18 * Author: Jani Nikula <jani@nikula.org>
33 #include <sys/types.h>
36 #include "parse-time-string.h"
39 * IMPLEMENTATION DETAILS
41 * At a high level, the parsing is done in two phases: 1) actual
42 * parsing of the input string and storing the parsed data into
43 * 'struct state', and 2) processing of the data in 'struct state'
44 * according to current time (or provided reference time) and
45 * rounding. This is evident in the main entry point function
46 * parse_time_string().
48 * 1) The parsing phase - parse_input()
50 * Parsing is greedy and happens from left to right. The parsing is as
51 * unambiguous as possible; only unambiguous date/time formats are
52 * accepted. Redundant or contradictory absolute date/time in the
53 * input (e.g. date specified multiple times/ways) is not
54 * accepted. Relative date/time on the other hand just accumulates if
55 * present multiple times (e.g. "5 days 5 days" just turns into 10 days).
58 * Parsing decisions are made on the input format, not value. For
59 * example, "20/5/2005" fails because the recognized format here is
60 * MM/D/YYYY, even though the values would suggest DD/M/YYYY.
62 * Parsing is mostly stateless in the sense that parsing decisions are
63 * not made based on the values of previously parsed data, or whether
64 * certain data is present in the first place. (There are a few
65 * exceptions to the latter part, though, such as parsing of time zone
66 * that would otherwise look like plain time.)
68 * When the parser encounters a number that is not greedily parsed as
69 * part of a format, the interpretation is postponed until the next
70 * token is parsed. The parser for the next token may consume the
71 * previously postponed number. For example, when parsing "20 May" the
72 * meaning of "20" is not known until "May" is parsed. If the parser
73 * for the next token does not consume the postponed number, the
74 * number is handled as a "lone" number before the parser for the next token is called.
77 * 2) The processing phase - create_output()
79 * Once the parser in phase 1 has finished, 'struct state' contains
80 * all the information from the input string, and it's no longer
81 * needed. Since the parser does not even handle the concept of "now",
82 * the processing initializes the fields referring to the current date/time.
85 * If requested, the result is rounded towards past or future. The
86 * idea behind rounding is to support parsing date/time ranges in an
87 * obvious way. For example, for a range defined as two dates (without
88 * time), one would typically want to have an inclusive range from the
89 * beginning of start date to the end of the end date. The caller
90 * would use rounding towards past in the start date, and towards
91 * future in the end date.
93 * The absolute date and time is shifted by the relative date and
94 * time, and time zone adjustments are made. Daylight saving time
95 * (DST) is specifically *not* handled at all.
97 * Finally, the result is stored to time_t.
/* Mark a declaration (typically a parameter) as intentionally unused. */
#define unused(x) x __attribute__ ((unused))

/* XXX: Redefine these to add i18n support. The keyword table uses
 * N_() to mark strings to be translated; they are accessed
 * dynamically using _(). */
#define _(s) (s)	/* i18n: define as gettext (s) */
#define N_(s) (s)	/* i18n: define as gettext_noop (s) */

/* Number of elements in array a. Only meaningful for true arrays; a
 * pointer argument silently yields a wrong result. The argument is
 * parenthesized so that expressions such as *p or s->arr work. */
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
/*
 * Field indices in the tm and set arrays of struct state.
 *
 * NOTE: There's some code that depends on the ordering of this enum.
 */
enum field {
    /* Keep SEC...YEAR in this order. */
    TM_ABS_SEC,		/* seconds */
    TM_ABS_MIN,		/* minutes */
    TM_ABS_HOUR,	/* hours */
    TM_ABS_MDAY,	/* day of the month */
    TM_ABS_MON,		/* month */
    TM_ABS_YEAR,	/* year */

    TM_WDAY,		/* day of the week. special: may be relative */
    TM_ABS_ISDST,	/* daylight saving time */

    TM_AMPM,		/* am vs. pm */
    TM_TZ,		/* timezone in minutes */

    /* Keep SEC...YEAR in this order. */
    TM_REL_SEC,		/* seconds relative to absolute or reference time */
    TM_REL_MIN,		/* minutes ... */
    TM_REL_HOUR,	/* hours ... */
    TM_REL_DAY,		/* days ... */
    TM_REL_MON,		/* months ... */
    TM_REL_YEAR,	/* years ... */
    TM_REL_WEEK,	/* weeks ... */

    TM_NONE,		/* not a field */

    /* Number of real fields; used as the size of the tm and set arrays. */
    TM_SIZE = TM_NONE,
    TM_FIRST_ABS = TM_ABS_SEC,
    TM_FIRST_REL = TM_REL_SEC,
};
/* Values for the set array of struct state. FIELD_UNSET is the first
 * enumerator (value 0), so a zero-initialized state has every field
 * unset. */
enum field_set {
    FIELD_UNSET,	/* The field has not been touched by parser. */
    FIELD_SET,		/* The field has been set by parser. */
    FIELD_NOW,		/* The field will be set to reference time. */
};
154 next_abs_field (enum field field)
156 /* NOTE: Depends on the enum ordering. */
157 return field < TM_ABS_YEAR ? field + 1 : TM_NONE;
161 abs_to_rel_field (enum field field)
163 assert (field <= TM_ABS_YEAR);
165 /* NOTE: Depends on the enum ordering. */
166 return field + (TM_FIRST_REL - TM_FIRST_ABS);
169 /* Get the smallest acceptable value for field. */
171 get_field_epoch_value (enum field field)
173 if (field == TM_ABS_MDAY || field == TM_ABS_MON)
175 else if (field == TM_ABS_YEAR)
181 /* The parsing state. */
183 int tm[TM_SIZE]; /* parsed date and time */
184 enum field_set set[TM_SIZE]; /* set status of tm */
186 enum field last_field; /* Previously set field. */
189 int postponed_length; /* Number of digits in postponed value. */
191 char postponed_delim; /* The delimiter preceding postponed number. */
195 * Helpers for postponed numbers.
197 * postponed_length is the number of digits in postponed value. 0
198 * means there is no postponed number. -1 means there is a postponed
199 * number, but it comes from a keyword, and it doesn't have digits.
202 get_postponed_length (struct state *state)
204 return state->postponed_length;
208 * Consume a previously postponed number. Return true if a number was
209 * in fact postponed, false otherwise. Store the postponed number's
210 * value in *v, length in the input string in *n (or -1 if the number
211 * was written out and parsed as a keyword), and the preceding
212 * delimiter to *d. If a number was not postponed, *v, *n and *d are
216 consume_postponed_number (struct state *state, int *v, int *n, char *d)
218 if (! state->postponed_length)
222 *n = state->postponed_length;
225 *v = state->postponed_value;
228 *d = state->postponed_delim;
230 state->postponed_length = 0;
231 state->postponed_value = 0;
232 state->postponed_delim = 0;
237 static int parse_postponed_number (struct state *state, enum field next_field);
240 * Postpone a number to be handled later. If one exists already,
241 * handle it first. n may be -1 to indicate a keyword that has no
245 set_postponed_number (struct state *state, int v, int n)
248 char d = state->delim;
250 /* Parse a previously postponed number, if any. */
251 r = parse_postponed_number (state, TM_NONE);
255 state->postponed_length = n;
256 state->postponed_value = v;
257 state->postponed_delim = d;
263 set_delim (struct state *state, char delim)
265 state->delim = delim;
269 unset_delim (struct state *state)
275 * Field set/get/mod helpers.
278 /* Return true if field has been set. */
280 is_field_set (struct state *state, enum field field)
282 assert (field < ARRAY_SIZE (state->tm));
284 return state->set[field] != FIELD_UNSET;
288 unset_field (struct state *state, enum field field)
290 assert (field < ARRAY_SIZE (state->tm));
292 state->set[field] = FIELD_UNSET;
293 state->tm[field] = 0;
297 * Set field to value. A field can only be set once to ensure the
298 * input does not contain redundant and potentially conflicting data.
301 set_field (struct state *state, enum field field, int value)
305 /* Fields can only be set once. */
306 if (is_field_set (state, field))
307 return -PARSE_TIME_ERR_ALREADYSET;
309 state->set[field] = FIELD_SET;
311 /* Parse a previously postponed number, if any. */
312 r = parse_postponed_number (state, field);
318 state->tm[field] = value;
319 state->last_field = field;
325 * Mark n fields in fields to be set to the reference date/time in the
326 * specified time zone, or local timezone if not specified. The fields
327 * will be initialized after parsing is complete and timezone is
331 set_fields_to_now (struct state *state, enum field *fields, size_t n)
336 for (i = 0; i < n; i++) {
337 r = set_field (state, fields[i], 0);
340 state->set[fields[i]] = FIELD_NOW;
346 /* Modify field by adding value to it. To be used on relative fields,
347 * which can be modified multiple times (to accumulate). */
349 add_to_field (struct state *state, enum field field, int value)
353 assert (field < ARRAY_SIZE (state->tm));
355 state->set[field] = FIELD_SET;
357 /* Parse a previously postponed number, if any. */
358 r = parse_postponed_number (state, field);
364 state->tm[field] += value;
365 state->last_field = field;
371 * Get field value. Make sure the field is set before query. It's most
372 * likely an error to call this while parsing (for example fields set
373 * as FIELD_NOW will only be set to some value after parsing).
376 get_field (struct state *state, enum field field)
378 assert (field < ARRAY_SIZE (state->tm));
380 return state->tm[field];
/* Return true if h is a valid hour on a 12-hour clock (1...12). */
static bool
is_valid_12hour (int h)
{
    return h >= 1 && h <= 12;
}
/* Return true if h:m:s is a valid time of day on a 24-hour clock. */
static bool
is_valid_time (int h, int m, int s)
{
    /* Allow 24:00:00 to denote end of day. */
    if (h == 24 && m == 0 && s == 0)
	return true;

    return h >= 0 && h <= 23 && m >= 0 && m <= 59 && s >= 0 && s <= 59;
}
/* Return true if mday can be a day of the month (1...31). The check
 * does not take the month or year into account. */
static bool
is_valid_mday (int mday)
{
    return mday >= 1 && mday <= 31;
}
/* Return true if mon is a valid 1-based month number (1...12). */
static bool
is_valid_mon (int mon)
{
    return mon >= 1 && mon <= 12;
}
415 is_valid_year (int year)
/* Return true if year, mon and mday are each valid on their own. The
 * combination is not cross-checked (e.g. 31 February passes). */
static bool
is_valid_date (int year, int mon, int mday)
{
    return is_valid_year (year) && is_valid_mon (mon) && is_valid_mday (mday);
}
/* Unset indicator for time and date set helpers. */
#define UNSET (-1)
429 /* Time set helper. No input checking. Use UNSET (-1) to leave unset. */
431 set_abs_time (struct state *state, int hour, int min, int sec)
436 if ((r = set_field (state, TM_ABS_HOUR, hour)))
441 if ((r = set_field (state, TM_ABS_MIN, min)))
446 if ((r = set_field (state, TM_ABS_SEC, sec)))
453 /* Date set helper. No input checking. Use UNSET (-1) to leave unset. */
455 set_abs_date (struct state *state, int year, int mon, int mday)
460 if ((r = set_field (state, TM_ABS_YEAR, year)))
465 if ((r = set_field (state, TM_ABS_MON, mon)))
470 if ((r = set_field (state, TM_ABS_MDAY, mday)))
478 * Keyword parsing and handling.
481 typedef int (*setter_t)(struct state *state, struct keyword *kw);
484 const char *name; /* keyword */
485 enum field field; /* field to set, or FIELD_NONE if N/A */
486 int value; /* value to set, or 0 if N/A */
487 setter_t set; /* function to use for setting, if non-NULL */
491 * Setter callback functions for keywords.
494 kw_set_rel (struct state *state, struct keyword *kw)
498 /* Get a previously set multiplier, if any. */
499 consume_postponed_number (state, &multiplier, NULL, NULL);
501 /* Accumulate relative field values. */
502 return add_to_field (state, kw->field, multiplier * kw->value);
506 kw_set_number (struct state *state, struct keyword *kw)
508 /* -1 = no length, from keyword. */
509 return set_postponed_number (state, kw->value, -1);
513 kw_set_month (struct state *state, struct keyword *kw)
515 int n = get_postponed_length (state);
517 /* Consume postponed number if it could be mday. This handles "20
519 if (n == 1 || n == 2) {
522 consume_postponed_number (state, &v, NULL, NULL);
524 if (! is_valid_mday (v))
525 return -PARSE_TIME_ERR_INVALIDDATE;
527 r = set_field (state, TM_ABS_MDAY, v);
532 return set_field (state, kw->field, kw->value);
536 kw_set_ampm (struct state *state, struct keyword *kw)
538 int n = get_postponed_length (state);
540 /* Consume postponed number if it could be hour. This handles
542 if (n == 1 || n == 2) {
545 consume_postponed_number (state, &v, NULL, NULL);
547 if (! is_valid_12hour (v))
548 return -PARSE_TIME_ERR_INVALIDTIME;
550 r = set_abs_time (state, v, 0, 0);
555 return set_field (state, kw->field, kw->value);
559 kw_set_timeofday (struct state *state, struct keyword *kw)
561 return set_abs_time (state, kw->value, 0, 0);
565 kw_set_today (struct state *state, unused (struct keyword *kw))
567 enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
569 return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
573 kw_set_now (struct state *state, unused (struct keyword *kw))
575 enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
577 return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
581 kw_set_ordinal (struct state *state, struct keyword *kw)
585 /* Require a postponed number. */
586 if (! consume_postponed_number (state, &v, &n, NULL))
587 return -PARSE_TIME_ERR_DATEFORMAT;
589 /* Ordinals are mday. */
590 if (n != 1 && n != 2)
591 return -PARSE_TIME_ERR_DATEFORMAT;
593 /* Be strict about st, nd, rd, and lax about th. */
594 if (strcasecmp (kw->name, "st") == 0 && v != 1 && v != 21 && v != 31)
595 return -PARSE_TIME_ERR_INVALIDDATE;
596 else if (strcasecmp (kw->name, "nd") == 0 && v != 2 && v != 22)
597 return -PARSE_TIME_ERR_INVALIDDATE;
598 else if (strcasecmp (kw->name, "rd") == 0 && v != 3 && v != 23)
599 return -PARSE_TIME_ERR_INVALIDDATE;
600 else if (strcasecmp (kw->name, "th") == 0 && ! is_valid_mday (v))
601 return -PARSE_TIME_ERR_INVALIDDATE;
603 return set_field (state, TM_ABS_MDAY, v);
607 kw_ignore (unused (struct state *state), unused (struct keyword *kw))
615 * A keyword may optionally contain a '|' to indicate the minimum
616 * match length. Without one, full match is required. It's advisable
617 * to keep the minimum match parts unique across all keywords. If
618 * they're not, the first match wins.
620 * If keyword begins with '*', then the matching will be case
621 * sensitive. Otherwise the matching is case insensitive.
623 * If .set is NULL, the field specified by .field will be set to
626 * Note: Observe how "m" and "mi" match minutes, "M" and "mo" and
627 * "mont" match months, but "mon" matches Monday.
629 static struct keyword keywords[] = {
631 { N_ ("sun|day"), TM_WDAY, 0, NULL },
632 { N_ ("mon|day"), TM_WDAY, 1, NULL },
633 { N_ ("tue|sday"), TM_WDAY, 2, NULL },
634 { N_ ("wed|nesday"), TM_WDAY, 3, NULL },
635 { N_ ("thu|rsday"), TM_WDAY, 4, NULL },
636 { N_ ("fri|day"), TM_WDAY, 5, NULL },
637 { N_ ("sat|urday"), TM_WDAY, 6, NULL },
640 { N_ ("jan|uary"), TM_ABS_MON, 1, kw_set_month },
641 { N_ ("feb|ruary"), TM_ABS_MON, 2, kw_set_month },
642 { N_ ("mar|ch"), TM_ABS_MON, 3, kw_set_month },
643 { N_ ("apr|il"), TM_ABS_MON, 4, kw_set_month },
644 { N_ ("may"), TM_ABS_MON, 5, kw_set_month },
645 { N_ ("jun|e"), TM_ABS_MON, 6, kw_set_month },
646 { N_ ("jul|y"), TM_ABS_MON, 7, kw_set_month },
647 { N_ ("aug|ust"), TM_ABS_MON, 8, kw_set_month },
648 { N_ ("sep|tember"), TM_ABS_MON, 9, kw_set_month },
649 { N_ ("oct|ober"), TM_ABS_MON, 10, kw_set_month },
650 { N_ ("nov|ember"), TM_ABS_MON, 11, kw_set_month },
651 { N_ ("dec|ember"), TM_ABS_MON, 12, kw_set_month },
654 { N_ ("y|ears"), TM_REL_YEAR, 1, kw_set_rel },
655 { N_ ("mo|nths"), TM_REL_MON, 1, kw_set_rel },
656 { N_ ("*M"), TM_REL_MON, 1, kw_set_rel },
657 { N_ ("w|eeks"), TM_REL_WEEK, 1, kw_set_rel },
658 { N_ ("d|ays"), TM_REL_DAY, 1, kw_set_rel },
659 { N_ ("h|ours"), TM_REL_HOUR, 1, kw_set_rel },
660 { N_ ("hr|s"), TM_REL_HOUR, 1, kw_set_rel },
661 { N_ ("mi|nutes"), TM_REL_MIN, 1, kw_set_rel },
662 { N_ ("mins"), TM_REL_MIN, 1, kw_set_rel },
663 { N_ ("*m"), TM_REL_MIN, 1, kw_set_rel },
664 { N_ ("s|econds"), TM_REL_SEC, 1, kw_set_rel },
665 { N_ ("secs"), TM_REL_SEC, 1, kw_set_rel },
668 { N_ ("one"), TM_NONE, 1, kw_set_number },
669 { N_ ("two"), TM_NONE, 2, kw_set_number },
670 { N_ ("three"), TM_NONE, 3, kw_set_number },
671 { N_ ("four"), TM_NONE, 4, kw_set_number },
672 { N_ ("five"), TM_NONE, 5, kw_set_number },
673 { N_ ("six"), TM_NONE, 6, kw_set_number },
674 { N_ ("seven"), TM_NONE, 7, kw_set_number },
675 { N_ ("eight"), TM_NONE, 8, kw_set_number },
676 { N_ ("nine"), TM_NONE, 9, kw_set_number },
677 { N_ ("ten"), TM_NONE, 10, kw_set_number },
678 { N_ ("dozen"), TM_NONE, 12, kw_set_number },
679 { N_ ("hundred"), TM_NONE, 100, kw_set_number },
681 /* Special number forms. */
682 { N_ ("this"), TM_NONE, 0, kw_set_number },
683 { N_ ("last"), TM_NONE, 1, kw_set_number },
685 /* Other special keywords. */
686 { N_ ("yesterday"), TM_REL_DAY, 1, kw_set_rel },
687 { N_ ("today"), TM_NONE, 0, kw_set_today },
688 { N_ ("now"), TM_NONE, 0, kw_set_now },
689 { N_ ("noon"), TM_NONE, 12, kw_set_timeofday },
690 { N_ ("midnight"), TM_NONE, 0, kw_set_timeofday },
691 { N_ ("am"), TM_AMPM, 0, kw_set_ampm },
692 { N_ ("a.m."), TM_AMPM, 0, kw_set_ampm },
693 { N_ ("pm"), TM_AMPM, 1, kw_set_ampm },
694 { N_ ("p.m."), TM_AMPM, 1, kw_set_ampm },
695 { N_ ("st"), TM_NONE, 0, kw_set_ordinal },
696 { N_ ("nd"), TM_NONE, 0, kw_set_ordinal },
697 { N_ ("rd"), TM_NONE, 0, kw_set_ordinal },
698 { N_ ("th"), TM_NONE, 0, kw_set_ordinal },
699 { N_ ("ago"), TM_NONE, 0, kw_ignore },
701 /* Timezone codes: offset in minutes. XXX: Add more codes. */
702 { N_ ("pst"), TM_TZ, -8 * 60, NULL },
703 { N_ ("mst"), TM_TZ, -7 * 60, NULL },
704 { N_ ("cst"), TM_TZ, -6 * 60, NULL },
705 { N_ ("est"), TM_TZ, -5 * 60, NULL },
706 { N_ ("ast"), TM_TZ, -4 * 60, NULL },
707 { N_ ("nst"), TM_TZ, -(3 * 60 + 30), NULL },
709 { N_ ("gmt"), TM_TZ, 0, NULL },
710 { N_ ("utc"), TM_TZ, 0, NULL },
712 { N_ ("wet"), TM_TZ, 0, NULL },
713 { N_ ("cet"), TM_TZ, 1 * 60, NULL },
714 { N_ ("eet"), TM_TZ, 2 * 60, NULL },
715 { N_ ("fet"), TM_TZ, 3 * 60, NULL },
717 { N_ ("wat"), TM_TZ, 1 * 60, NULL },
718 { N_ ("cat"), TM_TZ, 2 * 60, NULL },
719 { N_ ("eat"), TM_TZ, 3 * 60, NULL },
723 * Compare strings str and keyword. Return the number of matching
724 * chars on match, 0 for no match.
726 * All of the alphabetic characters (isalpha) in str up to the first
727 * non-alpha character (or end of string) must match the
728 * keyword. Consequently, the value returned on match is the number of
729 * consecutive alphabetic characters in str.
731 * Abbreviated match is accepted if the keyword contains a '|'
732 * character, and str matches keyword up to that character. Any alpha
733 * characters after that in str must still match the keyword following
734 * the '|' character. If no '|' is present, all of keyword must match.
736 * Excessive, consecutive, and misplaced (at the beginning or end) '|'
737 * characters in keyword are handled gracefully. Only the first one
740 * If match_case is true, the matching is case sensitive.
743 match_keyword (const char *str, const char *keyword, bool match_case)
746 bool prefix_matched = false;
749 while (*keyword == '|') {
750 prefix_matched = true;
754 if (! *s || ! isalpha ((unsigned char) *s) || ! *keyword)
761 if (tolower ((unsigned char) *s) !=
762 tolower ((unsigned char) *keyword))
769 /* did not match all of the keyword in input string */
770 if (*s && isalpha ((unsigned char) *s))
773 /* did not match enough of keyword */
774 if (*keyword && ! prefix_matched)
781 * Parse a keyword. Return < 0 on error, number of parsed chars on
785 parse_keyword (struct state *state, const char *s)
789 struct keyword *kw = NULL;
792 for (i = 0; i < ARRAY_SIZE (keywords); i++) {
793 const char *keyword = _ (keywords[i].name);
796 /* Match case if keyword begins with '*'. */
797 if (*keyword == '*') {
802 n = match_keyword (s, keyword, mcase);
810 return -PARSE_TIME_ERR_KEYWORD;
813 r = kw->set (state, kw);
815 r = set_field (state, kw->field, kw->value);
824 * Non-keyword parsers and their helpers.
828 set_user_tz (struct state *state, char sign, int hour, int min)
830 int tz = hour * 60 + min;
832 assert (sign == '+' || sign == '-');
834 if (hour < 0 || hour > 14 || min < 0 || min > 59 || min % 15)
835 return -PARSE_TIME_ERR_INVALIDTIME;
840 return set_field (state, TM_TZ, tz);
844 * Parse a previously postponed number if one exists. Independent
845 * parsing of a postponed number when it wasn't consumed during
846 * parsing of the following token.
849 parse_postponed_number (struct state *state, unused (enum field next_field))
854 /* Bail out if there's no postponed number. */
855 if (! consume_postponed_number (state, &v, &n, &d))
858 if (n == 1 || n == 2) {
859 /* Notable exception: Previous field affects parsing. This
860 * handles "January 20". */
861 if (state->last_field == TM_ABS_MON) {
863 if (! is_valid_mday (v))
864 return -PARSE_TIME_ERR_INVALIDDATE;
866 return set_field (state, TM_ABS_MDAY, v);
868 /* XXX: Only allow if last field is hour, min, or sec? */
869 if (d == '+' || d == '-') {
871 return set_user_tz (state, d, v, 0);
875 /* Notable exception: Value affects parsing. Time zones are
876 * always at most 1400 and we don't understand years before
878 if (! is_valid_year (v)) {
879 if (d == '+' || d == '-') {
881 return set_user_tz (state, d, v / 100, v % 100);
885 return set_field (state, TM_ABS_YEAR, v);
889 int hour = v / 10000;
890 int min = (v / 100) % 100;
893 if (! is_valid_time (hour, min, sec))
894 return -PARSE_TIME_ERR_INVALIDTIME;
896 return set_abs_time (state, hour, min, sec);
899 int year = v / 10000;
900 int mon = (v / 100) % 100;
903 if (! is_valid_date (year, mon, mday))
904 return -PARSE_TIME_ERR_INVALIDDATE;
906 return set_abs_date (state, year, mon, mday);
909 return -PARSE_TIME_ERR_FORMAT;
912 static int tm_get_field (const struct tm *tm, enum field field);
915 set_timestamp (struct state *state, time_t t)
921 if (gmtime_r (&t, &tm) == NULL)
922 return -PARSE_TIME_ERR_LIB;
924 for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
925 r = set_field (state, f, tm_get_field (&tm, f));
930 r = set_field (state, TM_TZ, 0);
934 /* XXX: Prevent TM_AMPM with timestamp, e.g. "@123456 pm" */
939 /* Parse a single number. Typically postpone parsing until later. */
941 parse_single_number (struct state *state, unsigned long v,
946 if (state->delim == '@')
947 return set_timestamp (state, (time_t) v);
950 return -PARSE_TIME_ERR_FORMAT;
952 return set_postponed_number (state, v, n);
964 return c == '/' || c == '-' || c == '.';
970 return is_time_sep (c) || is_date_sep (c);
973 /* Two-digit year: 00...69 is 2000s, 70...99 1900s, if n == 0 keep
976 expand_year (unsigned long year, size_t n)
979 return (year < 70 ? 2000 : 1900) + year;
987 /* Parse a date number triplet. */
989 parse_date (struct state *state, char sep,
990 unsigned long v1, unsigned long v2, unsigned long v3,
991 size_t n1, size_t n2, size_t n3)
993 int year = UNSET, mon = UNSET, mday = UNSET;
995 assert (is_date_sep (sep));
998 case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
999 if (n1 != 1 && n1 != 2)
1000 return -PARSE_TIME_ERR_DATEFORMAT;
1002 if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
1003 /* M[M]/D[D][/YY[YY]] */
1004 year = expand_year (v3, n3);
1007 } else if (n2 == 4 && n3 == 0) {
1012 return -PARSE_TIME_ERR_DATEFORMAT;
1016 case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
1017 if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
1023 } else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
1024 /* DD-MM[-YY[YY]] */
1025 year = expand_year (v3, n3);
1028 } else if (n1 == 2 && n2 == 4 && n3 == 0) {
1033 return -PARSE_TIME_ERR_DATEFORMAT;
1037 case '.': /* Date: D[D].M[M][.[YY[YY]]] */
1038 if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
1039 (n3 != 0 && n3 != 2 && n3 != 4))
1040 return -PARSE_TIME_ERR_DATEFORMAT;
1042 year = expand_year (v3, n3);
1048 if (year != UNSET && ! is_valid_year (year))
1049 return -PARSE_TIME_ERR_INVALIDDATE;
1051 if (mon != UNSET && ! is_valid_mon (mon))
1052 return -PARSE_TIME_ERR_INVALIDDATE;
1054 if (mday != UNSET && ! is_valid_mday (mday))
1055 return -PARSE_TIME_ERR_INVALIDDATE;
1057 return set_abs_date (state, year, mon, mday);
1060 /* Parse a time number triplet. */
1062 parse_time (struct state *state, char sep,
1063 unsigned long v1, unsigned long v2, unsigned long v3,
1064 size_t n1, size_t n2, size_t n3)
1066 assert (is_time_sep (sep));
1068 if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
1069 return -PARSE_TIME_ERR_TIMEFORMAT;
1072 * Notable exception: Previously set fields affect
1073 * parsing. Interpret (+|-)HH:MM as time zone only if hour and
1074 * minute have been set.
1076 * XXX: This could be fixed by restricting the delimiters
1077 * preceding time. For '+' it would be justified, but for '-' it
1078 * might be inconvenient. However prefer to allow '-' as an
1079 * insignificant delimiter preceding time for convenience, and
1080 * handle '+' the same way for consistency between positive and
1081 * negative time zones.
1083 if (is_field_set (state, TM_ABS_HOUR) &&
1084 is_field_set (state, TM_ABS_MIN) &&
1085 n1 == 2 && n2 == 2 && n3 == 0 &&
1086 (state->delim == '+' || state->delim == '-')) {
1087 return set_user_tz (state, state->delim, v1, v2);
1090 if (! is_valid_time (v1, v2, n3 ? v3 : 0))
1091 return -PARSE_TIME_ERR_INVALIDTIME;
1093 return set_abs_time (state, v1, v2, n3 ? (int) v3 : UNSET);
/* strtoul helper that assigns length.
 *
 * Parse a base 10 number at s. Store a pointer to the first unparsed
 * character in *endp and the number of characters consumed in *len.
 * Both endp and len must be non-NULL. Return the parsed value. */
static unsigned long
strtoul_len (const char *s, const char **endp, size_t *len)
{
    unsigned long val = strtoul (s, (char **) endp, 10);

    *len = *endp - s;

    return val;
}
1107 * Parse a (group of) number(s). Return < 0 on error, number of parsed
1111 parse_number (struct state *state, const char *s)
1114 unsigned long v1, v2, v3 = 0;
1115 size_t n1, n2, n3 = 0;
1119 v1 = strtoul_len (p, &p, &n1);
1121 if (! is_sep (*p) || ! isdigit ((unsigned char) *(p + 1))) {
1122 /* A single number. */
1123 r = parse_single_number (state, v1, n1);
1131 v2 = strtoul_len (p + 1, &p, &n2);
1133 /* A group of two or three numbers? */
1134 if (*p == sep && isdigit ((unsigned char) *(p + 1)))
1135 v3 = strtoul_len (p + 1, &p, &n3);
1137 if (is_time_sep (sep))
1138 r = parse_time (state, sep, v1, v2, v3, n1, n2, n3);
1140 r = parse_date (state, sep, v1, v2, v3, n1, n2, n3);
1149 * Parse delimiter(s). Throw away all except the last one, which is
1150 * stored for parsing the next non-delimiter. Return < 0 on error,
1151 * number of parsed chars on success.
1153 * XXX: We might want to be more strict here.
1156 parse_delim (struct state *state, const char *s)
1161 * Skip non-alpha and non-digit, and store the last for further
1164 while (*p && ! isalnum ((unsigned char) *p)) {
1165 set_delim (state, *p);
1173 * Parse a date/time string. Return < 0 on error, number of parsed
1177 parse_input (struct state *state, const char *s)
1184 if (isalpha ((unsigned char) *p)) {
1185 n = parse_keyword (state, p);
1186 } else if (isdigit ((unsigned char) *p)) {
1187 n = parse_number (state, p);
1189 n = parse_delim (state, p);
1194 n = -PARSE_TIME_ERR;
1202 /* Parse a previously postponed number, if any. */
1203 r = parse_postponed_number (state, TM_NONE);
1211 * Processing the parsed input.
1215 * Initialize reference time to tm. Use time zone in state if
1216 * specified, otherwise local time. Use now for reference time if
1217 * non-NULL, otherwise current time.
1220 initialize_now (struct state *state, const time_t *ref, struct tm *tm)
1227 if (time (&t) == (time_t) -1)
1228 return -PARSE_TIME_ERR_LIB;
1231 if (is_field_set (state, TM_TZ)) {
1232 /* Some other time zone. */
1234 /* Adjust now according to the TZ. */
1235 t += get_field (state, TM_TZ) * 60;
1237 /* It's not gm, but this doesn't mess with the TZ. */
1238 if (gmtime_r (&t, tm) == NULL)
1239 return -PARSE_TIME_ERR_LIB;
1242 if (localtime_r (&t, tm) == NULL)
1243 return -PARSE_TIME_ERR_LIB;
1250 * Normalize tm according to mktime(3); if structure members are
1251 * outside their valid interval, they will be normalized (so that, for
1252 * example, 40 October is changed into 9 November), and tm_wday and
1253 * tm_yday are set to values determined from the contents of the other
1256 * Both mktime(3) and localtime_r(3) use local time, but they cancel
1257 * each other out here, making this function agnostic to time zone.
1260 normalize_tm (struct tm *tm)
1262 time_t t = mktime (tm);
1264 if (t == (time_t) -1)
1265 return -PARSE_TIME_ERR_LIB;
1267 if (! localtime_r (&t, tm))
1268 return -PARSE_TIME_ERR_LIB;
1273 /* Get field out of a struct tm. */
1275 tm_get_field (const struct tm *tm, enum field field)
1278 case TM_ABS_SEC: return tm->tm_sec;
1279 case TM_ABS_MIN: return tm->tm_min;
1280 case TM_ABS_HOUR: return tm->tm_hour;
1281 case TM_ABS_MDAY: return tm->tm_mday;
1282 case TM_ABS_MON: return tm->tm_mon + 1; /* 0- to 1-based */
1283 case TM_ABS_YEAR: return 1900 + tm->tm_year;
1284 case TM_WDAY: return tm->tm_wday;
1285 case TM_ABS_ISDST: return tm->tm_isdst;
1294 /* Modify hour according to am/pm setting. */
1296 fixup_ampm (struct state *state)
1298 int hour, hdiff = 0;
1300 if (! is_field_set (state, TM_AMPM))
1303 if (! is_field_set (state, TM_ABS_HOUR))
1304 return -PARSE_TIME_ERR_TIMEFORMAT;
1306 hour = get_field (state, TM_ABS_HOUR);
1307 if (! is_valid_12hour (hour))
1308 return -PARSE_TIME_ERR_INVALIDTIME;
1310 if (get_field (state, TM_AMPM)) {
1315 /* 12am is midnight, beginning of day. */
1320 add_to_field (state, TM_REL_HOUR, -hdiff);
1325 /* Combine absolute and relative fields, and round. */
1327 create_output (struct state *state, time_t *t_out, const time_t *ref,
1330 struct tm tm = { .tm_isdst = -1 };
1335 int week_round = PARSE_TIME_NO_ROUND;
1337 r = initialize_now (state, ref, &now);
1341 /* Initialize fields flagged as "now" to reference time. */
1342 for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
1343 if (state->set[f] == FIELD_NOW) {
1344 state->tm[f] = tm_get_field (&now, f);
1345 state->set[f] = FIELD_SET;
1350 * If WDAY is set but MDAY is not, we consider WDAY relative
1352 * XXX: This fails on stuff like "two months monday" because two
1353 * months ago wasn't the same day as today. Postpone until we know
1356 if (is_field_set (state, TM_WDAY) &&
1357 ! is_field_set (state, TM_ABS_MDAY)) {
1358 int wday = get_field (state, TM_WDAY);
1359 int today = tm_get_field (&now, TM_WDAY);
1363 rel_days = today - wday;
1365 rel_days = today + 7 - wday;
1367 /* This also prevents special week rounding from happening. */
1368 add_to_field (state, TM_REL_DAY, rel_days);
1370 unset_field (state, TM_WDAY);
1373 r = fixup_ampm (state);
1378 * Iterate fields from most accurate to least accurate, and set
1379 * unset fields according to requested rounding.
1381 for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
1382 if (round != PARSE_TIME_NO_ROUND) {
1383 enum field r = abs_to_rel_field (f);
1385 if (is_field_set (state, f) || is_field_set (state, r)) {
1386 if (round >= PARSE_TIME_ROUND_UP && f != TM_ABS_SEC) {
1388 * This is the most accurate field
1389 * specified. Round up adjusting it towards
1392 add_to_field (state, r, -1);
1395 * Go back a second if the result is to be used
1396 * for inclusive comparisons.
1398 if (round == PARSE_TIME_ROUND_UP_INCLUSIVE)
1399 add_to_field (state, TM_REL_SEC, 1);
1401 round = PARSE_TIME_NO_ROUND; /* No more rounding. */
1403 if (f == TM_ABS_MDAY &&
1404 is_field_set (state, TM_REL_WEEK)) {
1405 /* Week is most accurate. */
1407 round = PARSE_TIME_NO_ROUND;
1409 set_field (state, f, get_field_epoch_value (f));
1414 if (! is_field_set (state, f))
1415 set_field (state, f, tm_get_field (&now, f));
1418 /* Special case: rounding with week accuracy. */
1419 if (week_round != PARSE_TIME_NO_ROUND) {
1420 /* Temporarily set more accurate fields to now. */
1421 set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
1422 set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
1423 set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
1424 set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
1428 * Set all fields. They may contain out of range values before
1429 * normalization by mktime(3).
1431 tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
1432 tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
1433 tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
1434 tm.tm_mday = get_field (state, TM_ABS_MDAY) -
1435 get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
1436 tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
1437 tm.tm_mon--; /* 1- to 0-based */
1438 tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
1441 * It's always normal time.
1443 * XXX: This is probably not a solution that universally
1444 * works. Just make sure DST is not taken into account. We don't
1445 * want rounding to be affected by DST.
1449 /* Special case: rounding with week accuracy. */
1450 if (week_round != PARSE_TIME_NO_ROUND) {
1451 /* Normalize to get proper tm.wday. */
1452 r = normalize_tm (&tm);
1456 /* Set more accurate fields back to zero. */
1462 /* Monday is the true 1st day of week, but this is easier. */
1463 if (week_round >= PARSE_TIME_ROUND_UP) {
1464 tm.tm_mday += 7 - tm.tm_wday;
1465 if (week_round == PARSE_TIME_ROUND_UP_INCLUSIVE)
1468 tm.tm_mday -= tm.tm_wday;
1472 if (is_field_set (state, TM_TZ)) {
1473 /* tm is in specified TZ, convert to UTC for timegm(3). */
1474 tm.tm_min -= get_field (state, TM_TZ);
1477 /* tm is in local time. */
1481 if (t == (time_t) -1)
1482 return -PARSE_TIME_ERR_LIB;
/* Internally, all errors are < 0. parse_time_string() returns errors > 0.
 * The argument is parenthesized so that a compound expression is
 * negated as a whole. */
#define EXTERNAL_ERR(r) (-(r))
1493 parse_time_string (const char *s, time_t *t, const time_t *ref, int round)
1495 struct state state = { .last_field = TM_NONE };
1499 return EXTERNAL_ERR (-PARSE_TIME_ERR);
1501 r = parse_input (&state, s);
1503 return EXTERNAL_ERR (r);
1505 r = create_output (&state, t, ref, round);
1507 return EXTERNAL_ERR (r);