X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fparse-sexp.cc;h=9f6e0b77f268c19e63e3cb026a5ace2453dccf0f;hb=afe85e65786df1df9abf261393b4b4e6e2e86009;hp=66dbdb4155b619c8974ac282bdf38f90614fde20;hpb=be7e83de96b706af418fc9f139ded4d50bf342f6;p=notmuch diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 66dbdb41..9f6e0b77 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -1,25 +1,284 @@ -#include -#include "notmuch-private.h" +#include "database-private.h" #if HAVE_SFSEXP #include "sexp.h" - +#include "unicode-util.h" /* _sexp is used for file scope symbols to avoid clashing with * definitions from sexp.h */ +typedef enum { + SEXP_FLAG_NONE = 0, + SEXP_FLAG_FIELD = 1 << 0, + SEXP_FLAG_BOOLEAN = 1 << 1, + SEXP_FLAG_SINGLE = 1 << 2, + SEXP_FLAG_WILDCARD = 1 << 3, + SEXP_FLAG_REGEX = 1 << 4, + SEXP_FLAG_DO_REGEX = 1 << 5, + SEXP_FLAG_EXPAND = 1 << 6, + SEXP_FLAG_DO_EXPAND = 1 << 7, +} _sexp_flag_t; + +/* + * define bitwise operators to hide casts */ + +inline _sexp_flag_t +operator| (_sexp_flag_t a, _sexp_flag_t b) +{ + return static_cast<_sexp_flag_t>( + static_cast(a) | static_cast(b)); +} + +inline _sexp_flag_t +operator& (_sexp_flag_t a, _sexp_flag_t b) +{ + return static_cast<_sexp_flag_t>( + static_cast(a) & static_cast(b)); +} + +typedef struct { + const char *name; + Xapian::Query::op xapian_op; + Xapian::Query initial; + _sexp_flag_t flags; +} _sexp_prefix_t; + +static _sexp_prefix_t prefixes[] = +{ + { "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_NONE }, + { "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND }, + { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, + { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, + { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "matching", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_DO_EXPAND }, + { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, + { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND }, + { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, + SEXP_FLAG_NONE }, + { "of", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_DO_EXPAND }, + { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_NONE }, + { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, + { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, + { "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, + { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE }, + { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND }, + { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND }, + { } +}; + +static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch, + const _sexp_prefix_t *parent, + const sexp_t *sx, + Xapian::Query &output); + +static notmuch_status_t +_sexp_combine_query (notmuch_database_t *notmuch, + const _sexp_prefix_t *parent, + Xapian::Query::op operation, + Xapian::Query left, + const sexp_t *sx, + Xapian::Query &output) +{ + Xapian::Query subquery; + + notmuch_status_t status; + + /* if we run out elements, return accumulator */ + + if (! sx) { + output = left; + return NOTMUCH_STATUS_SUCCESS; + } + + status = _sexp_to_xapian_query (notmuch, parent, sx, subquery); + if (status) + return status; + + return _sexp_combine_query (notmuch, + parent, + operation, + Xapian::Query (operation, left, subquery), + sx->next, output); +} + +static notmuch_status_t +_sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output) +{ + Xapian::Utf8Iterator p (phrase); + Xapian::Utf8Iterator end; + std::vector terms; + + while (p != end) { + Xapian::Utf8Iterator start; + while (p != end && ! Xapian::Unicode::is_wordchar (*p)) + p++; + + if (p == end) + break; + + start = p; + + while (p != end && Xapian::Unicode::is_wordchar (*p)) + p++; + + if (p != start) { + std::string word (start, p); + word = Xapian::Unicode::tolower (word); + terms.push_back (term_prefix + word); + } + } + output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ()); + return NOTMUCH_STATUS_SUCCESS; +} + +static notmuch_status_t +_sexp_parse_wildcard (notmuch_database_t *notmuch, + const _sexp_prefix_t *parent, + std::string match, + Xapian::Query &output) +{ + + std::string term_prefix = parent ? _find_prefix (parent->name) : ""; + + if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) { + _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + output = Xapian::Query (Xapian::Query::OP_WILDCARD, + term_prefix + Xapian::Unicode::tolower (match)); + return NOTMUCH_STATUS_SUCCESS; +} + +static notmuch_status_t +_sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, const sexp_t *sx, + Xapian::Query &output) +{ + Xapian::Stem stem = *(notmuch->stemmer); + + if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) { + std::string term = Xapian::Unicode::tolower (sx->val); + + output = Xapian::Query ("Z" + term_prefix + stem (term)); + return NOTMUCH_STATUS_SUCCESS; + } else { + return _sexp_parse_phrase (term_prefix, sx->val, output); + } + +} + +notmuch_status_t +_sexp_parse_regex (notmuch_database_t *notmuch, + const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent, + std::string val, Xapian::Query &output) +{ + if (! parent) { + _notmuch_database_log (notmuch, "illegal '%s' outside field\n", + prefix->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + if (! (parent->flags & SEXP_FLAG_REGEX)) { + _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n", + prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + std::string msg; /* ignored */ + + return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name, + val, output, msg); +} + + +static notmuch_status_t +_sexp_expand_query (notmuch_database_t *notmuch, + const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent, + const sexp_t *sx, Xapian::Query &output) +{ + Xapian::Query subquery; + notmuch_status_t status; + std::string msg; + + if (! (parent->flags & SEXP_FLAG_EXPAND)) { + _notmuch_database_log (notmuch, "'%s' unsupported inside '%s'\n", prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + status = _sexp_combine_query (notmuch, NULL, prefix->xapian_op, prefix->initial, sx, subquery); + if (status) + return status; + + status = _notmuch_query_expand (notmuch, parent->name, subquery, output, msg); + if (status) { + _notmuch_database_log (notmuch, "error expanding query %s\n", msg.c_str ()); + } + return status; +} + /* Here we expect the s-expression to be a proper list, with first * element defining and operation, or as a special case the empty * list */ static notmuch_status_t -_sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx, +_sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx, Xapian::Query &output) { - if (sx->ty == SEXP_VALUE) { - output = Xapian::Query (Xapian::Unicode::tolower (sx->val)); - return NOTMUCH_STATUS_SUCCESS; + std::string term_prefix = parent ? _find_prefix (parent->name) : ""; + + if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) { + return _sexp_parse_wildcard (notmuch, parent, "", output); + } + + if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) { + output = Xapian::Query (term_prefix + sx->val); + return NOTMUCH_STATUS_SUCCESS; + } + if (parent) { + return _sexp_parse_one_term (notmuch, term_prefix, sx, output); + } else { + Xapian::Query accumulator; + for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) { + if (prefix->flags & SEXP_FLAG_FIELD) { + notmuch_status_t status; + Xapian::Query subquery; + term_prefix = _find_prefix (prefix->name); + status = _sexp_parse_one_term (notmuch, term_prefix, sx, subquery); + if (status) + return status; + accumulator = Xapian::Query (Xapian::Query::OP_OR, accumulator, subquery); + } + } + output = accumulator; + return NOTMUCH_STATUS_SUCCESS; + } } /* Empty list */ @@ -28,11 +287,47 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx, return NOTMUCH_STATUS_SUCCESS; } - if (sx->list->ty == SEXP_VALUE) - _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val); - else + if (sx->list->ty == SEXP_LIST) { _notmuch_database_log (notmuch, "unexpected list in field/operation position\n", sx->list->val); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) { + if (strcmp (prefix->name, sx->list->val) == 0) { + if (prefix->flags & SEXP_FLAG_FIELD) { + if (parent) { + _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n", + prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + parent = prefix; + } + + if ((prefix->flags & SEXP_FLAG_SINGLE) && + (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) { + _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n", + prefix->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + if (prefix->xapian_op == Xapian::Query::OP_WILDCARD) + return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output); + + if (prefix->flags & SEXP_FLAG_DO_REGEX) { + return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output); + } + + if (prefix->flags & SEXP_FLAG_DO_EXPAND) { + return _sexp_expand_query (notmuch, prefix, parent, sx->list->next, output); + } + + return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, + sx->list->next, output); + } + } + + _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val); return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; } @@ -50,6 +345,6 @@ _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *q return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; } - return _sexp_to_xapian_query (notmuch, sx, output); + return _sexp_to_xapian_query (notmuch, NULL, sx, output); } #endif