From 8322f536f5d304cc10caa2e061a36df0aa1996c4 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:23 -0700 Subject: [PATCH] lib/parse-sexp: add term prefix backed fields We use "boolean" to describe fields that should generate terms literally without stemming or phrase splitting. This terminology might not be ideal but it is already enshrined in notmuch-search-terms(7). --- doc/man7/notmuch-sexp-queries.rst | 18 +++++- lib/parse-sexp.cc | 49 ++++++++++++++++ test/T081-sexpr-search.sh | 94 +++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst index b763876d..6e68fcc3 100644 --- a/doc/man7/notmuch-sexp-queries.rst +++ b/doc/man7/notmuch-sexp-queries.rst @@ -81,6 +81,14 @@ string) into words, ignore punctuation. Phrase splitting is applied to terms in phrase (probabilistic) fields. Both phrase splitting and stemming apply only in phrase fields. +Each term or phrase field has an associated combining operator +(``and`` or ``or``) used to combine the queries from each element of +the tail of the list. This is generally ``or`` for those fields where +a message has one such attribute, and ``and`` otherwise. + +Term or phrase fields can contain arbitrarily complex queries made up +from terms, operators, and modifiers, but not other fields. + .. _field-table: .. table:: Fields with supported modifiers @@ -112,7 +120,7 @@ stemming apply only in phrase fields. 
+------------+-----------+-----------+-----------+-----------+----------+ | mimetype | or | phrase | yes | yes | no | +------------+-----------+-----------+-----------+-----------+----------+ - | path | or | term | yes | yes | yes | + | path | or | term | no | yes | yes | +------------+-----------+-----------+-----------+-----------+----------+ | property | and | term | yes | yes | yes | +------------+-----------+-----------+-----------+-----------+----------+ @@ -151,10 +159,18 @@ EXAMPLES Match the *phrase* "quick" followed by "fox" in phrase fields (or outside a field). Match the literal string in a term field. +``(id 1234@invalid blah@test)`` + Matches Message-Id "1234@invalid" *or* Message-Id "blah@test" + ``(subject quick "brown fox")`` Match messages whose subject contains "quick" (anywhere, stemmed) and the phrase "brown fox". +``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))`` + Match in the "To" or "Cc" headers, "bob@example.com", + "mallory@example.org", and also "bob@example.com.au" since it + contains the adjacent triple "bob", "example", "com". 
+ NOTES ===== diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 0917f505..26b7e5f1 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -10,8 +10,26 @@ typedef enum { SEXP_FLAG_NONE = 0, SEXP_FLAG_FIELD = 1 << 0, + SEXP_FLAG_BOOLEAN = 1 << 1, } _sexp_flag_t; +/* + * define bitwise operators to hide casts */ + +inline _sexp_flag_t +operator| (_sexp_flag_t a, _sexp_flag_t b) +{ + return static_cast<_sexp_flag_t>( + static_cast<unsigned>(a) | static_cast<unsigned>(b)); +} + +inline _sexp_flag_t +operator& (_sexp_flag_t a, _sexp_flag_t b) +{ + return static_cast<_sexp_flag_t>( + static_cast<unsigned>(a) & static_cast<unsigned>(b)); +} + typedef struct { const char *name; Xapian::Query::op xapian_op; @@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] = { { "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_NONE }, + { "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, + { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, + { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, + { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, SEXP_FLAG_NONE }, { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, SEXP_FLAG_NONE }, + { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD + | SEXP_FLAG_BOOLEAN }, { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD },
+ { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, + SEXP_FLAG_FIELD }, { } }; @@ -110,6 +155,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent std::string term = Xapian::Unicode::tolower (sx->val); Xapian::Stem stem = *(notmuch->stemmer); std::string term_prefix = parent ? _find_prefix (parent->name) : ""; + if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) { + output = Xapian::Query (term_prefix + sx->val); + return NOTMUCH_STATUS_SUCCESS; + } if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) { output = Xapian::Query ("Z" + term_prefix + stem (term)); return NOTMUCH_STATUS_SUCCESS; diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh index 4a051a50..96d58ee2 100755 --- a/test/T081-sexpr-search.sh +++ b/test/T081-sexpr-search.sh @@ -101,6 +101,99 @@ thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread) EOF test_expect_equal_file EXPECTED OUTPUT +test_begin_subtest "Search by 'attachment'" +notmuch search attachment:notmuch-help.patch > EXPECTED +notmuch search --query=sexp '(attachment notmuch-help.patch)' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'body'" +add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [body]=bodysearchtest +output=$(notmuch search --query=sexp '(body bodysearchtest)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)" + +test_begin_subtest "Search by 'body' (phrase)" +add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"' +add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not 
match the body search"' +output=$(notmuch search --query=sexp '(body "body search phrase")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)" + +test_begin_subtest "Search by 'body' (utf-8):" +add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="message body utf8: bödý"' +output=$(notmuch search --query=sexp '(body bödý)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)" + +test_begin_subtest "Search by 'from'" +add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom +output=$(notmuch search --query=sexp '(from searchbyfrom)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom; search by from (inbox unread)" + +test_begin_subtest "Search by 'from' (address)" +add_message '[subject]="search by from (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom@example.com +output=$(notmuch search --query=sexp '(from searchbyfrom@example.com)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom@example.com; search by from (address) (inbox unread)" + +test_begin_subtest "Search by 'from' (name)" +add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name <test@example.com>"' +output=$(notmuch search --query=sexp '(from "Search By From Name")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" + +test_begin_subtest "Search by 'from' (name and address)" +output=$(notmuch search --query=sexp '(from "Search By From Name <test@example.com>")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name)
(inbox unread)" + +add_message '[dir]=bad' '[subject]="To the bone"' +add_message '[dir]=.' '[subject]="Top level"' +add_message '[dir]=bad/news' '[subject]="Bears"' +mkdir -p "${MAIL_DIR}/duplicate/bad/news" +cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news" + +add_message '[dir]=things' '[subject]="These are a few"' +add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"' +add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"' + +test_begin_subtest "Search by 'folder' (multiple)" +output=$(notmuch search --query=sexp '(folder bad bad/news things/bad)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) +thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" + +test_begin_subtest "Search by 'folder': top level." +notmuch search folder:'""' > EXPECTED +notmuch search --query=sexp '(folder "")' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'id'" +add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp "(id ${gen_msg_id})" | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)" + +test_begin_subtest "Search by 'id' (or)" +add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp "(id non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)" + +test_begin_subtest "Search by 'is' (multiple)" +notmuch tag -inbox tag:searchbytag +notmuch search is:inbox AND is:unread | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(is inbox unread)' | notmuch_search_sanitize > OUTPUT +notmuch 
tag +inbox tag:searchbytag +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'mid'" +add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp "(mid ${gen_msg_id})" | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)" + +test_begin_subtest "Search by 'mid' (or)" +add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +output=$(notmuch search --query=sexp "(mid non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)" + +test_begin_subtest "Search by 'mimetype'" +notmuch search mimetype:text/html > EXPECTED +notmuch search --query=sexp '(mimetype text html)' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + test_begin_subtest "Search by 'subject' (utf-8, phrase-token):" output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)" @@ -118,6 +211,7 @@ notmuch search --query=sexp '(subject (or utf8 "compatibility issues"))' | notmu cat <<EOF > EXPECTED thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread) thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread) +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread) EOF test_expect_equal_file EXPECTED OUTPUT -- 2.43.0