/* git.cworth.org Git - notmuch-old/blob - lib/parse-sexp.cc
 * lib/parse-sexp: 'starts-with' wildcard searches
 * [notmuch-old] / lib / parse-sexp.cc
 */
1 #include "database-private.h"
2
3 #if HAVE_SFSEXP
4 #include "sexp.h"
5 #include "unicode-util.h"
6
7 /* _sexp is used for file scope symbols to avoid clashing with
8  * definitions from sexp.h */
9
/* Bit flags attached to each keyword of the prefix table:
 *
 *   SEXP_FLAG_FIELD     the keyword names a field; fields may not be nested
 *   SEXP_FLAG_BOOLEAN   atoms become a single prefixed term, used verbatim
 *   SEXP_FLAG_SINGLE    the keyword takes exactly one atom as argument
 *   SEXP_FLAG_WILDCARD  the field accepts wildcard subqueries
 */
enum _sexp_flag_t {
    SEXP_FLAG_NONE      = 0x0,
    SEXP_FLAG_FIELD     = 0x1,
    SEXP_FLAG_BOOLEAN   = 0x2,
    SEXP_FLAG_SINGLE    = 0x4,
    SEXP_FLAG_WILDCARD  = 0x8,
};

/*
 * Bitwise operators on _sexp_flag_t, so that users of the flags do
 * not have to spell out the integer casts themselves. */

/* Union of two flag sets. */
inline _sexp_flag_t
operator| (_sexp_flag_t lhs, _sexp_flag_t rhs)
{
    const unsigned bits = static_cast<unsigned>(lhs) | static_cast<unsigned>(rhs);

    return static_cast<_sexp_flag_t>(bits);
}

/* Intersection of two flag sets. */
inline _sexp_flag_t
operator& (_sexp_flag_t lhs, _sexp_flag_t rhs)
{
    const unsigned bits = static_cast<unsigned>(lhs) & static_cast<unsigned>(rhs);

    return static_cast<_sexp_flag_t>(bits);
}
34
35 typedef struct  {
36     const char *name;
37     Xapian::Query::op xapian_op;
38     Xapian::Query initial;
39     _sexp_flag_t flags;
40 } _sexp_prefix_t;
41
42 static _sexp_prefix_t prefixes[] =
43 {
44     { "and",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
45       SEXP_FLAG_NONE },
46     { "attachment",     Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
47       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
48     { "body",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
49       SEXP_FLAG_FIELD },
50     { "from",           Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
51       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
52     { "folder",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
53       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
54     { "id",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
55       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
56     { "is",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
57       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
58     { "mid",            Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
59       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
60     { "mimetype",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
61       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
62     { "not",            Xapian::Query::OP_AND_NOT,      Xapian::Query::MatchAll,
63       SEXP_FLAG_NONE },
64     { "or",             Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
65       SEXP_FLAG_NONE },
66     { "path",           Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
67       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
68     { "property",       Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
69       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
70     { "starts-with",    Xapian::Query::OP_WILDCARD,     Xapian::Query::MatchAll,
71       SEXP_FLAG_SINGLE },
72     { "subject",        Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
73       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
74     { "tag",            Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
75       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
76     { "thread",         Xapian::Query::OP_OR,           Xapian::Query::MatchNothing,
77       SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
78     { "to",             Xapian::Query::OP_AND,          Xapian::Query::MatchAll,
79       SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
80     { }
81 };
82
83 static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
84                                                const _sexp_prefix_t *parent,
85                                                const sexp_t *sx,
86                                                Xapian::Query &output);
87
88 static notmuch_status_t
89 _sexp_combine_query (notmuch_database_t *notmuch,
90                      const _sexp_prefix_t *parent,
91                      Xapian::Query::op operation,
92                      Xapian::Query left,
93                      const sexp_t *sx,
94                      Xapian::Query &output)
95 {
96     Xapian::Query subquery;
97
98     notmuch_status_t status;
99
100     /* if we run out elements, return accumulator */
101
102     if (! sx) {
103         output = left;
104         return NOTMUCH_STATUS_SUCCESS;
105     }
106
107     status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
108     if (status)
109         return status;
110
111     return _sexp_combine_query (notmuch,
112                                 parent,
113                                 operation,
114                                 Xapian::Query (operation, left, subquery),
115                                 sx->next, output);
116 }
117
118 static notmuch_status_t
119 _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &output)
120 {
121     Xapian::Utf8Iterator p (phrase);
122     Xapian::Utf8Iterator end;
123     std::vector<std::string> terms;
124
125     while (p != end) {
126         Xapian::Utf8Iterator start;
127         while (p != end && ! Xapian::Unicode::is_wordchar (*p))
128             p++;
129
130         if (p == end)
131             break;
132
133         start = p;
134
135         while (p != end && Xapian::Unicode::is_wordchar (*p))
136             p++;
137
138         if (p != start) {
139             std::string word (start, p);
140             word = Xapian::Unicode::tolower (word);
141             terms.push_back (term_prefix + word);
142         }
143     }
144     output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), terms.end ());
145     return NOTMUCH_STATUS_SUCCESS;
146 }
147
148 static notmuch_status_t
149 _sexp_parse_wildcard (notmuch_database_t *notmuch,
150                       const _sexp_prefix_t *parent,
151                       std::string match,
152                       Xapian::Query &output)
153 {
154
155     std::string term_prefix = parent ? _find_prefix (parent->name) : "";
156
157     if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
158         _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
159         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
160     }
161
162     output = Xapian::Query (Xapian::Query::OP_WILDCARD,
163                             term_prefix + Xapian::Unicode::tolower (match));
164     return NOTMUCH_STATUS_SUCCESS;
165 }
166
167 /* Here we expect the s-expression to be a proper list, with first
168  * element defining and operation, or as a special case the empty
169  * list */
170
171 static notmuch_status_t
172 _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
173                        Xapian::Query &output)
174 {
175     if (sx->ty == SEXP_VALUE) {
176         std::string term = Xapian::Unicode::tolower (sx->val);
177         Xapian::Stem stem = *(notmuch->stemmer);
178         std::string term_prefix = parent ? _find_prefix (parent->name) : "";
179         if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
180             output = Xapian::Query (term_prefix + sx->val);
181             return NOTMUCH_STATUS_SUCCESS;
182         }
183         if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
184             output = Xapian::Query ("Z" + term_prefix + stem (term));
185             return NOTMUCH_STATUS_SUCCESS;
186         } else {
187             return _sexp_parse_phrase (term_prefix, sx->val, output);
188         }
189     }
190
191     /* Empty list */
192     if (! sx->list) {
193         output = Xapian::Query::MatchAll;
194         return NOTMUCH_STATUS_SUCCESS;
195     }
196
197     if (sx->list->ty == SEXP_LIST) {
198         _notmuch_database_log (notmuch, "unexpected list in field/operation position\n",
199                                sx->list->val);
200         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
201     }
202
203     for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
204         if (strcmp (prefix->name, sx->list->val) == 0) {
205             if (prefix->flags & SEXP_FLAG_FIELD) {
206                 if (parent) {
207                     _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
208                                            prefix->name, parent->name);
209                     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
210                 }
211                 parent = prefix;
212             }
213
214             if ((prefix->flags & SEXP_FLAG_SINGLE) &&
215                 (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
216                 _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
217                                        prefix->name);
218                 return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
219             }
220
221             if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
222                 return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
223
224             return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
225                                         sx->list->next, output);
226         }
227     }
228
229     _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
230
231     return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
232 }
233
234 notmuch_status_t
235 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
236                                       Xapian::Query &output)
237 {
238     const sexp_t *sx = NULL;
239     char *buf = talloc_strdup (notmuch, querystr);
240
241     sx = parse_sexp (buf, strlen (querystr));
242     if (! sx) {
243         _notmuch_database_log (notmuch, "invalid s-expression: '%s'\n", querystr);
244         return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
245     }
246
247     return _sexp_to_xapian_query (notmuch, NULL, sx, output);
248 }
249 #endif