/* regexp-fields.cc - field processor glue for regex supporting fields
 *
 * This file is part of notmuch.
 *
 * Copyright © 2015 Austin Clements
 * Copyright © 2016 David Bremner
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see https://www.gnu.org/licenses/ .
 *
 * Author: Austin Clements <aclements@csail.mit.edu>
 *         David Bremner <david@tethera.net>
 */

#include "regexp-fields.h"

#include <string>
#include <vector>

#include "notmuch-private.h"
#include "database-private.h"
#include "xapian-extra.h"

31 compile_regex (regex_t ®exp, const char *str, std::string &msg)
33 int err = regcomp (®exp, str, REG_EXTENDED | REG_NOSUB);
36 size_t len = regerror (err, ®exp, NULL, 0);
37 char *buffer = new char[len];
38 msg = "Regexp error: ";
39 (void) regerror (err, ®exp, buffer, len);
40 msg.append (buffer, len);
43 return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
45 return NOTMUCH_STATUS_SUCCESS;
48 RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string ®exp)
52 notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);
55 throw Xapian::QueryParserError (msg);
58 RegexpPostingSource::~RegexpPostingSource ()
64 RegexpPostingSource::init (const Xapian::Database &db)
67 it_ = db_.valuestream_begin (slot_);
68 end_ = db.valuestream_end (slot_);
73 RegexpPostingSource::get_termfreq_min () const
79 RegexpPostingSource::get_termfreq_est () const
81 return get_termfreq_max () / 2;
85 RegexpPostingSource::get_termfreq_max () const
87 return db_.get_value_freq (slot_);
91 RegexpPostingSource::get_docid () const
93 return it_.get_docid ();
97 RegexpPostingSource::at_end () const
103 RegexpPostingSource::next (unused (double min_wt))
105 if (started_ && ! at_end ())
109 for (; ! at_end (); ++it_) {
110 std::string value = *it_;
111 if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
117 RegexpPostingSource::skip_to (Xapian::docid did, unused (double min_wt))
121 for (; ! at_end (); ++it_) {
122 std::string value = *it_;
123 if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
129 RegexpPostingSource::check (Xapian::docid did, unused (double min_wt))
132 if (! it_.check (did) || at_end ())
134 return (regexec (®exp_, (*it_).c_str (), 0, NULL, 0) == 0);
137 static inline Xapian::valueno
138 _find_slot (std::string prefix)
140 if (prefix == "from")
141 return NOTMUCH_VALUE_FROM;
142 else if (prefix == "subject")
143 return NOTMUCH_VALUE_SUBJECT;
144 else if (prefix == "mid")
145 return NOTMUCH_VALUE_MESSAGE_ID;
147 return Xapian::BAD_VALUENO;
150 RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
151 notmuch_field_flag_t options_,
152 Xapian::QueryParser &parser_,
153 notmuch_database_t *notmuch_)
154 : slot (_find_slot (field_)),
156 term_prefix (_find_prefix (field_.c_str ())),
164 _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
165 std::string regexp_str,
166 Xapian::Query &output, std::string &msg)
169 notmuch_status_t status;
171 status = compile_regex (regexp, regexp_str.c_str (), msg);
173 _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
177 if (slot == Xapian::BAD_VALUENO)
178 slot = _find_slot (field);
180 if (slot == Xapian::BAD_VALUENO) {
181 std::string term_prefix = _find_prefix (field.c_str ());
182 std::vector<std::string> terms;
184 for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
185 it != notmuch->xapian_db->allterms_end (); ++it) {
186 if (regexec (®exp, (*it).c_str () + term_prefix.size (),
188 terms.push_back (*it);
190 output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
192 RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
193 output = Xapian::Query (postings->release ());
195 return NOTMUCH_STATUS_SUCCESS;
199 RegexpFieldProcessor::operator() (const std::string & str)
202 if (options & NOTMUCH_FIELD_PROBABILISTIC) {
203 return Xapian::Query (Xapian::Query::OP_AND_NOT,
204 xapian_query_match_all (),
205 Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
207 return Xapian::Query (term_prefix);
211 if (str.at (0) == '/') {
212 if (str.length () > 1 && str.at (str.size () - 1) == '/') {
214 std::string regexp_str = str.substr (1, str.size () - 2);
216 notmuch_status_t status;
218 status = _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg);
220 throw Xapian::QueryParserError (msg);
223 throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
226 if (options & NOTMUCH_FIELD_PROBABILISTIC) {
227 /* TODO replace this with a nicer API level triggering of
228 * phrase parsing, when possible */
229 std::string query_str;
231 if ((str.at (0) != '(' || *str.rbegin () != ')') &&
232 (*str.rbegin () != '*' || str.find (' ') != std::string::npos))
233 query_str = '"' + str + '"';
237 return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
240 std::string query_str;
243 if (str.length () > 1 && str.at (str.size () - 1) == '/')
244 query_str = str.substr (0, str.size () - 1);
248 term = term_prefix + query_str;
249 return Xapian::Query (term);