]> git.cworth.org Git - notmuch/commitdiff
lib: replace some uses of Query::MatchAll with a thread-safe alternative
authorKevin Boulain <kevin@boula.in>
Thu, 2 Mar 2023 17:59:15 +0000 (18:59 +0100)
committerDavid Bremner <david@tethera.net>
Fri, 31 Mar 2023 11:11:39 +0000 (08:11 -0300)
This replaces two instances of Xapian::Query::MatchAll with the
equivalent but thread-safe alternative Xapian::Query(std::string()).
Xapian::Query::MatchAll maintains an internal pointer to a refcounted
Xapian::Internal::QueryTerm.

None of this is thread-safe but that wouldn't be an issue if
Xapian::Query::MatchAll wasn't static. Because it's static, the
refcounting goes awry when Notmuch is called from multiple threads.
This is actually documented by Xapian:
https://github.com/xapian/xapian/blob/4715de3a9fcee741587439dc3cc1d2ff01ffeaf2/xapian-core/include/xapian/query.h#L65

While static, Xapian::Query::MatchNothing is safe because it doesn't
maintain an internal object and as such, doesn't use references.

Two best-effort tests making use of TSan were added to showcase the
issue (I couldn't figure out a way to deterministically reproduce it
without making an unmaintainable mess).

First, when two databases are created in parallel, a query that uses
Xapian::Query::MatchAll is made (lib/query.cc), resulting in the
following backtrace on a segfault:
  #0  0x00007ffff76822af in Xapian::Query::get_terms_begin (this=0x7fffe80137f0) at api/query.cc:141
  #1  0x00007ffff7f933f5 in _notmuch_query_cache_terms (query=0x7fffe80137c0) at lib/query.cc:176
  #2  0x00007ffff7f93784 in _notmuch_query_ensure_parsed_xapian (query=0x7fffe80137c0) at lib/query.cc:225
  #3  0x00007ffff7f9381a in _notmuch_query_ensure_parsed (query=0x7fffe80137c0) at lib/query.cc:260
  #4  0x00007ffff7f93bfe in _notmuch_query_search_documents (query=0x7fffe80137c0, type=0x7ffff7fa9b1e "mail", out=0x7ffff666da18) at lib/query.cc:361
  #5  0x00007ffff7f93ba4 in notmuch_query_search_messages (query=0x7fffe80137c0, out=0x7ffff666da18) at lib/query.cc:349
  #6  0x00007ffff7f83d98 in notmuch_database_upgrade (notmuch=0x7fffe8000bd0, progress_notify=0x0, closure=0x0) at lib/database.cc:934
  #7  0x00007ffff7fa110f in notmuch_database_create_with_config (database_path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", config_path=0x7ffff7faab3c "", profile=0x0, database=0x0, status_string=0x7ffff666dc90) at lib/open.cc:754
  #8  0x00007ffff7fa0d6f in notmuch_database_create_verbose (path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", database=0x0, status_string=0x7ffff666dc90) at lib/open.cc:653
  #9  0x00007ffff7fa0ceb in notmuch_database_create (path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", database=0x0) at lib/open.cc:637
  ...

Second, some queries would make use of Xapian::Query::MatchAll
(lib/regexp-fields.cc), resulting in the following backtrace on a
segfault:
  #0  0x00007f629828b690 in Xapian::Internal::QueryBranch::gather_terms (this=0x7f628800def0, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1245
  #1  0x00007f629828c260 in Xapian::Internal::QueryScaleWeight::gather_terms (this=0x7f628800df70, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1434
  #2  0x00007f629828b69f in Xapian::Internal::QueryBranch::gather_terms (this=0x7f628800dd90, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1245
  #3  0x00007f6298282571 in Xapian::Query::get_unique_terms_begin (this=0x7f628800dcd8) at api/query.cc:166
  #4  0x00007f629841a59b in Xapian::Weight::Internal::accumulate_stats (this=0x7f628800dca0, subdb=..., rset=...) at weight/weightinternal.cc:86
  #5  0x00007f62983c15ba in LocalSubMatch::prepare_match (this=0x7f628800df20, nowait=true, total_stats=...) at matcher/localsubmatch.cc:172
  #6  0x00007f62983c8fcc in prepare_sub_matches (leaves=std::vector of length 1, capacity 1 = {...}, stats=...) at matcher/multimatch.cc:237
  #7  0x00007f62983c98a3 in MultiMatch::MultiMatch (this=0x7f629726d9a0, db_=..., query_=..., qlen=3, omrset=0x0, collapse_max_=0, collapse_key_=4294967295, percent_cutoff_=0, weight_cutoff_=0, order_=Xapian::Enquire::ASCENDING, sort_key_=0, sort_by_=Xapian::Enquire::Internal::VAL, sort_value_forward_=true, time_limit_=0, stats=..., weight_=0x7f6288008d50, matchspies_=std::vector of length 0, capacity 0, have_sorter=false, have_mdecider=false) at matcher/multimatch.cc:353
  #8  0x00007f629826fcba in Xapian::Enquire::Internal::get_mset (this=0x7f628800e0b0, first=0, maxitems=0, check_at_least=0, rset=0x0, mdecider=0x0) at api/omenquire.cc:569
  #9  0x00007f629827181c in Xapian::Enquire::get_mset (this=0x7f629726db80, first=0, maxitems=0, check_at_least=0, rset=0x0, mdecider=0x0) at api/omenquire.cc:937
  #10 0x00007f6298be529a in _notmuch_query_search_documents (query=0x7f6288009750, type=0x7f6298bfaafe "mail", out=0x7f629726dcc0) at lib/query.cc:447
  #11 0x00007f6298be4ae8 in notmuch_query_search_messages (query=0x7f6288009750, out=0x7f629726dcc0) at lib/query.cc:349
  ...

Printing Xapian::Query::MatchAll->internal.px->_refs in these
circumstances can help quickly identifying this scenario.

This is motivated by some test frameworks (like Rust's Cargo) that
runs unit tests in parallel and would easily encounter this issue,
unless client code gates every call to Notmuch behind a lock.

This is what can be expected from the tests when they fail:
   == stderr ==
  +==================
  +WARNING: ThreadSanitizer: data race (pid=207931)
  +  Read of size 1 at 0x7b10000001a0 by thread T2:
  +    #0 memcpy <null> (libtsan.so.2+0x62506)
  +    #1 void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char*>(char*, char*, std::forward_iterator_tag) [clone .isra.0] <null> (libxapian.so.30+0x872b3)
  +
  +  Previous write of size 8 at 0x7b10000001a0 by thread T1:
  +    #0 operator new(unsigned long) <null> (libtsan.so.2+0x8ba83)
  +    #1 Xapian::Query::Query(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, unsigned int, unsigned int) <null> (libxapian.so.30+0x855cd)
  ...

configure
lib/query.cc
lib/regexp-fields.cc
test/T800-asan.sh
test/T810-tsan.sh [new file with mode: 0755]
test/T810-tsan.suppressions [new file with mode: 0644]
util/xapian-extra.h [new file with mode: 0644]

index c3629a73a4f3536993329ab1770a6622ab6fcb93..7afd08c7dac7822b74cf3315b5ba6adacbdb1de7 100755 (executable)
--- a/configure
+++ b/configure
@@ -422,6 +422,18 @@ else
 fi
 unset test_cmdline
 
+printf "C compiler supports thread sanitizer... "
+test_cmdline="${CC} ${CFLAGS} ${CPPFLAGS} -fsanitize=thread minimal.c ${LDFLAGS} -o minimal"
+if ${test_cmdline} >/dev/null 2>&1 && ./minimal
+then
+    printf "Yes.\n"
+    have_tsan=1
+else
+    printf "Nope, skipping those tests.\n"
+    have_tsan=0
+fi
+unset test_cmdline
+
 printf "Reading libnotmuch version from source... "
 cat > _libversion.c <<EOF
 #include <stdio.h>
@@ -1590,8 +1602,9 @@ NOTMUCH_GMIME_VERIFY_WITH_SESSION_KEY=${gmime_verify_with_session_key}
 NOTMUCH_ZLIB_CFLAGS="${zlib_cflags}"
 NOTMUCH_ZLIB_LDFLAGS="${zlib_ldflags}"
 
-# Does the C compiler support the address sanitizer
+# Does the C compiler support the sanitizers
 NOTMUCH_HAVE_ASAN=${have_asan}
+NOTMUCH_HAVE_TSAN=${have_tsan}
 
 # do we have man pages?
 NOTMUCH_HAVE_MAN=$((have_sphinx))
index 707f6222b16bd001c17097439e49804a6babc4ef..1c60c122c8e9eedd22b2e5481e8e189719551994 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "notmuch-private.h"
 #include "database-private.h"
+#include "xapian-extra.h"
 
 #include <glib.h> /* GHashTable, GPtrArray */
 
@@ -186,7 +187,7 @@ _notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
 {
     try {
        if (query_string == "" || query_string == "*") {
-           output = Xapian::Query::MatchAll;
+           output = xapian_query_match_all ();
        } else {
            output =
                notmuch->query_parser->
index 539915d892f9529d3d011dfa7611bb37aba718d2..3a775261fc3a1c2f31b82f345766e231fbf45f81 100644 (file)
@@ -25,6 +25,7 @@
 #include "regexp-fields.h"
 #include "notmuch-private.h"
 #include "database-private.h"
+#include "xapian-extra.h"
 
 notmuch_status_t
 compile_regex (regex_t &regexp, const char *str, std::string &msg)
@@ -200,7 +201,7 @@ RegexpFieldProcessor::operator() (const std::string & str)
     if (str.empty ()) {
        if (options & NOTMUCH_FIELD_PROBABILISTIC) {
            return Xapian::Query (Xapian::Query::OP_AND_NOT,
-                                 Xapian::Query::MatchAll,
+                                 xapian_query_match_all (),
                                  Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
        } else {
            return Xapian::Query (term_prefix);
index 8607732e176b9978bd72622a6e5ddf8a255e6710..5055c93e122b2c82449e5df4cf5d38632d971545 100755 (executable)
@@ -9,7 +9,7 @@ fi
 
 add_email_corpus
 
-TEST_CFLAGS="-fsanitize=address"
+TEST_CFLAGS="${TEST_CFLAGS:-} -fsanitize=address"
 
 test_begin_subtest "open and destroy"
 test_C ${MAIL_DIR} ${NOTMUCH_CONFIG} <<EOF
diff --git a/test/T810-tsan.sh b/test/T810-tsan.sh
new file mode 100755 (executable)
index 0000000..7e877b2
--- /dev/null
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+test_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)
+
+test_description='run code with TSan enabled against the library'
+# Note it is hard to ensure race conditions are deterministic so this
+# only provides best effort detection.
+
+. "$test_directory"/test-lib.sh || exit 1
+
+if [ $NOTMUCH_HAVE_TSAN -ne 1 ]; then
+    printf "Skipping due to missing TSan support\n"
+    test_done
+fi
+
+export TSAN_OPTIONS="suppressions=$test_directory/T810-tsan.suppressions"
+TEST_CFLAGS="${TEST_CFLAGS:-} -fsanitize=thread"
+
+cp -r ${MAIL_DIR} ${MAIL_DIR}-2
+
+test_begin_subtest "create"
+test_C ${MAIL_DIR} ${MAIL_DIR}-2 <<EOF
+#include <notmuch-test.h>
+#include <pthread.h>
+
+void *thread (void *arg) {
+  char *mail_dir = arg;
+  /*
+   * Calls into notmuch_query_search_messages which was using the thread-unsafe
+   * Xapian::Query::MatchAll.
+   */
+  EXPECT0(notmuch_database_create (mail_dir, NULL));
+  return NULL;
+}
+
+int main (int argc, char **argv) {
+  pthread_t t1, t2;
+  EXPECT0(pthread_create (&t1, NULL, thread, argv[1]));
+  EXPECT0(pthread_create (&t2, NULL, thread, argv[2]));
+  EXPECT0(pthread_join (t1, NULL));
+  EXPECT0(pthread_join (t2, NULL));
+  return 0;
+}
+EOF
+cat <<EOF > EXPECTED
+== stdout ==
+== stderr ==
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+add_email_corpus
+rm -r ${MAIL_DIR}-2
+cp -r ${MAIL_DIR} ${MAIL_DIR}-2
+
+test_begin_subtest "query"
+test_C ${MAIL_DIR} ${MAIL_DIR}-2 <<EOF
+#include <notmuch-test.h>
+#include <pthread.h>
+
+void *thread (void *arg) {
+  char *mail_dir = arg;
+  notmuch_database_t *db;
+  /*
+   * 'from' is NOTMUCH_FIELD_PROBABILISTIC | NOTMUCH_FIELD_PROCESSOR and an
+   * empty string gets us to RegexpFieldProcessor::operator which was using
+   * the tread-unsafe Xapian::Query::MatchAll.
+   */
+  EXPECT0(notmuch_database_open_with_config (mail_dir,
+                                             NOTMUCH_DATABASE_MODE_READ_ONLY,
+                                             NULL, NULL, &db, NULL));
+  notmuch_query_t *query = notmuch_query_create (db, "from:\"\"");
+  notmuch_messages_t *messages;
+  EXPECT0(notmuch_query_search_messages (query, &messages));
+  return NULL;
+}
+
+int main (int argc, char **argv) {
+  pthread_t t1, t2;
+  EXPECT0(pthread_create (&t1, NULL, thread, argv[1]));
+  EXPECT0(pthread_create (&t2, NULL, thread, argv[2]));
+  EXPECT0(pthread_join (t1, NULL));
+  EXPECT0(pthread_join (t2, NULL));
+  return 0;
+}
+EOF
+cat <<EOF > EXPECTED
+== stdout ==
+== stderr ==
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
diff --git a/test/T810-tsan.suppressions b/test/T810-tsan.suppressions
new file mode 100644 (file)
index 0000000..dbd16a9
--- /dev/null
@@ -0,0 +1,5 @@
+# It's unclear how TSan-friendly GLib is:
+# https://gitlab.gnome.org/GNOME/glib/-/issues/1672
+race:g_rw_lock_reader_lock
+# https://gitlab.gnome.org/GNOME/glib/-/issues/1952
+race:g_slice_alloc0
diff --git a/util/xapian-extra.h b/util/xapian-extra.h
new file mode 100644 (file)
index 0000000..39c7f48
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _XAPIAN_EXTRA_H
+#define _XAPIAN_EXTRA_H
+
+#include <string>
+#include <xapian.h>
+
+inline Xapian::Query
+xapian_query_match_all (void)
+{
+    // Xapian::Query::MatchAll isn't thread safe (a static object with reference
+    // counting) so instead reconstruct the equivalent on demand.
+    return Xapian::Query (std::string ());
+}
+
+#endif