From 271dede0c9a8083aa56af153d55388af8521f6c8 Mon Sep 17 00:00:00 2001
From: David Bremner
Date: Sat, 24 Aug 2024 08:43:03 -0700
Subject: [PATCH] cli/git-remote: add export command

Two (sub)features are stubbed out in this initial implementation:
deleting messages (as opposed to tags), and missing messages. There
are two corresponding tests marked as broken in
T860-git-remote.sh. A third test passes with the stub, which is maybe
not ideal, but at least it acts as a regression test.
---
 git-remote-notmuch.c               | 237 +++++++++++++++++++++++++++++
 performance-test/M07-git-remote.sh |   4 +
 performance-test/T08-git-remote.sh |  41 +++++
 test/T860-git-remote.sh            | 169 ++++++++++++++++++++
 4 files changed, 451 insertions(+)

diff --git a/git-remote-notmuch.c b/git-remote-notmuch.c
index bfd045a8..addf23c7 100644
--- a/git-remote-notmuch.c
+++ b/git-remote-notmuch.c
@@ -41,6 +41,21 @@ FILE *log_file = NULL;
 char *buffer = NULL;
 size_t buffer_len = 0;
 
+/* per-message-id state recorded while an export stream is processed */
+typedef enum {
+    MSG_STATE_UNKNOWN=0, /* zero, so a failed hash lookup (NULL) decodes to this */
+    MSG_STATE_SEEN,      /* modified by the stream and present in the database */
+    MSG_STATE_MISSING,   /* modified by the stream but not found in the database */
+    MSG_STATE_DELETED    /* removed by a "D" line in the stream */
+} _message_state_t;
+
+static bool
+set_message_state (GHashTable *mid_state, const char *mid, _message_state_t state) /* table keeps its own copy of mid */
+{
+    return g_hash_table_replace (mid_state, g_strdup (mid), /* result is g_hash_table_replace's gboolean */
+                                 GINT_TO_POINTER (state));
+}
+
 static inline bool
 equal_lastmod (const char *uuid1, unsigned long counter1,
                const char *uuid2, unsigned long counter2)
@@ -284,6 +299,226 @@ cmd_import (notmuch_database_t *notmuch,
     store_lastmod (notmuch, nm_dir);
 }
 
+static GString *
+read_data () /* read one "data <n>" header line plus its n payload bytes from stdin */
+{
+    ssize_t nread;
+    size_t bytes;
+    size_t data_size;
+
+    g_auto (GStrv) tokens = NULL;
+
+    ASSERT ((nread = getline (&buffer, &buffer_len, stdin)) != -1); /* nread holds the byte count, not the comparison */
+
+    tokens = tokenize_buffer ();
+
+    str2ul (tokens[1], &data_size); /* second token is the payload length; NOTE(review): result unchecked */
+
+    ASSERT (buffer = realloc (buffer, data_size + 1)); /* stop here rather than fread into NULL */
+    bytes = fread (buffer, 1, data_size, stdin);
+    ASSERT (bytes == data_size);
+
+    buffer_len = data_size; /* the global buffer holds raw payload bytes and is NOT NUL-terminated */
+
+    return g_string_new_len (buffer, buffer_len); /* independent copy; caller frees it */
+}
+
+static void
+free_string (GString *str) /* GDestroyNotify adapter: frees the GString and its character data */
+{
+    g_string_free (str, true);
+}
+
+static bool
+path_to_mid (notmuch_database_t *notmuch, const char *path, char **mid_p, size_t *mid_len_p) /* false if path is outside the metadata prefix */
+{
+    g_autofree char *basename = NULL; /* despite the name, holds the directory part that encodes the message id */
+    const char *prefix = notmuch_config_get (notmuch, NOTMUCH_CONFIG_GIT_METADATA_PREFIX);
+
+    if (! path || strncmp (prefix, path, strlen (prefix)) || strlen (path) < strlen (prefix) + 7) /* too short to skip 7 bytes safely */
+        return false;
+
+    basename = g_path_get_dirname (path + strlen (prefix) + 7); /* NOTE(review): 7 presumably skips "/xx/yy/" fan-out dirs -- confirm */
+    ASSERT (HEX_SUCCESS ==
+            hex_decode (notmuch, basename, mid_p, mid_len_p));
+    return true;
+}
+
+static void
+mark_unseen (unused (notmuch_database_t *notmuch), /* stub: intentionally does nothing yet */
+             unused (GHashTable *mid_state))
+{
+}
+
+static void
+purge_database (unused (notmuch_database_t *notmuch), /* stub: message deletion is not implemented yet */
+                unused (GHashTable *mid_state))
+{
+}
+
+static void
+check_missing (unused (notmuch_database_t *notmuch), unused (GHashTable *mid_state)) /* stub: missing messages are not reported yet */
+{
+}
+
+static void
+cmd_export (notmuch_database_t *notmuch, const char *nm_dir) /* apply a fast-export stream read from stdin to the database */
+{
+    ssize_t nread;
+
+    int commit_count = 0; /* counts only commits addressed to the configured ref */
+
+    g_autoptr (GHashTable) blobs = NULL;     /* mark string -> GString blob contents */
+    g_autoptr (GHashTable) mid_state = NULL; /* message id -> _message_state_t */
+
+    /* No value destructor: values are _message_state_t codes packed with
+     * GINT_TO_POINTER, not heap pointers; only the g_strdup'd keys are freed */
+    ASSERT (mid_state = g_hash_table_new_full ((GHashFunc) g_str_hash,
+                                               (GEqualFunc) g_str_equal,
+                                               g_free, NULL));
+
+    ASSERT (blobs = g_hash_table_new_full ((GHashFunc) g_str_hash,
+                                           (GEqualFunc) g_str_equal,
+                                           g_free, (GDestroyNotify) free_string));
+
+    while ((nread = getline (&buffer, &buffer_len, stdin)) != -1) {
+        flog ("export %s\n", buffer);
+        if (STRNCMP_LITERAL (buffer, "done") == 0) {
+            break;
+        } else if (STRNCMP_LITERAL (buffer, "blob") == 0) {
+            GString *data;
+            g_auto (GStrv) tokens = NULL;
+
+
+            flog ("export blob\n");
+            buffer_line (stdin); /* the "mark" line naming this blob */
+
+            tokens = tokenize_buffer ();
+
+            data = read_data ();
+
+            flog ("\tmark%s\n", tokens[1]);
+            g_hash_table_insert (blobs, g_strdup (tokens[1]), data); /* table takes ownership of data */
+            buffer_line (stdin);
+        } else if (STRNCMP_LITERAL (buffer, "commit") == 0) {
+            char *mid = NULL;
+            size_t mid_len = 0;
+            bool process_this_commit = true;
+            g_autoptr (GString) commit_msg = NULL;
+            const char *commit_ref = buffer + strlen ("commit "); /* points into buffer: only valid until the next read */
+            const char *database_ref = notmuch_config_get (notmuch, NOTMUCH_CONFIG_GIT_REF);
+            chomp_newline (buffer);
+            if (strcmp (commit_ref, database_ref)) {
+                process_this_commit = false;
+                flog ("ignoring commit to ref %s\n", commit_ref);
+            }
+
+            if (process_this_commit) {
+                commit_count++;
+                flog ("export commit %d\n", commit_count);
+            }
+
+            /* mark for commit (ignored) */
+            buffer_line (stdin);
+            /* author (ignored) */
+            buffer_line (stdin);
+            /* committer (ignored) */
+            buffer_line (stdin);
+
+            /* commit message */
+            commit_msg = read_data ();
+            flog ("commit msg %s\n", commit_msg->str);
+            while (true) { /* left via the breaks below; buffer holds un-terminated message bytes here, so it must not be tested */
+                g_autoptr (GString) mark = NULL;
+                g_autoptr (GString) path = NULL;
+                const GString *blob;
+                notmuch_message_t *message;
+                const char *tok;
+                size_t tok_len;
+                size_t max_tok_len;
+                tag_op_list_t *tag_ops;
+                g_auto (GStrv) tokens = NULL;
+
+                buffer_line (stdin);
+                if (strlen (buffer) == 0) /* an empty line ends the commit's file list */
+                    break;
+                if (! process_this_commit)
+                    break;
+
+                tokens = tokenize_buffer ();
+                if (STRNCMP_LITERAL (tokens[0], "D") == 0) {
+                    if (path_to_mid (notmuch, tokens[1], &mid, &mid_len)) {
+                        flog ("marking message %s for deletion\n", mid);
+                        set_message_state (mid_state, mid, MSG_STATE_DELETED);
+                    } else {
+                        if (debug_flags && strchr (debug_flags, 'd'))
+                            flog ("ignoring non prefixed file %s\n", tokens[1]);
+                    }
+                } else if (STRNCMP_LITERAL (tokens[0], "M") == 0) {
+
+                    ASSERT (blob = g_hash_table_lookup (blobs, tokens[2])); /* tokens[2] is the blob mark, tokens[3] the path */
+
+                    if (! path_to_mid (notmuch, tokens[3], &mid, &mid_len)) {
+                        if (debug_flags && strchr (debug_flags, 'd'))
+                            flog ("ignoring non prefixed file %s\n", tokens[3]);
+                        continue;
+                    }
+
+                    if (debug_flags && strchr (debug_flags, 'd')) {
+                        flog ("marking mid seen: %s\n", mid);
+                    }
+
+                    ASSERT (NOTMUCH_STATUS_SUCCESS ==
+                            notmuch_database_find_message (notmuch, mid, &message));
+                    if (! message) {
+                        if (debug_flags && strchr (debug_flags, 'm')) {
+                            flog ("marking mid missing: %s\n", mid);
+                        }
+                        set_message_state (mid_state, mid, MSG_STATE_MISSING);
+                    } else {
+                        set_message_state (mid_state, mid, MSG_STATE_SEEN);
+                        ASSERT (NOTMUCH_STATUS_SUCCESS ==
+                                notmuch_message_freeze (message));
+
+                        tag_ops = tag_op_list_create (message);
+                        tok = blob->str;
+                        max_tok_len = blob->len;
+                        tok_len = 0;
+                        while ((tok_len < max_tok_len) &&
+                               (tok = strsplit_len (tok + tok_len, '\n', &tok_len)) != NULL) {
+                            const char *tag = talloc_strndup (message, tok, tok_len); /* one tag per blob line */
+                            ASSERT (0 == tag_op_list_append (tag_ops, tag, false));
+                        }
+
+                        ASSERT (NOTMUCH_STATUS_SUCCESS ==
+                                tag_op_list_apply (message, tag_ops, TAG_FLAG_REMOVE_ALL));
+
+                        ASSERT (NOTMUCH_STATUS_SUCCESS ==
+                                notmuch_message_thaw (message));
+
+                        notmuch_message_destroy (message);
+
+                    }
+                } else {
+                    flog ("export ignoring line %s\n", buffer);
+                }
+            }
+            puts ("ok refs/heads/master"); /* NOTE(review): printed even for commits to an ignored ref -- confirm intent */
+        }
+    }
+
+    mark_unseen (notmuch, mid_state);
+
+    if (commit_count > 0)
+        purge_database (notmuch, mid_state);
+
+    check_missing (notmuch, mid_state);
+
+    store_lastmod (notmuch, nm_dir);
+    puts ("");
+}
+
+
 /* stubs since we cannot link with notmuch.o */
 const notmuch_opt_desc_t notmuch_shared_options[] = {
     { }
@@ -412,6 +647,8 @@ main (int argc, char *argv[])
 
         if (STRNCMP_LITERAL (s, "capabilities") == 0)
             cmd_capabilities ();
+        else if (STRNCMP_LITERAL (s, "export") == 0)
+            cmd_export (db, nm_dir);
         else if (STRNCMP_LITERAL (s, "import") == 0)
             cmd_import (db, nm_dir, uuid, lastmod);
         else if (STRNCMP_LITERAL (s, "list") == 0)
diff --git a/performance-test/M07-git-remote.sh b/performance-test/M07-git-remote.sh
index 57b9ab32..24b43f67 100755
--- a/performance-test/M07-git-remote.sh
+++ b/performance-test/M07-git-remote.sh
@@ -6,6 +6,7 @@ test_description='git remote helper'
 
 mkdir repo
 export GIT_DIR=`pwd`/repo
+MAKE_EXPORT_PY=$NOTMUCH_SRCDIR/test/make-export.py
 
 memory_start
 
@@ -13,4 +14,7 @@ echo "import refs/heads/master" > import.in
memory_run "import" "git-remote-notmuch origin notmuch:// >import.out export.in +memory_run "export" "git-remote-notmuch origin notmuch:// >export.out > $path + fi + done + + cd $olddir +} + time_start time_run 'clone --bare' "git clone --quiet --bare -b master notmuch::default default.git" time_run 'clone' "git clone --quiet -b master notmuch:// repo" +time_run "push (no changes)" "git -C repo push --quiet origin master" + +add_tags repo 10 +git -C repo add -u +git -C repo commit --quiet -m'add tags to 10% of messages' +time_run "push (10% changed)" "git -C repo push --quiet origin master" + +add_tags repo 4 +git -C repo add -u +git -C repo commit --quiet -m'add tags to 25% of messages' +time_run "push (25% changed)" "git -C repo push --quiet origin master" + +add_tags repo 2 +git -C repo add -u +git -C repo commit --quiet -m'add tags to 50% of messages' +time_run "push (50% changed)" "git -C repo push --quiet origin master" + +hash=$(git -C repo hash-object --stdin -w < /dev/null) +# replace all files with empty files +git -C repo ls-tree -r HEAD | sed "s/blob [^\t]*/blob $hash/" \ + | git -C repo update-index --index-info +git -C repo commit --quiet -m'zero tags' 2>>log.txt 1>&2 + +time_run "push (rem. 
all tags)" "git -C repo push --quiet origin master" + time_done diff --git a/test/T860-git-remote.sh b/test/T860-git-remote.sh index 87a237a0..3a1c128a 100755 --- a/test/T860-git-remote.sh +++ b/test/T860-git-remote.sh @@ -119,6 +119,16 @@ EOF test_expect_equal_file EXPECTED repo/$TAG_FILE restore_state +backup_state +test_begin_subtest "push empty commit" +git -C repo pull +notmuch dump | sort > EXPECTED +git -C repo pull +git -C repo push +notmuch dump | sort > OUTPUT +test_expect_equal_file EXPECTED OUTPUT +restore_state + backup_state test_begin_subtest "pull sees deletion" notmuch tag -unread -- id:4EFC743A.3060609@april.org @@ -129,4 +139,163 @@ EOF test_expect_equal_file EXPECTED repo/$TAG_FILE restore_state +backup_state +test_begin_subtest 'export runs' +run_helper < OUTPUT +export +blob +mark :1 +data 10 +tag1 +tag2 + +commit refs/heads/master +mark :2 +author Notmuch Test Suite 1234 +0000 +committer Notmuch Test Suite 1234 +0000 +data 8 +ignored +M 100644 :1 $TAG_FILE + +done + +EOF +cat < EXPECTED +ok refs/heads/master + +EOF +test_expect_equal_file EXPECTED OUTPUT + +# this test depends on the previous one +test_begin_subtest 'export modifies database' +notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED ++tag1 +tag2 -- id:4EFC743A.3060609@april.org +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest 'restore via export' +notmuch dump > BEFORE +python3 $MAKE_EXPORT_PY > export.in +notmuch tag +transient -- id:4EFC743A.3060609@april.org +run_helper < export.in > OUTPUT +notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED ++inbox +unread -- id:4EFC743A.3060609@april.org +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest "push updates database" +cat<repo/$TAG_FILE +tag1 +tag2 +EOF +git -C repo add $TAG_FILE +git -C repo commit -m 'testing push' +git -C repo push origin master +notmuch dump 
id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED ++tag1 +tag2 -- id:4EFC743A.3060609@april.org +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest "adding tag via repo" +cat<repo/$TAG_FILE +tag1 +tag2 +tag3 +EOF +git -C repo add $TAG_FILE +git -C repo commit -m 'testing push' +git -C repo push origin master +notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED ++tag1 +tag2 +tag3 -- id:4EFC743A.3060609@april.org +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest "non-prefixed file ignored on push" +cat<repo/dummy +this is outside the notmuch metadata prefix +EOF +git -C repo add dummy +git -C repo commit -m 'testing prefix' +test_expect_code 0 "git -C repo push origin master" +restore_state + +backup_state +test_begin_subtest "non-prefixed file ignored on pull" +cat<repo/dummy +this is outside the notmuch metadata prefix +EOF +cp repo/dummy EXPECTED +git -C repo add dummy +git -C repo commit -m 'testing prefix' +git -C repo push origin master +git -C repo pull origin master +test_expect_equal_file EXPECTED repo/dummy +restore_state + +backup_state +test_begin_subtest "push of non-main ref ignored" +notmuch dump > EXPECTED +git -C repo switch -c chaos +git -C repo rm -r _notmuch_metadata +git -C repo commit -m "delete all the things" +git -C repo push origin chaos:chaos +notmuch dump > OUTPUT +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest "removing all tags via repo" +cat<repo/$TAG_FILE +EOF +git -C repo add $TAG_FILE +git -C repo commit -m 'testing push' +git -C repo push origin master +notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED + -- id:4EFC743A.3060609@april.org +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest "removing message via repo" +test_subtest_known_broken +parent=$(dirname $TAG_FILE) +# future proof 
this for when e.g. properties are stored +git -C repo rm -r $parent +git -C repo commit -m 'testing deletion' +git -C repo push origin master +notmuch dump id:4EFC743A.3060609@april.org | tail -n 1 > OUTPUT +cat < EXPECTED +#notmuch-dump batch-tag:3 config,properties,tags +EOF +test_expect_equal_file EXPECTED OUTPUT +restore_state + +backup_state +test_begin_subtest 'by default, missing messages are an error during export' +test_subtest_known_broken +sed s/4EFC743A.3060609@april.org/missing-message@example.com/ < export.in > missing.in +test_expect_code 1 "run_helper < missing.in" +restore_state + +backup_state +test_begin_subtest 'when configured, missing messages are ignored' +notmuch config set git.fail_on_missing false +test_expect_code 0 "run_helper < missing.in" +notmuch config set git.fail_on_missing true +restore_state + test_done -- 2.45.2