===================
The tags are stored in the git repo (and exported) as a set of empty
-files. For a message with Message-Id *id*, for each tag *tag*, there
+files. These empty files are contained within a directory named after
+the message-id.
+
+In what follows `encode()` represents a POSIX filesystem safe
+encoding. The encoding preserves alphanumerics, and the characters
+`+-_@=.,:`. All other octets are replaced with `%` followed by a two
+digit hex number.
+
+Currently :any:`notmuch-git` can read any format version, but can only
+create (via :any:`init`) :ref:`version 1 <format_version_1>` repositories.
+
+.. _format_version_0:
+
+Version 0
+---------
+
+This is the legacy format created by the `nmbug` tool prior to release
+0.37. For a message with Message-Id *id*, for each tag *tag*, there
is an empty file with path
tags/ `encode` (*id*) / `encode` (*tag*)
-The encoding preserves alphanumerics, and the characters `+-_@=.,:`.
-All other octets are replaced with `%` followed by a two digit hex
-number.
+.. _format_version_1:
+
+Version 1
+---------
+
+In format version 1 and later, the format version is contained in a
+top level file called FORMAT.
+
+For a message with Message-Id *id*, for each tag *tag*, there
+is an empty file with path
+
+ tags/ `hash1` (*id*) / `hash2` (*id*) `encode` (*id*) / `encode` (*tag*)
+
+The hash functions each represent one byte of the `blake2b` hex
+digest.
+
+Compared to :ref:`version 0 <format_version_0>`, this reduces the
+number of subdirectories within each directory.
.. _repo_location:
NOTMUCH_GIT_DIR = None
TAG_PREFIX = None
+FORMAT_VERSION = 0
_HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}')
_TAG_DIRECTORY = 'tags/'
-_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)')
+_TAG_FILE_REGEX = ( _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)'),
+ _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){2}(?P<id>[^/]*)/(?P<tag>[^/]*)'))
# magic hash for Git (git hash-object -t blob /dev/null)
_EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
Each tag $tag for message with Message-Id $id is written to
an empty file
- tags/encode($id)/encode($tag)
+ tags/hash1(id)/hash2(id)/encode($id)/encode($tag)
The encoding preserves alphanumerics, and the characters
"+-_@=.:," (not the quotes). All other octets are replaced with
_git(args=['config', 'core.logallrefupdates', 'true'], wait=True)
# create an empty blob (e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
_git(args=['hash-object', '-w', '--stdin'], input='', wait=True)
+ # create a blob for the FORMAT file
+ (status, stdout, _) = _git(args=['hash-object', '-w', '--stdin'], stdout=_subprocess.PIPE,
+ input='1\n', wait=True)
+ verhash=stdout.rstrip()
+ _LOG.debug('hash of FORMAT blob = {:s}'.format(verhash))
+ # Add FORMAT to the index
+ _git(args=['update-index', '--add', '--cacheinfo', '100644,{:s},FORMAT'.format(verhash)], wait=True)
+
_git(
args=[
- 'commit', '--allow-empty', '-m', 'Start a new nmbug repository'
+ 'commit', '-m', 'Start a new notmuch-git repository'
],
additional_env={'GIT_WORK_TREE': NOTMUCH_GIT_DIR},
wait=True)
Neither 'id' nor the tags in 'tags' should be encoded/escaped.
"""
- dir = 'tags/{id}'.format(id=_hex_quote(string=id))
+ dir = _id_path(id)
with _git(
args=['ls-files', dir],
(count,uuid,lastmod_str) = notmuch.stdout.readline().split()
return (count,uuid,int(lastmod_str))
+def _id_path(id):
+ hid=_hex_quote(string=id)
+ from hashlib import blake2b
+
+ if FORMAT_VERSION==0:
+ return 'tags/{hid}'.format(hid=hid)
+ elif FORMAT_VERSION==1:
+ idhash = blake2b(hid.encode('utf8'), digest_size=2).hexdigest()
+ return 'tags/{dir1}/{dir2}/{hid}'.format(
+ hid=hid,
+ dir1=idhash[0:2],dir2=idhash[2:])
+ else:
+ _LOG.error("Unknown format version",FORMAT_VERSION)
+ _sys.exit(1)
+
def _index_tags_for_message(id, status, tags):
"""
Update the Git index to either create or delete an empty file.
hash = '0000000000000000000000000000000000000000'
for tag in tags:
- path = 'tags/{id}/{tag}'.format(
- id=_hex_quote(string=id), tag=_hex_quote(string=tag))
+ path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag))
yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path)
def _unpack_diff_lines(stream):
"Iterate through (id, tag) tuples in a diff stream."
for line in stream:
- match = _TAG_FILE_REGEX.match(line.strip())
+ match = _TAG_FILE_REGEX[FORMAT_VERSION].match(line.strip())
if not match:
message = 'non-tag line in diff: {!r}'.format(line.strip())
if line.startswith(_TAG_DIRECTORY):
_sys.exit(1)
return stdout.rstrip()
+def read_format_version():
+ try:
+ (status, stdout, stderr) = _git(
+ args=['cat-file', 'blob', 'master:FORMAT'],
+ stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, wait=True)
+ except SubprocessError as e:
+ _LOG.debug("failed to read FORMAT file from git, assuming format version 0")
+ return 0
+
+ return int(stdout)
+
# based on BaseDirectory.save_data_path from pyxdg (LGPL2+)
def xdg_data_path(profile):
resource = _os.path.join('notmuch',profile,'git')
_LOG.debug('prefix = {:s}'.format(TAG_PREFIX))
_LOG.debug('repository = {:s}'.format(NOTMUCH_GIT_DIR))
+ FORMAT_VERSION = read_format_version()
+ _LOG.debug('FORMAT_VERSION={:d}'.format(FORMAT_VERSION))
+
if args.func == help:
arg_names = ['command']
else:
test_begin_subtest "committing new prefix works with force"
notmuch tag +new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -l debug -p 'new-prefix::' -C force-prefix.git commit --force
-git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT
+git -C force-prefix.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | xargs dirname | sort -u > OUTPUT
notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu
cat <<EOF>EXPECTED
-id:20091117190054.GU3165@dottiness.seas.harvard.edu
+20091117190054.GU3165@dottiness.seas.harvard.edu
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
test_begin_subtest "commit"
notmuch git -C tags.git commit --force
-git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT
-notmuch search --output=messages '*' | sort > EXPECTED
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | xargs dirname | sort -u > OUTPUT
+notmuch search --output=messages '*' | sed s/^id:// | sort > EXPECTED
test_expect_equal_file_nonempty EXPECTED OUTPUT
test_begin_subtest "commit --force succeeds"
test_begin_subtest "commit (incremental)"
notmuch tag +test id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort > OUTPUT
echo "--------------------------------------------------" >> OUTPUT
notmuch tag -test id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort >> OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/test
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/test
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
--------------------------------------------------
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
notmuch tag +test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git -p 'test::' commit --force
git -C tags.git ls-tree -r --name-only HEAD |
- grep 20091117190054 | sort > OUTPUT
+ grep 20091117190054 | notmuch_git_sanitize | sort > OUTPUT
echo "--------------------------------------------------" >> OUTPUT
notmuch tag -test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit --force
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort >> OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/one
+20091117190054.GU3165@dottiness.seas.harvard.edu/one
--------------------------------------------------
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
test_begin_subtest "archive"
notmuch git -C tags.git archive | tar tf - | \
- grep 20091117190054.GU3165@dottiness.seas.harvard.edu | sort > OUTPUT
+ grep 20091117190054.GU3165@dottiness.seas.harvard.edu | notmuch_git_sanitize | sort > OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
notmuch git -C tags.git checkout
test_expect_equal_file EXPECTED OUTPUT
-e 's/^Date: Fri, 05 Jan 2001 .*0000/Date: GENERATED_DATE/'
}
+# remove redundant parts of notmuch-git internal paths
+notmuch_git_sanitize () {
+ sed -e 's,tags/\([0-9a-f]\{2\}/\)\{2\},,' -e '/FORMAT/d'
+}
notmuch_uuid_sanitize () {
sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g'
}