X-Git-Url: https://git.cworth.org/git?p=notmuch;a=blobdiff_plain;f=notmuch-git.py;h=f188660c816ef264255308c8bf0c02d8e79dede2;hp=b4253c0dfc4dc739d0bb5b2cd2d06396f9f92e71;hb=HEAD;hpb=b07e121923a4ca00d0ec68ba9eebe8dafb70e13a diff --git a/notmuch-git.py b/notmuch-git.py index b4253c0d..97073c80 100644 --- a/notmuch-git.py +++ b/notmuch-git.py @@ -31,7 +31,6 @@ import locale as _locale import logging as _logging import os as _os import re as _re -import shutil as _shutil import subprocess as _subprocess import sys as _sys import tempfile as _tempfile @@ -46,10 +45,12 @@ _LOG.addHandler(_logging.StreamHandler()) NOTMUCH_GIT_DIR = None TAG_PREFIX = None +FORMAT_VERSION = 1 _HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}') _TAG_DIRECTORY = 'tags/' -_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P[^/]*)/(?P[^/]*)') +_TAG_FILE_REGEX = ( _re.compile(_TAG_DIRECTORY + '(?P[^/]*)/(?P[^/]*)'), + _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){2}(?P[^/]*)/(?P[^/]*)')) # magic hash for Git (git hash-object -t blob /dev/null) _EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' @@ -246,13 +247,13 @@ def count_messages(prefix=None): stdout=_subprocess.PIPE, wait=True) if status != 0: _LOG.error("failed to run notmuch config") - sys.exit(1) + _sys.exit(1) return int(stdout.rstrip()) def get_tags(prefix=None): "Get a list of tags with a given prefix." (status, stdout, stderr) = _spawn( - args=['notmuch', 'search', '--query=sexp', '--output=tags', _tag_query(prefix)], + args=['notmuch', 'search', '--exclude=false', '--query=sexp', '--output=tags', _tag_query(prefix)], stdout=_subprocess.PIPE, wait=True) return [tag for tag in stdout.splitlines()] @@ -265,7 +266,7 @@ def archive(treeish='HEAD', args=()): Each tag $tag for message with Message-Id $id is written to an empty file - tags/encode($id)/encode($tag) + tags/hash1(id)/hash2(id)/encode($id)/encode($tag) The encoding preserves alphanumerics, and the characters "+-_@=.:," (not the quotes). All other octets are replaced with @@ -375,7 +376,7 @@ def check_safe_fraction(status): total = count_messages (TAG_PREFIX) if total == 0: - _LOG.error('No existing tags with given prefix, stopping.'.format(safe)) + _LOG.error('No existing tags with given prefix, stopping.') _LOG.error('Use --force to override.') exit(1) change = len(status['added'])+len(status['deleted']) @@ -450,7 +451,7 @@ def fetch(remote=None): _git(args=args, wait=True) -def init(remote=None): +def init(remote=None,format_version=None): """ Create an empty notmuch-git repository. @@ -464,14 +465,34 @@ def init(remote=None): except FileExistsError: pass + if not format_version: + format_version = 1 + + format_version=int(format_version) + + if format_version > 1 or format_version < 0: + _LOG.error("Illegal format version {:d}".format(format_version)) + _sys.exit(1) + _spawn(args=['git', '--git-dir', NOTMUCH_GIT_DIR, 'init', '--initial-branch=master', '--quiet', '--bare'], wait=True) _git(args=['config', 'core.logallrefupdates', 'true'], wait=True) # create an empty blob (e69de29bb2d1d6434b8b29ae775ad8c2e48c5391) _git(args=['hash-object', '-w', '--stdin'], input='', wait=True) + allow_empty=('--allow-empty',) + if format_version >= 1: + allow_empty=() + # create a blob for the FORMAT file + (status, stdout, _) = _git(args=['hash-object', '-w', '--stdin'], stdout=_subprocess.PIPE, + input='{:d}\n'.format(format_version), wait=True) + verhash=stdout.rstrip() + _LOG.debug('hash of FORMAT blob = {:s}'.format(verhash)) + # Add FORMAT to the index + _git(args=['update-index', '--add', '--cacheinfo', '100644,{:s},FORMAT'.format(verhash)], wait=True) + _git( args=[ - 'commit', '--allow-empty', '-m', 'Start a new nmbug repository' + 'commit', *allow_empty, '-m', 'Start a new notmuch-git repository' ], additional_env={'GIT_WORK_TREE': NOTMUCH_GIT_DIR}, wait=True) @@ -676,6 +697,32 @@ def _is_unmerged(ref='@{upstream}'): stdout=_subprocess.PIPE, wait=True) return base != fetch_head +class DatabaseCache: + def __init__(self): + try: + from notmuch2 import Database + self._notmuch = Database() + except ImportError: + self._notmuch = None + self._known = {} + + def known(self,id): + if id in self._known: + return self._known[id]; + + if self._notmuch: + try: + _ = self._notmuch.find(id) + self._known[id] = True + except LookupError: + self._known[id] = False + else: + (_, stdout, stderr) = _spawn( + args=['notmuch', 'search', '--exclude=false', '--output=files', 'id:{0}'.format(id)], + stdout=_subprocess.PIPE, + wait=True) + self._known[id] = stdout != None + return self._known[id] @timed def get_status(): @@ -683,14 +730,11 @@ def get_status(): 'deleted': {}, 'missing': {}, } + db = DatabaseCache() with PrivateIndex(repo=NOTMUCH_GIT_DIR, prefix=TAG_PREFIX) as index: maybe_deleted = index.diff(filter='D') for id, tags in maybe_deleted.items(): - (_, stdout, stderr) = _spawn( - args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)], - stdout=_subprocess.PIPE, - wait=True) - if stdout: + if db.known(id): status['deleted'][id] = tags else: status['missing'][id] = tags @@ -716,6 +760,7 @@ class PrivateIndex: self.lastmod = None self.checksum = None self._load_cache_file() + self.file_tree = None self._index_tags() def __enter__(self): @@ -741,6 +786,43 @@ class PrivateIndex: _LOG.error("Error decoding cache") _sys.exit(1) + @timed + def _read_file_tree(self): + self.file_tree = {} + + with _git( + args=['ls-files', 'tags'], + additional_env={'GIT_INDEX_FILE': self.index_path}, + stdout=_subprocess.PIPE) as git: + for file in git.stdout: + dir=_os.path.dirname(file) + tag=_os.path.basename(file).rstrip() + if dir not in self.file_tree: + self.file_tree[dir]=[tag] + else: + self.file_tree[dir].append(tag) + + + def _clear_tags_for_message(self, id): + """ + Clear any existing index entries for message 'id' + + Neither 'id' nor the tags in 'tags' should be encoded/escaped. + """ + + if self.file_tree == None: + self._read_file_tree() + + dir = _id_path(id) + + if dir not in self.file_tree: + return + + for file in self.file_tree[dir]: + line = '0 0000000000000000000000000000000000000000\t{:s}/{:s}\n'.format(dir,file) + yield line + + @timed def _index_tags(self): "Write notmuch tags to private git index." @@ -776,7 +858,7 @@ class PrivateIndex: if tag.startswith(prefix)] id = _xapian_unquote(string=id) if clear_tags: - for line in _clear_tags_for_message(index=self.index_path, id=id): + for line in self._clear_tags_for_message(id=id): git.stdin.write(line) for line in _index_tags_for_message( id=id, status='A', tags=tags): @@ -813,24 +895,6 @@ def _read_index_checksum (index_path): except FileNotFoundError: return None - -def _clear_tags_for_message(index, id): - """ - Clear any existing index entries for message 'id' - - Neither 'id' nor the tags in 'tags' should be encoded/escaped. - """ - - dir = 'tags/{id}'.format(id=_hex_quote(string=id)) - - with _git( - args=['ls-files', dir], - additional_env={'GIT_INDEX_FILE': index}, - stdout=_subprocess.PIPE) as git: - for file in git.stdout: - line = '0 0000000000000000000000000000000000000000\t{:s}\n'.format(file.strip()) - yield line - def _read_database_lastmod(): with _spawn( args=['notmuch', 'count', '--lastmod', '*'], @@ -838,6 +902,21 @@ def _read_database_lastmod(): (count,uuid,lastmod_str) = notmuch.stdout.readline().split() return (count,uuid,int(lastmod_str)) +def _id_path(id): + hid=_hex_quote(string=id) + from hashlib import blake2b + + if FORMAT_VERSION==0: + return 'tags/{hid}'.format(hid=hid) + elif FORMAT_VERSION==1: + idhash = blake2b(hid.encode('utf8'), digest_size=2).hexdigest() + return 'tags/{dir1}/{dir2}/{hid}'.format( + hid=hid, + dir1=idhash[0:2],dir2=idhash[2:]) + else: + _LOG.error("Unknown format version",FORMAT_VERSION) + _sys.exit(1) + def _index_tags_for_message(id, status, tags): """ Update the Git index to either create or delete an empty file. @@ -852,8 +931,7 @@ def _index_tags_for_message(id, status, tags): hash = '0000000000000000000000000000000000000000' for tag in tags: - path = 'tags/{id}/{tag}'.format( - id=_hex_quote(string=id), tag=_hex_quote(string=tag)) + path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag)) yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path) @@ -869,7 +947,7 @@ def _diff_refs(filter, a='HEAD', b='@{upstream}'): def _unpack_diff_lines(stream): "Iterate through (id, tag) tuples in a diff stream." for line in stream: - match = _TAG_FILE_REGEX.match(line.strip()) + match = _TAG_FILE_REGEX[FORMAT_VERSION].match(line.strip()) if not match: message = 'non-tag line in diff: {!r}'.format(line.strip()) if line.startswith(_TAG_DIRECTORY): @@ -907,6 +985,17 @@ def _notmuch_config_get(key): _sys.exit(1) return stdout.rstrip() +def read_format_version(): + try: + (status, stdout, stderr) = _git( + args=['cat-file', 'blob', 'master:FORMAT'], + stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, wait=True) + except SubprocessError as e: + _LOG.debug("failed to read FORMAT file from git, assuming format version 0") + return 0 + + return int(stdout) + # based on BaseDirectory.save_data_path from pyxdg (LGPL2+) def xdg_data_path(profile): resource = _os.path.join('notmuch',profile,'git') @@ -1008,6 +1097,11 @@ if __name__ == '__main__': subparser.add_argument( 'command', metavar='COMMAND', nargs='?', help='The command to show help for.') + elif command == 'init': + subparser.add_argument( + '--format-version', metavar='VERSION', + default = None, + help='create format VERSION repository.') elif command == 'log': subparser.add_argument( 'args', metavar='ARG', nargs='*', @@ -1104,6 +1198,11 @@ if __name__ == '__main__': _LOG.debug('prefix = {:s}'.format(TAG_PREFIX)) _LOG.debug('repository = {:s}'.format(NOTMUCH_GIT_DIR)) + if args.func != init: + FORMAT_VERSION = read_format_version() + + _LOG.debug('FORMAT_VERSION={:d}'.format(FORMAT_VERSION)) + if args.func == help: arg_names = ['command'] else: