X-Git-Url: https://git.cworth.org/git?p=notmuch;a=blobdiff_plain;f=notmuch-git.py;h=f188660c816ef264255308c8bf0c02d8e79dede2;hp=35785336ef96ad4bbcc5f5146b1e0adcb1ce2908;hb=HEAD;hpb=99e85823c8de570c0f91bca44efa2c47cddb3cbf diff --git a/notmuch-git.py b/notmuch-git.py index 35785336..97073c80 100644 --- a/notmuch-git.py +++ b/notmuch-git.py @@ -31,7 +31,6 @@ import locale as _locale import logging as _logging import os as _os import re as _re -import shutil as _shutil import subprocess as _subprocess import sys as _sys import tempfile as _tempfile @@ -40,16 +39,18 @@ from urllib.parse import quote as _quote from urllib.parse import unquote as _unquote import json as _json -_LOG = _logging.getLogger('nmbug') +_LOG = _logging.getLogger('notmuch-git') _LOG.setLevel(_logging.WARNING) _LOG.addHandler(_logging.StreamHandler()) NOTMUCH_GIT_DIR = None TAG_PREFIX = None +FORMAT_VERSION = 1 _HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}') _TAG_DIRECTORY = 'tags/' -_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P[^/]*)/(?P[^/]*)') +_TAG_FILE_REGEX = ( _re.compile(_TAG_DIRECTORY + '(?P[^/]*)/(?P[^/]*)'), + _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){2}(?P[^/]*)/(?P[^/]*)')) # magic hash for Git (git hash-object -t blob /dev/null) _EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' @@ -239,23 +240,33 @@ def _tag_query(prefix=None): prefix = TAG_PREFIX return '(tag (starts-with "{:s}"))'.format(prefix.replace('"','\\\"')) +def count_messages(prefix=None): + "count messages with a given prefix." + (status, stdout, stderr) = _spawn( + args=['notmuch', 'count', '--query=sexp', _tag_query(prefix)], + stdout=_subprocess.PIPE, wait=True) + if status != 0: + _LOG.error("failed to run notmuch config") + _sys.exit(1) + return int(stdout.rstrip()) + def get_tags(prefix=None): "Get a list of tags with a given prefix." (status, stdout, stderr) = _spawn( - args=['notmuch', 'search', '--query=sexp', '--output=tags', _tag_query(prefix)], + args=['notmuch', 'search', '--exclude=false', '--query=sexp', '--output=tags', _tag_query(prefix)], stdout=_subprocess.PIPE, wait=True) return [tag for tag in stdout.splitlines()] def archive(treeish='HEAD', args=()): """ - Dump a tar archive of the current nmbug tag set. + Dump a tar archive of the current notmuch-git tag set. Using 'git archive'. Each tag $tag for message with Message-Id $id is written to an empty file - tags/encode($id)/encode($tag) + tags/hash1(id)/hash2(id)/encode($id)/encode($tag) The encoding preserves alphanumerics, and the characters "+-_@=.:," (not the quotes). All other octets are replaced with @@ -266,13 +277,13 @@ def archive(treeish='HEAD', args=()): def clone(repository): """ - Create a local nmbug repository from a remote source. + Create a local notmuch-git repository from a remote source. This wraps 'git clone', adding some options to avoid creating a working tree while preserving remote-tracking branches and upstreams. """ - with _tempfile.TemporaryDirectory(prefix='nmbug-clone.') as workdir: + with _tempfile.TemporaryDirectory(prefix='notmuch-git-clone.') as workdir: _spawn( args=[ 'git', 'clone', '--no-checkout', '--separate-git-dir', NOTMUCH_GIT_DIR, @@ -357,7 +368,26 @@ class CachedIndex: _git(args=['read-tree', self.current_treeish], wait=True) -def commit(treeish='HEAD', message=None): +def check_safe_fraction(status): + safe = 0.1 + conf = _notmuch_config_get ('git.safe_fraction') + if conf and conf != '': + safe=float(conf) + + total = count_messages (TAG_PREFIX) + if total == 0: + _LOG.error('No existing tags with given prefix, stopping.') + _LOG.error('Use --force to override.') + exit(1) + change = len(status['added'])+len(status['deleted']) + fraction = change/total + _LOG.debug('total messages {:d}, change: {:d}, fraction: {:f}'.format(total,change,fraction)) + if fraction > safe: + _LOG.error('safe fraction {:f} exceeded, stopping.'.format(safe)) + _LOG.error('Use --force to override or reconfigure git.safe_fraction.') + exit(1) + +def commit(treeish='HEAD', message=None, force=False): """ Commit prefix-matching tags from the notmuch database to Git. """ @@ -368,6 +398,9 @@ def commit(treeish='HEAD', message=None): _LOG.warning('Nothing to commit') return + if not force: + check_safe_fraction (status) + with CachedIndex(NOTMUCH_GIT_DIR, treeish) as index: try: _update_index(status=status) @@ -418,9 +451,9 @@ def fetch(remote=None): _git(args=args, wait=True) -def init(remote=None): +def init(remote=None,format_version=None): """ - Create an empty nmbug repository. + Create an empty notmuch-git repository. This wraps 'git init' with a few extra steps to support subsequent status and commit commands. @@ -432,20 +465,40 @@ def init(remote=None): except FileExistsError: pass + if not format_version: + format_version = 1 + + format_version=int(format_version) + + if format_version > 1 or format_version < 0: + _LOG.error("Illegal format version {:d}".format(format_version)) + _sys.exit(1) + _spawn(args=['git', '--git-dir', NOTMUCH_GIT_DIR, 'init', '--initial-branch=master', '--quiet', '--bare'], wait=True) _git(args=['config', 'core.logallrefupdates', 'true'], wait=True) # create an empty blob (e69de29bb2d1d6434b8b29ae775ad8c2e48c5391) _git(args=['hash-object', '-w', '--stdin'], input='', wait=True) + allow_empty=('--allow-empty',) + if format_version >= 1: + allow_empty=() + # create a blob for the FORMAT file + (status, stdout, _) = _git(args=['hash-object', '-w', '--stdin'], stdout=_subprocess.PIPE, + input='{:d}\n'.format(format_version), wait=True) + verhash=stdout.rstrip() + _LOG.debug('hash of FORMAT blob = {:s}'.format(verhash)) + # Add FORMAT to the index + _git(args=['update-index', '--add', '--cacheinfo', '100644,{:s},FORMAT'.format(verhash)], wait=True) + _git( args=[ - 'commit', '--allow-empty', '-m', 'Start a new nmbug repository' + 'commit', *allow_empty, '-m', 'Start a new notmuch-git repository' ], additional_env={'GIT_WORK_TREE': NOTMUCH_GIT_DIR}, wait=True) -def checkout(): +def checkout(force=None): """ Update the notmuch database from Git. @@ -453,6 +506,10 @@ def checkout(): to Git. """ status = get_status() + + if not force: + check_safe_fraction(status) + with _spawn( args=['notmuch', 'tag', '--batch'], stdin=_subprocess.PIPE) as p: for id, tags in status['added'].items(): @@ -484,9 +541,9 @@ def _insist_committed(): _LOG.error('\n'.join([ 'Uncommitted changes to {prefix}* tags in notmuch', '', - "For a summary of changes, run 'nmbug status'", - "To save your changes, run 'nmbug commit' before merging/pull", - "To discard your changes, run 'nmbug checkout'", + "For a summary of changes, run 'notmuch-git status'", + "To save your changes, run 'notmuch-git commit' before merging/pull", + "To discard your changes, run 'notmuch-git checkout'", ]).format(prefix=TAG_PREFIX)) _sys.exit(1) @@ -508,7 +565,7 @@ def pull(repository=None, refspecs=None): args.append(repository) if refspecs: args.extend(refspecs) - with _tempfile.TemporaryDirectory(prefix='nmbug-pull.') as workdir: + with _tempfile.TemporaryDirectory(prefix='notmuch-git-pull.') as workdir: for command in [ ['reset', '--hard'], args]: @@ -526,7 +583,7 @@ def merge(reference='@{upstream}'): The default reference is '@{upstream}'. """ _insist_committed() - with _tempfile.TemporaryDirectory(prefix='nmbug-merge.') as workdir: + with _tempfile.TemporaryDirectory(prefix='notmuch-git-merge.') as workdir: for command in [ ['reset', '--hard'], ['merge', reference]]: @@ -541,8 +598,8 @@ def log(args=()): """ A simple wrapper for 'git log'. - After running 'nmbug fetch', you can inspect the changes with - 'nmbug log HEAD..@{upstream}'. + After running 'notmuch-git fetch', you can inspect the changes with + 'notmuch-git log HEAD..@{upstream}'. """ # we don't want output trapping here, because we want the pager. args = ['log', '--name-status', '--no-renames'] + list(args) @@ -551,7 +608,7 @@ def log(args=()): def push(repository=None, refspecs=None): - "Push the local nmbug Git state to a remote repository." + "Push the local notmuch-git Git state to a remote repository." if refspecs and not repository: repository = _get_remote() args = ['push'] @@ -574,13 +631,13 @@ def status(): * A - Tag is present in notmuch database, but not committed to nmbug - (equivalently, tag has been deleted in nmbug repo, e.g. by a + Tag is present in notmuch database, but not committed to notmuch-git + (equivalently, tag has been deleted in notmuch-git repo, e.g. by a pull, but not restored to notmuch database). * D - Tag is present in nmbug repo, but not restored to notmuch + Tag is present in notmuch-git repo, but not restored to notmuch database (equivalently, tag has been deleted in notmuch). * U @@ -588,7 +645,7 @@ def status(): Message is unknown (missing from local notmuch database). The second character (if present) represents a difference between - local and upstream branches. Typically 'nmbug fetch' needs to be + local and upstream branches. Typically 'notmuch-git fetch' needs to be run to update this. * a @@ -640,6 +697,32 @@ def _is_unmerged(ref='@{upstream}'): stdout=_subprocess.PIPE, wait=True) return base != fetch_head +class DatabaseCache: + def __init__(self): + try: + from notmuch2 import Database + self._notmuch = Database() + except ImportError: + self._notmuch = None + self._known = {} + + def known(self,id): + if id in self._known: + return self._known[id]; + + if self._notmuch: + try: + _ = self._notmuch.find(id) + self._known[id] = True + except LookupError: + self._known[id] = False + else: + (_, stdout, stderr) = _spawn( + args=['notmuch', 'search', '--exclude=false', '--output=files', 'id:{0}'.format(id)], + stdout=_subprocess.PIPE, + wait=True) + self._known[id] = stdout != None + return self._known[id] @timed def get_status(): @@ -647,14 +730,11 @@ def get_status(): 'deleted': {}, 'missing': {}, } + db = DatabaseCache() with PrivateIndex(repo=NOTMUCH_GIT_DIR, prefix=TAG_PREFIX) as index: maybe_deleted = index.diff(filter='D') for id, tags in maybe_deleted.items(): - (_, stdout, stderr) = _spawn( - args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)], - stdout=_subprocess.PIPE, - wait=True) - if stdout: + if db.known(id): status['deleted'][id] = tags else: status['missing'][id] = tags @@ -680,6 +760,7 @@ class PrivateIndex: self.lastmod = None self.checksum = None self._load_cache_file() + self.file_tree = None self._index_tags() def __enter__(self): @@ -705,6 +786,43 @@ class PrivateIndex: _LOG.error("Error decoding cache") _sys.exit(1) + @timed + def _read_file_tree(self): + self.file_tree = {} + + with _git( + args=['ls-files', 'tags'], + additional_env={'GIT_INDEX_FILE': self.index_path}, + stdout=_subprocess.PIPE) as git: + for file in git.stdout: + dir=_os.path.dirname(file) + tag=_os.path.basename(file).rstrip() + if dir not in self.file_tree: + self.file_tree[dir]=[tag] + else: + self.file_tree[dir].append(tag) + + + def _clear_tags_for_message(self, id): + """ + Clear any existing index entries for message 'id' + + Neither 'id' nor the tags in 'tags' should be encoded/escaped. + """ + + if self.file_tree == None: + self._read_file_tree() + + dir = _id_path(id) + + if dir not in self.file_tree: + return + + for file in self.file_tree[dir]: + line = '0 0000000000000000000000000000000000000000\t{:s}/{:s}\n'.format(dir,file) + yield line + + @timed def _index_tags(self): "Write notmuch tags to private git index." @@ -740,7 +858,7 @@ class PrivateIndex: if tag.startswith(prefix)] id = _xapian_unquote(string=id) if clear_tags: - for line in _clear_tags_for_message(index=self.index_path, id=id): + for line in self._clear_tags_for_message(id=id): git.stdin.write(line) for line in _index_tags_for_message( id=id, status='A', tags=tags): @@ -777,24 +895,6 @@ def _read_index_checksum (index_path): except FileNotFoundError: return None - -def _clear_tags_for_message(index, id): - """ - Clear any existing index entries for message 'id' - - Neither 'id' nor the tags in 'tags' should be encoded/escaped. - """ - - dir = 'tags/{id}'.format(id=_hex_quote(string=id)) - - with _git( - args=['ls-files', dir], - additional_env={'GIT_INDEX_FILE': index}, - stdout=_subprocess.PIPE) as git: - for file in git.stdout: - line = '0 0000000000000000000000000000000000000000\t{:s}\n'.format(file.strip()) - yield line - def _read_database_lastmod(): with _spawn( args=['notmuch', 'count', '--lastmod', '*'], @@ -802,6 +902,21 @@ def _read_database_lastmod(): (count,uuid,lastmod_str) = notmuch.stdout.readline().split() return (count,uuid,int(lastmod_str)) +def _id_path(id): + hid=_hex_quote(string=id) + from hashlib import blake2b + + if FORMAT_VERSION==0: + return 'tags/{hid}'.format(hid=hid) + elif FORMAT_VERSION==1: + idhash = blake2b(hid.encode('utf8'), digest_size=2).hexdigest() + return 'tags/{dir1}/{dir2}/{hid}'.format( + hid=hid, + dir1=idhash[0:2],dir2=idhash[2:]) + else: + _LOG.error("Unknown format version",FORMAT_VERSION) + _sys.exit(1) + def _index_tags_for_message(id, status, tags): """ Update the Git index to either create or delete an empty file. @@ -816,8 +931,7 @@ def _index_tags_for_message(id, status, tags): hash = '0000000000000000000000000000000000000000' for tag in tags: - path = 'tags/{id}/{tag}'.format( - id=_hex_quote(string=id), tag=_hex_quote(string=tag)) + path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag)) yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path) @@ -833,7 +947,7 @@ def _diff_refs(filter, a='HEAD', b='@{upstream}'): def _unpack_diff_lines(stream): "Iterate through (id, tag) tuples in a diff stream." for line in stream: - match = _TAG_FILE_REGEX.match(line.strip()) + match = _TAG_FILE_REGEX[FORMAT_VERSION].match(line.strip()) if not match: message = 'non-tag line in diff: {!r}'.format(line.strip()) if line.startswith(_TAG_DIRECTORY): @@ -847,15 +961,15 @@ def _unpack_diff_lines(stream): def _help(parser, command=None): """ - Show help for an nmbug command. + Show help for an notmuch-git command. Because some folks prefer: - $ nmbug help COMMAND + $ notmuch-git help COMMAND to - $ nmbug COMMAND --help + $ notmuch-git COMMAND --help """ if command: parser.parse_args([command, '--help']) @@ -871,6 +985,17 @@ def _notmuch_config_get(key): _sys.exit(1) return stdout.rstrip() +def read_format_version(): + try: + (status, stdout, stderr) = _git( + args=['cat-file', 'blob', 'master:FORMAT'], + stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, wait=True) + except SubprocessError as e: + _LOG.debug("failed to read FORMAT file from git, assuming format version 0") + return 0 + + return int(stdout) + # based on BaseDirectory.save_data_path from pyxdg (LGPL2+) def xdg_data_path(profile): resource = _os.path.join('notmuch',profile,'git') @@ -943,6 +1068,10 @@ if __name__ == '__main__': help=( "Argument passed through to 'git archive'. Set anything " 'before , see git-archive(1) for details.')) + elif command == 'checkout': + subparser.add_argument( + '-f', '--force', action='store_true', + help='checkout a large fraction of tags.') elif command == 'clone': subparser.add_argument( 'repository', @@ -951,6 +1080,9 @@ if __name__ == '__main__': 'URLS section of git-clone(1) for more information on ' 'specifying repositories.')) elif command == 'commit': + subparser.add_argument( + '-f', '--force', action='store_true', + help='commit a large fraction of tags.') subparser.add_argument( 'message', metavar='MESSAGE', default='', nargs='?', help='Text for the commit message.') @@ -965,6 +1097,11 @@ if __name__ == '__main__': subparser.add_argument( 'command', metavar='COMMAND', nargs='?', help='The command to show help for.') + elif command == 'init': + subparser.add_argument( + '--format-version', metavar='VERSION', + default = None, + help='create format VERSION repository.') elif command == 'log': subparser.add_argument( 'args', metavar='ARG', nargs='*', @@ -1061,6 +1198,11 @@ if __name__ == '__main__': _LOG.debug('prefix = {:s}'.format(TAG_PREFIX)) _LOG.debug('repository = {:s}'.format(NOTMUCH_GIT_DIR)) + if args.func != init: + FORMAT_VERSION = read_format_version() + + _LOG.debug('FORMAT_VERSION={:d}'.format(FORMAT_VERSION)) + if args.func == help: arg_names = ['command'] else: