From 5658d17ed31b8446cc9052ee8f8a9e61937c0324 Mon Sep 17 00:00:00 2001 From: vg Date: Sun, 13 Dec 2020 12:02:35 +0100 Subject: add a script tagging uniquely files --- scripts/xattr_user_id | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100755 scripts/xattr_user_id diff --git a/scripts/xattr_user_id b/scripts/xattr_user_id new file mode 100755 index 0000000..28a1d99 --- /dev/null +++ b/scripts/xattr_user_id @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# Copyright 2020 vg +# SPDX-License-Identifier: MIT + +''' +Description +=========== + +Tag given files with a unique incrementing id. + +State File +========== + +State file records the last used id as a simple string and can be used as +a starting point when running this program again. Otherwise the program starts +from 1 (== last used id is 0). The same starting point is assumed when +the state file does not exist. + +If no filenames are given as positional command line arguments, read filenames +from stdin. + +If given files are already tagged, they are just displayed, not +retagged/renumbered unless -f is given. + +Usage +===== + +Usage: {progname} [--state STATE_FILE] [-z] [-f] [--] [FILENAME...] + {progname} -h|--help + +Options: + + -s, --state STATE_FILE + Record and read back the last used id to/from this file + + -h, --help + Show the complete help + + -z + Null instead of LF as filename separator for in and out + + -f + Force. Rewrite already tagged files. + +''' + +### standard modules +import contextlib +import functools +import itertools +import operator +import os +import sys + +### external modules +import docopt + + +# This script is not compatible below python3.7.2, always abort (not in main +# since the syntax itself can cause the script to fail later inconveniently). 
# Explicit check instead of ``assert``: assertions are stripped when python
# runs with -O, which would silently disable the abort.
if sys.hexversion < 0x03070200:
    sys.exit('python >= 3.7.2 is required to run this script')


def stream_split(sep='\n', stream=sys.stdin, *, rem='', chunksize=4096):
    '''
    Lazily split the content of a text stream on a separator.

    The stream is read by chunks of ``chunksize`` characters until ``read``
    returns an empty string; each complete part is yielded as soon as its
    closing separator has been read.

    sep
        Separator string between two parts.
    stream
        Text stream providing ``read(size)`` and ``isatty()``.
    rem
        Initial remainder, prepended to the first data read.
    chunksize
        Number of characters requested per ``read`` call (forced to 1 when
        the stream is a tty).

    Yields every part found before a separator, then the text following the
    last separator. That final item is always yielded, even when empty
    (empty stream, or stream ending with a separator).
    '''
    if stream.isatty():
        # workaround the double Ctrl-D issue for interactive entry where perf
        # should not be an issue thus reading 1 by 1 is ok.
        chunksize = 1
    for data in iter(functools.partial(stream.read, chunksize), ''):
        # Everything but the last element is a complete part; the last one
        # may still be continued by the next chunk.
        *parts, rem = (rem + data).split(sep)
        yield from parts
    yield rem


def filenames(arg_filenames, separator):
    '''
    Yield an ``(untagged, filename)`` pair for every non-empty filename.

    arg_filenames
        Filenames given on the command line; when empty (or None) the
        filenames are read from stdin, split on ``separator``.
    separator
        Separator between filenames read from stdin.

    ``untagged`` is always False here: the caller switches it to True once
    it has found out that the file carries no tag.
    '''
    for name in arg_filenames or stream_split(separator):
        if name:
            yield False, name


def main():
    '''
    Tag every given file with a unique incrementing id.

    The id is stored in the ``user.id`` extended attribute. Files already
    tagged keep their id unless -f is given. For each file, the id and the
    filename are printed, terminated by the filename separator.

    When --state is given, the last used id is read from the state file
    (a missing file means 0) and written back at the end, including when
    the run is interrupted by an error, so ids already written to files
    are never handed out again by a later run.

    Raises OSError when an extended attribute cannot be read or written for
    a reason other than "attribute not set", and ValueError when the state
    file or an existing ``user.id`` attribute does not hold an integer.
    '''
    # Local import: only this function needs it.
    import errno

    args = docopt.docopt(__doc__.format(progname=os.path.basename(sys.argv[0])))
    state_path = args['--state']

    last_used_id = 0
    if state_path:
        with contextlib.suppress(FileNotFoundError):
            with open(state_path, encoding='utf8') as stream:
                last_used_id = int(stream.read())

    separator = '\0' if args['-z'] else '\n'
    try:
        for untagged, filename in filenames(args['FILENAME'], separator):
            try:
                file_tag_id = int(os.getxattr(filename, 'user.id').decode('utf8'))
            except OSError as err:
                # Only a missing attribute means "untagged". Any other
                # failure (missing file, permission denied, ...) must not
                # lead to overwriting a tag that could not be read.
                if err.errno != errno.ENODATA:
                    raise
                untagged = True
            if untagged or args['-f']:
                next_id = last_used_id + 1
                os.setxattr(filename, 'user.id', str(next_id).encode('utf8'))
                # The id is consumed only once it is really on the file.
                last_used_id = file_tag_id = next_id
            print(file_tag_id, filename, end=separator)
    finally:
        # Always persist the counter, otherwise an error in the middle of
        # the run would let the next run reuse ids already assigned.
        if state_path:
            with open(state_path, 'w', encoding='utf8') as stream:
                print(last_used_id, file=stream)


if __name__ == '__main__':
    main()