From 5658d17ed31b8446cc9052ee8f8a9e61937c0324 Mon Sep 17 00:00:00 2001
From: vg <vgm+dev@devys.org>
Date: Sun, 13 Dec 2020 12:02:35 +0100
Subject: add a script tagging uniquely files

---
 scripts/xattr_user_id | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100755 scripts/xattr_user_id

diff --git a/scripts/xattr_user_id b/scripts/xattr_user_id
new file mode 100755
index 0000000..28a1d99
--- /dev/null
+++ b/scripts/xattr_user_id
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# Copyright 2020 vg
+# SPDX-License-Identifier: MIT
+
+'''
+Description
+===========
+
+Tag given files with a unique incrementing id.
+
+State File
+==========
+
+State file records the last used id as a simple string and can be used as
+a starting point when recalling this program. Otherwise the program starts
+from 1 (== last used id is 0). The same starting point is assumed when
+the state file does not exist.
+
+If no filenames are given as positional command line arguments, read filenames
+from stdin.
+
+If given files are already tagged, they are just displayed, not
+retagged/renumbered unless -f is given.
+
+Usage
+=====
+
+Usage: {progname} [--state STATE_FILE] [-z] [-f] [--] [FILENAME...]
+       {progname} -h|--help
+
+Options:
+
+  -s, --state STATE_FILE
+        Record and read back the last used id to/from this file
+
+  -h, --help
+        Show the complete help
+
+  -z
+        Null instead of LF as filename separator for in and out
+
+  -f
+        Force. Rewrite already tagged files.
+
+'''
+
+### standard modules
+import contextlib
+import functools
+import itertools
+import operator
+import os
+import sys
+
+### external modules
+import docopt
+
+
+# Needs python >= 3.7.2: abort at load time (not in main), even under -O.
+if sys.hexversion < 0x03070200:
+    sys.exit('python >= 3.7.2 is required')
+
+
+def stream_split(sep='\n', stream=sys.stdin, *, rem='', chunksize=4096):
+    '''Yield the sep-separated parts of rem + stream content, lazily.
+
+    The part after the last sep is always yielded, even when empty.'''
+    # A tty is read 1 char at a time to work around the double Ctrl-D issue
+    # of interactive entry, where performance should not be an issue.
+    size = 1 if stream.isatty() else chunksize
+    for data in iter(functools.partial(stream.read, size), ''):
+        *parts, rem = (rem + data).split(sep)
+        yield from parts
+    yield rem
+
+
+def filenames(arg_filenames, separator):
+    '''Yield (False, name) for each non-empty given (or stdin) filename.'''
+    names = arg_filenames or stream_split(separator)
+    yield from ((False, name) for name in names if name)
+
+
+def main():
+    '''Tag the given files with unique ids, always saving the last used id.'''
+    args = docopt.docopt(__doc__.format(progname=os.path.basename(sys.argv[0])))
+    last_used_id = 0
+    if args['--state']:
+        with contextlib.suppress(FileNotFoundError):
+            with open(args['--state'], encoding='utf8') as stream:
+                last_used_id = int(stream.read())
+    separator = '\0' if args['-z'] else '\n'
+    try:
+        for untagged, filename in filenames(args['FILENAME'], separator):
+            try:
+                file_tag_id = int(os.getxattr(filename, 'user.id').decode())
+            except OSError:
+                untagged = True  # no readable tag: give the file a new id
+            if untagged or args['-f']:
+                last_used_id += 1
+                os.setxattr(filename, 'user.id', str(last_used_id).encode())
+                file_tag_id = last_used_id
+            print(file_tag_id, filename, end=separator)
+    finally:  # even on error, else ids already given would be reused next run
+        if args['--state']:
+            with open(args['--state'], 'w', encoding='utf8') as stream:
+                print(last_used_id, file=stream)
+
+if __name__ == '__main__':
+    main()
-- 
cgit v1.2.3