aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvg <vgm+dev@devys.org>2020-12-13 12:02:35 +0100
committervg <vgm+dev@devys.org>2020-12-13 12:02:35 +0100
commit5658d17ed31b8446cc9052ee8f8a9e61937c0324 (patch)
treeef6f686e2330216b8040816772f36733a7374d01
parent272a2b2fbc6830147f667e98384cdb6b6a07db71 (diff)
downloadscripts-5658d17ed31b8446cc9052ee8f8a9e61937c0324.tar.gz
scripts-5658d17ed31b8446cc9052ee8f8a9e61937c0324.tar.bz2
scripts-5658d17ed31b8446cc9052ee8f8a9e61937c0324.zip
add a script tagging uniquely files
-rwxr-xr-xscripts/xattr_user_id109
1 files changed, 109 insertions, 0 deletions
diff --git a/scripts/xattr_user_id b/scripts/xattr_user_id
new file mode 100755
index 0000000..28a1d99
--- /dev/null
+++ b/scripts/xattr_user_id
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# Copyright 2020 vg
+# SPDX-License-Identifier: MIT
+
+'''
+Description
+===========
+
+Tag given files with a unique incrementing id.
+
+State File
+==========
+
+State file records the last used id as a simple string and can be used as
+a starting point when recalling this program. Otherwise the program starts
+from 1 (== last used id is 0). The same starting point is assumed when
+the state file does not exist.
+
+If no filenames are given as positional command line arguments, read filenames
+from stdin.
+
+If given files are already tagged, they are just displayed, not
+retagged/renumbered unless -f is given.
+
+Usage
+=====
+
+Usage: {progname} [--state STATE_FILE] [-z] [-f] [--] [FILENAME...]
+ {progname} -h|--help
+
+Options:
+
+ -s, --state STATE_FILE
+ Record the last used id to this file and read it back from it
+
+ -h, --help
+ Show the complete help
+
+ -z
+ Null instead of LF as filename separator for in and out
+
+ -f
+ Force. Rewrite already tagged files.
+
+'''
+
+### standard modules
+import contextlib
+import functools
+import itertools
+import operator
+import os
+import sys
+
+### external modules
+import docopt
+
+
# This script is not compatible below python3.7.2, always abort (not in main
# since the syntax itself can cause the script to fail later inconveniently).
# An explicit check is used instead of ``assert`` because assertions are
# stripped when the interpreter runs with -O, which would disable the guard.
if sys.hexversion < 0x03070200:
    raise SystemExit('python >= 3.7.2 is required to run this script')
+
+
def stream_split(sep='\n', stream=None, *, rem='', chunksize=4096):
    '''
    Lazily split the text read from `stream` on `sep`.

    The stream is consumed in chunks of `chunksize` characters and each
    complete part is yielded as soon as its terminating separator has been
    read. The text following the last separator (possibly an empty string)
    is always yielded once at the end, mirroring ``str.split``.

    Parameters:

    sep
        Separator string; may be longer than one character.
    stream
        Text stream providing ``read(n)`` and ``isatty()``. ``None`` (the
        default) means ``sys.stdin`` as it is at call time.
    rem
        Initial text prepended to the first chunk read.
    chunksize
        Number of characters requested per ``read`` call.
    '''
    if stream is None:
        # Resolved here rather than in the signature: a default argument is
        # evaluated once at definition time and would keep pointing to the
        # original stdin even if sys.stdin is replaced afterwards.
        stream = sys.stdin
    if stream.isatty():
        # workaround the double Ctrl-D issue for interactive entry where perf
        # should not be an issue thus reading 1 by 1 is ok.
        chunksize = 1
    for data in iter(functools.partial(stream.read, chunksize), ''):
        # Everything but the last piece is terminated by a separator; the
        # last piece may be incomplete and is carried over to the next chunk.
        *complete, rem = (rem + data).split(sep)
        yield from complete
    yield rem
+
+
def filenames(arg_filenames, separator):
    '''
    Yield ``(untagged, filename)`` pairs for every non-empty filename.

    Filenames come from `arg_filenames` when it is non-empty; otherwise they
    are read through ``stream_split(separator)``. Empty names are skipped.
    The first item of each pair is always ``False``: it is the initial value
    of the "untagged" flag that the caller updates itself.
    '''
    for name in arg_filenames or stream_split(separator):
        if name:
            yield False, name
+
+
def main():
    '''
    Tag files with a unique incrementing id stored in the `user.id` xattr.

    Command line arguments are parsed by docopt from the module docstring.
    When --state is given, the last used id is read from that file (a
    missing file means 0) and written back when processing ends. Each
    filename is printed preceded by its id, terminated by the selected
    separator (NUL with -z, LF otherwise).
    '''
    args = docopt.docopt(__doc__.format(progname=os.path.basename(sys.argv[0])))
    state_path = args['--state']
    force = args['-f']

    last_used_id = 0
    if state_path:
        with contextlib.suppress(FileNotFoundError):
            with open(state_path, encoding='utf8') as stream:
                last_used_id = int(stream.read())

    separator = '\0' if args['-z'] else '\n'
    try:
        for untagged, filename in filenames(args['FILENAME'], separator):
            try:
                file_tag_id = int(os.getxattr(filename, 'user.id').decode('utf8'))
            except OSError:
                # Any failure to read the attribute is treated as "not tagged
                # yet"; a genuinely inaccessible file fails in setxattr below.
                untagged = True
            if untagged or force:
                new_id = last_used_id + 1
                os.setxattr(filename, 'user.id', str(new_id).encode('utf8'))
                # Only consume the id once it has really been written.
                last_used_id = new_id
                file_tag_id = new_id
            print(file_tag_id, filename, end=separator)
    finally:
        # Persist the counter even when processing is interrupted by an
        # error: ids already written to files must never be handed out again
        # by a later run.
        if state_path:
            with open(state_path, 'w', encoding='utf8') as stream:
                print(last_used_id, file=stream)
+
+
# Run only when executed as a script, so that importing this module (for
# instance to reuse or test its helpers) does not start tagging files.
if __name__ == '__main__':
    main()