scripts/xattr_user_id


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

#!/usr/bin/env python3
# Copyright 2020 vg
# SPDX-License-Identifier: MIT

'''
Description
===========

Tag given files with a unique incrementing id.

State File
==========

The state file records the last used id as a simple string and can be used
as a starting point when this program is run again. Otherwise the program
starts from 1 (== last used id is 0). The same starting point is assumed when
the state file does not exist.

If no filenames are given as positional command line arguments, filenames
are read from stdin.

If given files are already tagged, they are just displayed, not
retagged/renumbered unless -f is given.

Usage
=====

Usage: {progname} [--state STATE_FILE] [-z] [-f] [--] [FILENAME...]
       {progname} -h|--help

Options:

  -s, --state STATE_FILE
        Records and read back last used id from/to this file

  -h, --help
        Show the complete help

  -z
        Null instead of LF as filename separator for in and out

  -f
        Force. Rewrite already tagged files.

'''

### standard modules
import contextlib
import functools
import itertools
import operator
import os
import sys

### external modules
import docopt


# This script is not compatible below python3.7.2, always abort (not in main
# since the syntax itself can cause the script to fail later inconveniently).
# An explicit test is used instead of `assert` because assertions are removed
# when the interpreter runs with -O, which would silently skip the check.
if sys.hexversion < 0x03070200:
    sys.exit('error: python >= 3.7.2 is required')


def stream_split(sep='\n', stream=sys.stdin, *, rem='', chunksize=4096):
    """Lazily yield the pieces of *stream* delimited by *sep*.

    The stream is consumed in reads of *chunksize* characters, so the whole
    input is never held in memory at once.  *rem* is text that is put in
    front of the first read.  Whatever follows the last separator is always
    yielded as the final item, even when it is the empty string.
    """
    if stream.isatty():
        # Interactive input: read one character at a time so that a single
        # Ctrl-D is enough to end the input; speed does not matter here.
        chunksize = 1

    while True:
        chunk = stream.read(chunksize)
        if chunk == '':
            # End of stream.
            break
        # Everything but the last piece is complete; the last piece may be
        # continued by the next read, so it is carried over.
        *complete, rem = ''.join([rem, chunk]).split(sep)
        yield from complete

    yield rem


def filenames(arg_filenames, separator):
    """Yield ``(False, filename)`` for every non-empty filename to process.

    The names are taken from *arg_filenames* when it is non-empty; otherwise
    they are read from stdin and split on *separator*.  Empty names are
    skipped.  The first tuple element is always ``False``.
    """
    candidates = arg_filenames or stream_split(separator)
    for name in candidates:
        if name:
            yield False, name


def main():
    """Tag each input file with a unique incrementing id and print the result.

    Command line arguments are parsed by docopt from the module docstring.
    File names come from the FILENAME arguments, or from stdin when none are
    given, split on NUL with ``-z`` and on LF otherwise.  For every file the
    ``user.id`` extended attribute is read; files for which reading it fails
    with ``OSError`` (and every file when ``-f`` is given) receive the next
    id.  The id and the file name are printed for each file, terminated by
    the separator.

    When ``--state`` is given, the last used id is loaded from that file at
    start-up (a missing file means 0) and written back on exit.  The write
    also happens when processing stops on an error, so ids that were already
    assigned to files are not handed out again by a later run.

    Raises:
        ValueError: the state file or an existing ``user.id`` attribute does
            not hold an integer.
        OSError: the attribute cannot be written, or the state file cannot
            be read or written.
    """
    args = docopt.docopt(__doc__.format(progname=os.path.basename(sys.argv[0])))
    state_path = args['--state']

    last_used_id = 0
    if state_path:
        with contextlib.suppress(FileNotFoundError):
            with open(state_path, encoding='utf8') as stream:
                last_used_id = int(stream.read())

    separator = '\0' if args['-z'] else '\n'
    try:
        for untagged, filename in filenames(args['FILENAME'], separator):
            try:
                raw_tag = os.getxattr(filename, 'user.id')
                file_tag_id = int(raw_tag.decode('utf8'))
            except OSError:
                # The attribute could not be read: handle the file as not
                # tagged yet.
                untagged = True
            if untagged or args['-f']:
                # Advance the counter only once the attribute is really
                # written, so a failed write does not consume an id.
                next_id = last_used_id + 1
                os.setxattr(filename, 'user.id', str(next_id).encode('utf8'))
                last_used_id = file_tag_id = next_id
            print(file_tag_id, filename, end=separator)
    finally:
        # Persist the counter even if the loop was interrupted; otherwise
        # ids already stored on files would be reused on the next run.
        if state_path:
            with open(state_path, 'w', encoding='utf8') as stream:
                print(last_used_id, file=stream)


# Run only when executed as a script, so that importing this module (e.g. for
# testing) does not parse the command line or touch any file.
if __name__ == '__main__':
    main()