From 8fe0b2c7754873ec33bccb7952683b96113491bc Mon Sep 17 00:00:00 2001
From: vg
Date: Fri, 7 Dec 2018 12:05:14 +0100
Subject: first commit; add contacts_validation

---
 .../contacts_validation/__init__.py                | 114 +++++++
 .../contacts_validation/command_line.py            |  61 ++++
 .../contacts_validation/data/contacts_schema.yaml  | 320 +++++++++++++++++++++
 3 files changed, 495 insertions(+)
 create mode 100644 contacts_validation/contacts_validation/__init__.py
 create mode 100644 contacts_validation/contacts_validation/command_line.py
 create mode 100644 contacts_validation/contacts_validation/data/contacts_schema.yaml

(limited to 'contacts_validation/contacts_validation')

diff --git a/contacts_validation/contacts_validation/__init__.py b/contacts_validation/contacts_validation/__init__.py
new file mode 100644
index 0000000..822e789
--- /dev/null
+++ b/contacts_validation/contacts_validation/__init__.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# Copyright 2018 vg@devys.org
+# SPDX-License-Identifier: MIT
+
+import collections
+import collections.abc
+import datetime
+import os
+import pprint
+import sys
+
+import jsonschema
+import yaml
+
+
+def convert_types(obj):
+    '''Recursively convert YAML-native values to JSON-schema friendly ones.
+
+    Dates are replaced by their YYYY-MM-DD string form. Mutable sequences
+    and mappings are converted in place; other values are returned as is.
+    '''
+    if isinstance(obj, (str, bytes)):
+        # strings are sequences too: return early instead of recursing
+        return obj
+    if isinstance(obj, datetime.date):
+        # explicit format: '%F' is not supported on every platform
+        return obj.strftime('%Y-%m-%d')
+    if isinstance(obj, collections.abc.MutableSequence):
+        for i, value in enumerate(obj):
+            obj[i] = convert_types(value)
+    elif isinstance(obj, collections.abc.MutableMapping):
+        for key, value in obj.items():
+            obj[key] = convert_types(value)
+    return obj
+
+
+def print_contact(contact):
+    '''Print the display name and uuid of a contact with colored labels.
+
+    Missing fields are shown as <missing>, since this is also used to
+    report contacts that failed validation.
+    '''
+    cfield = '\033[31;1m'
+    cnormal = '\033[0m'
+    display = contact.get('display', '<missing>')
+    uuid = contact.get('uuid', '<missing>')
+    print(f'{cfield}Contact{cnormal}: {display}'
+          f' [{cfield}uuid{cnormal}: {uuid}]')
+
+
+def print_validation_error(data_obj, exception):
+    '''Print a jsonschema validation error and the contact it applies to.
+
+    data_obj is the validated contacts list and exception the
+    jsonschema.ValidationError raised while validating it.
+    '''
+    print('Best match error:')
+    print(exception)
+    print()
+
+    cfield = '\033[31;1m'
+    cnormal = '\033[0m'
+    print('Errors in context sorted by relevance:')
+    for i, suberror in enumerate(sorted(exception.context,
+                                        key=jsonschema.exceptions.relevance)):
+        if i:
+            print()
+        print(f'=> {cfield}schema{cnormal}:',
+              '.'.join(str(e) for e in suberror.absolute_schema_path))
+        print(f'   {cfield}instance{cnormal}:',
+              '.'.join(str(e) for e in suberror.absolute_path))
+        print(f'   {cfield}E{cnormal}:', suberror.message)
+
+    path = list(exception.absolute_path)
+    print(f'Error occurred on: {".".join(str(e) for e in path)}')
+    # the first path element designates the faulty contact; the path is empty
+    # when the top-level document itself is invalid (ex. not a list)
+    if path:
+        contact = data_obj[path[0]]
+        if isinstance(contact, collections.abc.Mapping):
+            print_contact(contact)
+    print()
+
+
+def validate_yaml_data(data_obj, schema_obj):
+    '''Validate contacts against the schema, then report duplicates.
+
+    Problems are printed on stdout. Duplicated display and uuid values are
+    only searched when the schema validation succeeded, since the search
+    relies on every contact having both fields.
+    '''
+    try:
+        jsonschema.validate(convert_types(data_obj), schema_obj)
+    except jsonschema.ValidationError as e:
+        print_validation_error(data_obj, e)
+        return
+
+    # schema ok, check for duplicates in display and uuids
+    display_dups = collections.defaultdict(list)
+    uuid_dups = collections.defaultdict(list)
+    for index, item in enumerate(data_obj):
+        display_dups[item['display']].append(index)
+        uuid_dups[item['uuid']].append(index)
+    reports = [
+        ('WARNING: duplicated display value for these contacts:',
+         display_dups),
+        ('ERROR: duplicated uuid value for these contacts:', uuid_dups),
+    ]
+    for header, groups in reports:
+        # a value is duplicated as soon as two contacts share it
+        for indexes in (v for v in groups.values() if len(v) > 1):
+            print(header)
+            for item in (data_obj[i] for i in indexes):
+                print(f'=> display: {item["display"]} uuid: {item["uuid"]}')
diff --git a/contacts_validation/contacts_validation/command_line.py b/contacts_validation/contacts_validation/command_line.py
new file mode 100644
index 0000000..0310c78
--- /dev/null
+++ b/contacts_validation/contacts_validation/command_line.py
@@ -0,0 +1,61 @@
+# Copyright 2018 vg@devys.org
+# SPDX-License-Identifier: MIT
+
+'''
+Validate my contacts.
+
+if no FILENAME or one filename is -, stdin will be used.
+
+Usage: contacts-validation [options] [--] [FILENAME...]
+       contacts-validation -h|--help|--help-format
+
+Options:
+  -h, --help      Display this help message
+  --help-format   display schema fields name/title/descriptions
+'''
+
+import os
+import sys
+
+import docopt
+import yaml
+
+from . import validate_yaml_data
+
+
+def gen_streams(filenames):
+    '''Yield an open text stream for each filename, - meaning stdin.
+
+    A file is closed when the next stream is requested, so each stream has
+    to be consumed before advancing the generator.
+    '''
+    for filename in filenames:
+        if filename == '-':
+            yield sys.stdin
+        else:
+            with open(filename, 'r', encoding='utf8') as stream:
+                yield stream
+
+
+def main():
+    'function called only when script invoked directly on command line'
+    args = docopt.docopt(__doc__)
+
+    if args['--help-format']:
+        print('for now see the schema yaml file for description')
+        raise SystemExit(0)
+
+    schema_path = os.path.join(os.path.dirname(__file__),
+                               'data', 'contacts_schema.yaml')
+    with open(schema_path, 'r', encoding='utf8') as schema_fh:
+        schema_obj = yaml.safe_load(schema_fh)
+    # explicit checks rather than assert, which is stripped by python -O
+    if not schema_obj:
+        raise SystemExit(f'error: empty schema: {schema_path}')
+
+    for i, stream in enumerate(gen_streams(args['FILENAME'] or ['-'])):
+        print('#'*60, f'# Validating stream {i}', '#'*60, sep='\n')
+        yaml_data = yaml.safe_load(stream)
+        if not yaml_data:
+            raise SystemExit(f'error: no contact found in stream {i}')
+        validate_yaml_data(yaml_data, schema_obj)
diff --git a/contacts_validation/contacts_validation/data/contacts_schema.yaml b/contacts_validation/contacts_validation/data/contacts_schema.yaml
new file mode 100644
index 0000000..7431d06
--- /dev/null
+++ b/contacts_validation/contacts_validation/data/contacts_schema.yaml
@@ -0,0 +1,320 @@
+# vim: set ts=2 sts=2 sw=2 :
+$schema: http://json-schema.org/draft-07/schema#
+title: contacts
+description: schema of my personal contacts book
+type: array
+items:
+  title: contact
+  type: object
+  additionalProperties: false
+  required: [display, uuid]
+  properties:
+
+    #####################
+    # required properties
+    #####################
+
+    display:
+      description: contact name, displayed as is
+      type: string
+
+    uuid:
+      description: unique identifier of the contact (ex. uuidgen command)
+      type: string
+      pattern: '^[\dabcdef]{8}-[\dabcdef]{4}-[\dabcdef]{4}-[\dabcdef]{4}-[\dabcdef]{12}$'
+
+    #######################
+    # additional properties
+    #######################
+    # note: I made the choice to have to explicitly put property names (like
+    # phone: 'numberxxx' for phones) instead of having the possibility to
+    # either have phones: ['numberxxx', {phone: 'numberyyy', otherprop:
+    # xxx}]). It is a little less easy to write, but it is clearer when
+    # manually reading a contact (and less confusing).
+
+    firstname:
+      $ref: '#/definitions/simple_strings'
+
+    lastname:
+      $ref: '#/definitions/simple_string'
+
+    lastnamebm:
+      title: last name (surname) before being married (birth last name)
+      $ref: '#/definitions/simple_string'
+
+    nickname:
+      $ref: '#/definitions/simple_strings'
+
+    title:
+      $ref: '#/definitions/simple_string'
+
+    comments:
+      $ref: '#/definitions/comments'
+
+    role:
+      #$ref: '#/definitions/simple_string'
+      type: string
+
+    org:
+      type: array
+      items: {type: string}
+
+    birthday:
+      $ref: '#/definitions/datetime'
+
+    birthday_ignore:
+      description:
+        ignore this birthday when generating calendar events (ex. with
+        rem-generate-birthdays)
+      type: boolean
+
+    married:
+      $ref: '#/definitions/datetime'
+
+    favorite:
+      description:
+        use this contact as favorite, a special field in vcard format, used by
+        phones supporting it
+      type: boolean
+
+    related:
+      description: this contact is related to another pointed by this uri
+      type: string
+      format: uri
+
+    kind:
+      description: the kind of contact, or why this contact has been added
+      type: string
+      enum:
+        - job # here because of job
+        - com # here because it is a commercial contact of anything I use
+        - friend # here because it is one of my friend, or friend of friend
+        - family # here because it is part of my family
+        - school # contact here because of the school
+
+    urls:
+      type: array
+      items:
+        description: a uri associated with the contact (ex. cv site)
+        type: string
+        format: uri
+
+    notes:
+      $ref: '#/definitions/simple_strings'
+
+    websites:
+      type: array
+      items: {$ref: '#/definitions/website'}
+
+    associations:
+      type: array
+      items: {$ref: '#/definitions/association'}
+
+    emails:
+      type: array
+      items: {$ref: '#/definitions/email'}
+
+    ims:
+      title: instant messages addresses
+      type: array
+      items: {$ref: '#/definitions/im'}
+
+    phones:
+      type: array
+      items: {$ref: '#/definitions/phone'}
+
+    addresses:
+      type: array
+      items: {$ref: '#/definitions/address'}
+
+    events:
+      type: array
+      items: {$ref: '#/definitions/event'}
+
+    #imported:
+    #  description: free form, used for the first time import of contact
+    #  type: [array, object]
+
+
+definitions:
+
+  email:
+    type: object
+    additionalProperties: false
+    required: [email]
+    properties:
+      email:
+        type: string
+        # limitation: a true e-mail address can contain multiple '@' symbols,
+        # but I accept to not store them in my contacts.
+        pattern: '^[^@]+@[^@]+$'
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  im:
+    type: object
+    additionalProperties: false
+    required: [im]
+    properties:
+      im: {type: string, format: uri}
+      type:
+        type: string
+        enum: [xmpp]
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  phone:
+    type: object
+    additionalProperties: false
+    required: [phone]
+    properties:
+      phone:
+        type: string
+        # the group is required so that ^ and $ apply to every alternative
+        pattern: '^(\+[1-9]{2}\d{9}|[1-9]\d{2}|[1-9]\d{4})$'
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  website:
+    type: object
+    additionalProperties: false
+    required: [website]
+    properties:
+      website: {type: string, format: uri}
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  address:
+    type: object
+    additionalProperties: false
+    required: [street, code, city, country]
+    properties:
+      label: {$ref: '#/definitions/simple_string'}
+      street:
+        type: string
+        pattern: "^[\\w' .-]+$"
+      code:
+        # allOf because draft-07 ignores keywords placed next to $ref
+        allOf:
+          - {type: [string, number]}
+          - {$ref: '#/definitions/simple_string_pattern'}
+      city: {type: string, pattern: "^[\\w' ,-]+$"}
+      country: {$ref: '#/definitions/simple_string'}
+      box:
+        # allOf because draft-07 ignores keywords placed next to $ref
+        allOf:
+          - {type: [string, number]}
+          - {$ref: '#/definitions/simple_string_pattern'}
+      extended: {$ref: '#/definitions/simple_string'}
+      region: {$ref: '#/definitions/simple_string'}
+
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  association:
+    type: object
+    additionalProperties: false
+    required: [association]
+    properties:
+      association: {$ref: '#/definitions/simple_string'}
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+
+  event:
+    description:
+      any date of importance for this contact (assume 1 day if duration or end
+      date not given)
+    type: object
+    additionalProperties: false
+    # oneOf below defines until and duration as mutually exclusive, see
+    # https://stackoverflow.com/questions/28162509/mutually-exclusive-property-groups
+    # for details
+    oneOf:
+      - {required: [event, date], not: {anyOf: [{required: [until]}, {required: [duration]}]}}
+      - {required: [event, date, until], not: {required: [duration]}}
+      - {required: [event, date, duration], not: {required: [until]}}
+    properties:
+      event: {type: string, description: description of the event}
+      date: {$ref: '#/definitions/datetime'}
+      comments: {$ref: '#/definitions/comments'}
+      tags: {$ref: '#/definitions/tags'}
+      until:
+        description:
+          the event was on-going until this date, mutually exclusive with
+          duration
+        $ref: '#/definitions/datetime'
+      duration:
+        type: number
+        minimum: 1
+        multipleOf: 1 # force to be an integer
+        default: 1
+        description:
+          duration of the event expressed in days, mutually exclusive with
+          until
+
+  comments:
+    type: string
+    description:
+      free form comments (in plural even though being a single block of text
+      since the text often contains multiple comments)
+
+  tags:
+    type: array
+    uniqueItems: true
+    items:
+      type: string
+      title: tags
+      description: possible tags list
+      # note: not sure if I keep the enum
+      enum:
+        - main
+        - wired
+        - phone
+        - private # a private phone, not for work, not for professional
+        - home # a phone shared for all person residing in the same house
+        - mobile # a mobile/cell phone
+        - old
+        - box
+        - short
+        - voicemail
+        - voice
+        - work # a phone number dedicated for work, professional use
+        - parents
+        - down
+        - gateway
+        - maybe_invalid
+        - school
+        - uk
+        - fax
+
+  strings:
+    description: validate a string or list of strings, no pattern restriction
+    type: [string, array]
+    items:
+      type: string
+
+  simple_string_pattern:
+    description:
+      validate a simple string, simple means they contains alphanumeric and
+      space and dash and simple quote characters only
+    pattern: "^[\\w' -]+$"
+
+  simple_string:
+    # draft-07 ignores keywords placed next to $ref, so combine with allOf
+    allOf:
+      - {type: string}
+      - {$ref: '#/definitions/simple_string_pattern'}
+
+  simple_strings:
+    description:
+      validate a simple string or list of simple strings
+    anyOf:
+      - {$ref: '#/definitions/simple_string'}
+      - type: array
+        items: {$ref: '#/definitions/simple_string'}
+
+  datetime:
+    type: string
+    description: matches a date in the form YYYY-MM-DD
+    pattern: '^\d{4}-\d{2}-\d{2}$'
+
-- 
cgit v1.2.3