utils¶
Utilities for manipulating dicts and lists.
add(d, key, value)
¶
Append key-value pair to dict d.
If key already exists in d, its value is converted to a list
and value is appended to it. value may also be a list. Values
are not duplicated.
Source code in tripper/datadoc/utils.py
def add(d: dict, key: str, value: "Any") -> None:
    """Append key-value pair to dict `d`.

    If `key` already exists in `d`, its value is converted to a list
    and `value` is appended to it.  `value` may also be a list, in which
    case each of its items is appended.  Values are not duplicated.

    If the merged result holds exactly one item, that item is stored
    directly instead of a one-element list.  Otherwise the merged list
    is stored sorted by the string representation of its items, with
    dicts placed at the end.

    Arguments:
        d: Dict to update in-place.
        key: Key to add or extend.
        value: Value, or list of values, to add.
    """
    if key not in d:
        d[key] = value
        return

    current = d[key]
    existing = current if isinstance(current, list) else [current]

    if isinstance(value, dict):
        merged = existing if value in existing else existing + [value]
    else:
        incoming = value if isinstance(value, list) else [value]
        try:
            merged = list(set(existing).union(incoming))
        except TypeError:
            # Some items are unhashable (e.g. dicts), so fall back to
            # equality checks.  Checking against the growing result also
            # removes duplicates that occur within `incoming` itself.
            merged = list(existing)
            for item in incoming:
                if item not in merged:
                    merged.append(item)

    if len(merged) == 1:
        d[key] = merged[0]
    else:
        d[key] = sorted(
            merged,
            # Sort dicts at end, by representing them with a huge
            # unicode character
            key=lambda x: "\uffff" if isinstance(x, dict) else str(x),
        )
addnested(d, key, value, cls=None)
¶
Like add(), but allows key to be a dot-separated list of sub-keys.
Returns the updated d.
Each sub-key will be added to d as a corresponding sub-dict.
Subdicts will be of type cls. If cls is None, subdicts will default
to the same type as d if d is a mapping, or to a dict otherwise.
Examples:
>>> d = {}
>>> addnested(d, "a.b.c", "val")
{'a': {'b': {'c': 'val'}}}
Source code in tripper/datadoc/utils.py
def addnested(
    d: "Union[dict, list]",
    key: str,
    value: "Any",
    cls: "Optional[type]" = None,
) -> "Union[dict, list]":
    """Like add(), but allows `key` to be a dot-separated list of sub-keys.

    Returns the updated `d`.

    Each sub-key will be added to `d` as a corresponding sub-dict.
    Subdicts will be of type `cls`.  If `cls` is None, subdicts will
    default to the same type as `d` if `d` is a mapping, or to a dict
    otherwise.

    If `d` is a list, the value is added to the first dict found in the
    list.  If the list contains no dict, a new one is appended.

    Arguments:
        d: Dict or list to update in-place.
        key: Key, optionally dot-separated, to add.
        value: Value to add.
        cls: Type used for newly created subdicts.

    Returns:
        The updated `d`.

    Example:

    >>> d = {}
    >>> addnested(d, "a.b.c", "val")
    {'a': {'b': {'c': 'val'}}}
    """
    # pylint: disable=too-many-branches
    # Keep the caller's argument apart from the resolved type.  It is
    # passed on to recursive calls, so an explicit `cls` is honoured at
    # every level while `None` still lets each level pick its default.
    requested = cls
    if cls is None:
        cls = type(d) if isinstance(d, Mapping) else dict

    if "." in key:
        first, rest = key.split(".", 1)
        if isinstance(d, list):
            for ele in d:
                if isinstance(ele, dict):
                    addnested(ele, key, value, cls=requested)
                    break
            else:
                d.append(addnested(cls(), key, value, cls=requested))
        elif first in d and isinstance(d[first], (dict, list)):
            addnested(d[first], rest, value, cls=requested)
        else:
            addnested(d, first, addnested(cls(), rest, value, cls=requested))
    elif isinstance(d, list):
        for ele in d:
            if isinstance(ele, dict):
                add(ele, key, value)
                break
        else:
            # No dict to add to: append a new one of the requested type.
            item = cls()
            item[key] = value
            d.append(item)
    else:
        add(d, key, value)
    return d
asseq(value)
¶
Returns a string or sequence as an iterable.
Source code in tripper/datadoc/utils.py
def asseq(value: "Union[str, Sequence, None]") -> "Sequence":
    """Return `value` in a form that can be iterated over item by item.

    A string is wrapped in a one-element list, a falsy value (None or an
    empty sequence) gives an empty list, and any other value is returned
    unchanged.
    """
    if isinstance(value, str):
        return [value]
    if not value:
        return []
    return value
get(d, key, default=None, aslist=True)
¶
Like d.get(key, default) but returns the value as a list if
aslist is True and value is not already a list.
An empty list is returned in the special case that key is not in
d and default is None.
Source code in tripper/datadoc/utils.py
def get(
    d: dict, key: str, default: "Any" = None, aslist: bool = True
) -> "Any":
    """Look up `key` in `d`, optionally wrapping the result in a list.

    Works like `d.get(key, default)`.  When `aslist` is true, a value
    that is not already a list is returned as a one-element list, and
    None is returned as an empty list (e.g. when `key` is missing and
    `default` is None).
    """
    value = d.get(key, default)
    if not aslist or isinstance(value, list):
        return value
    if value is None:
        return []
    return [value]
getlabel(d, default=None)
¶
Return label from a JSON-LD dict d.
Any of the following keys in d (listed in the order of
precedence, from high to low) will be interpreted as a label:
- skos:prefLabel
- rdfs:label
- prefLabel
- label
If d has none of the above keys and default is not None,
default is returned. Otherwise iriname(d["@id"]) is returned.
Examples:
>>> getlabel({"@id": "ex:A", "label": "a"})
'a'
Source code in tripper/datadoc/utils.py
def getlabel(d: dict, default: "Optional[str]" = None) -> str:
    """Return label from a JSON-LD dict `d`.

    Any of the following keys in `d` (listed in the order of
    precedence, from high to low) will be interpreted as a label:

    - skos:prefLabel
    - rdfs:label
    - prefLabel
    - label

    The `SKOS.prefLabel` and `RDFS.label` constants are tried just
    before their prefixed string counterparts.

    If `d` has none of the above keys and `default` is not None,
    `default` is returned.  Otherwise `iriname(d["@id"])` is returned.

    Arguments:
        d: JSON-LD dict to get the label from.
        default: Value to return if `d` has no label key.

    Raises:
        InvalidDatadocError: If `d` has no label key, `default` is None
            and `d` has no "@id" key.

    Example:

    >>> getlabel({"@id": "ex:A", "label": "a"})
    'a'
    """
    labels = (
        # The order is on purpose. prefLabel has precedence over label.
        # But qualified IRIs have precedence over keywords.
        SKOS.prefLabel,
        "skos:prefLabel",
        RDFS.label,
        "rdfs:label",
        "prefLabel",
        "label",
    )
    for label in labels:
        if label in d:
            return d[label]
    # Compare against None, as documented, so that an explicitly given
    # empty-string default is returned as well.
    if default is not None:
        return default
    if "@id" in d:
        return iriname(d["@id"])
    raise InvalidDatadocError(f"Cannot infer label from JSON-LD dict: {d}")
iriname(value)
¶
Return the name part of an IRI or CURIE.
Rules:
- If value has no ":", return it as-is.
- If value contains "#", return everything after the last "#".
- If value does not start with a scheme followed by "://" (for example http:// or https://), return everything after the last ":".
- Otherwise, return everything after the last "/".
Exceptions:
| Type | Description |
|---|---|
ValueError |
If the inferred name is empty or is invalid. Valid names - start with A-Z, a-z or _ - the rest may in addition contain 0-9, ., +, -, or / |
Source code in tripper/datadoc/utils.py
def iriname(value: str) -> str:
    """Return the name part of an IRI or CURIE.

    The name is selected as follows:
    - If `value` has no ":", the whole of `value` is the name.
    - If `value` contains "#", the name is everything after the last "#".
    - If `value` does not start with a scheme followed by "://" (for
      example "http://" or "https://"), the name is everything after
      the last ":".
    - Otherwise, the name is everything after the last "/".

    Raises:
        ValueError: If the inferred name is empty or is invalid.
            Valid names
              - start with A-Z, a-z or _
              - the rest may in addition contain 0-9, ., +, -, or /
    """
    # Determine the separator that precedes the name, if any.
    if ":" not in value:
        separator = None
    elif "#" in value:
        separator = "#"
    elif re.match("^[a-zA-Z][a-zA-Z0-9+.-]*://", value) is None:
        separator = ":"
    else:
        separator = "/"

    name = value if separator is None else value.rpartition(separator)[2]

    if _NAME_RE.fullmatch(name):
        return name
    raise ValueError(
        f"Cannot infer name of IRI: {value} (getting invalid name "
        f"'{name}')"
    )
merge(a, b)
¶
Return the merged result of a and b, where a and b can be
None, string or a sequence of strings.
The result will be None if both a and b are None and a string if one
is None and the other is a string or both are the same string. Otherwise,
the result will be a list with the unique strings from a and b.
Examples:
>>> merge(None, None)
>>> merge("a", None)
'a'
>>> merge(None, "b")
'b'
>>> merge("a", "b")
['a', 'b']
>>> merge("a", ["c", "b", "a"])
['a', 'c', 'b']
>>> merge(["a", "d"], ["c", "b", "a"])
['a', 'd', 'c', 'b']
Source code in tripper/datadoc/utils.py
def merge(a: "MergeType", b: "MergeType") -> "MergeType":
    """Combine `a` and `b`, each being None, a string or a sequence of
    strings, into a single value.

    If either argument is None, the other one is returned as-is (so the
    result is None when both are None).  Two equal strings give that
    string.  In the remaining cases the items of `a` are followed by the
    items of `b` that are not already in `a`.

    Raises:
        TypeError: If an argument is neither None, a string nor a
            sequence.

    Examples:

    >>> merge(None, None)
    >>> merge("a", None)
    'a'
    >>> merge(None, "b")
    'b'
    >>> merge("a", "b")
    ['a', 'b']
    >>> merge("a", ["c", "b", "a"])
    ['a', 'c', 'b']
    >>> merge(["a", "d"], ["c", "b", "a"])
    ['a', 'd', 'c', 'b']
    """
    # pylint: disable=too-many-return-statements
    if a is None or b is None:
        return b if a is None else a

    a_is_str = isinstance(a, str)
    b_is_str = isinstance(b, str)

    if a_is_str and b_is_str:
        return a if a == b else [a, b]
    if a_is_str and isinstance(b, Sequence):
        return [a, *(item for item in b if item != a)]
    if isinstance(a, Sequence) and b_is_str:
        if b in a:
            return a
        return [*a, b]
    if isinstance(a, Sequence) and isinstance(b, Sequence):
        return [*a, *(item for item in b if item not in a)]
    raise TypeError("input must be None, string or a sequence")
stripnested(d)
¶
Strip off brackets from keys in nested dicts.
This function is intended for post-processing the result of a series of calls to addnested().
Examples:
>>> d = {"a[1]": {"x": 1, "y": 2}, "a[2]": {"x": 3}}
>>> stripnested(d)
{'a': [{'x': 1, 'y': 2}, {'x': 3}]}
Source code in tripper/datadoc/utils.py
def stripnested(d):
    """Strip off brackets from keys in nested dicts.

    This function is intended for post-processing the result of a
    series of calls to addnested().

    Lists and dicts are processed recursively and returned as new
    containers of the same type.  In a dict, everything from the first
    "[" in a key is removed, and values whose keys become equal are
    combined with add().  Any other value is returned unchanged.

    Arguments:
        d: Dict, list or other value to process.

    Returns:
        A copy of `d` with brackets stripped from all dict keys.

    Examples:

    >>> d = {"a[1]": {"x": 1, "y": 2}, "a[2]": {"x": 3}}
    >>> stripnested(d)
    {'a': [{'x': 1, 'y': 2}, {'x': 3}]}

    >>> stripnested({"a": {"b[1]": 1, "b[2]": 2}})
    {'a': {'b': [1, 2]}}
    """
    if isinstance(d, list):
        new = type(d)()
        for e in d:
            new.append(stripnested(e))
    elif isinstance(d, dict):
        new = type(d)()
        for k, v in d.items():
            # Recurse into the value too, so that bracketed keys of dicts
            # nested inside dicts are stripped as well.
            add(new, k.split("[")[0], stripnested(v))
    else:
        new = d
    return new