utils¶

Utility functions.

`AttrDict (dict)` ¶

Dict with attribute access.

Source code in tripper/utils.py

class AttrDict(dict):
    """Dict with attribute access.

    Every key can be read and written as an attribute (``d.key``) in
    addition to the normal item syntax (``d["key"]``).

    Note:
        Reading an attribute that is not a key raises `KeyError` (not
        `AttributeError`).  This is kept for backward compatibility, but
        it means that `hasattr()` and three-argument `getattr()` do not
        swallow the error for unknown names.
    """

    def __getattr__(self, name):
        """Return the value stored under key `name`.

        Only called when normal attribute lookup fails.  Raises
        `KeyError` if `name` is not a key.
        """
        if name in self:
            return self[name]
        if name == "__wrapped__":
            # Hack to work around a pytest bug.  During its collection
            # phase pytest tries to mock namespace objects with an
            # attribute `__wrapped__`.
            return None
        raise KeyError(name)

    def __setattr__(self, name, value):
        """Store `value` under key `name`."""
        self[name] = value

    def __repr__(self):
        """Return a multi-line, indented representation."""
        return self._pprint()

    def __dir__(self):
        """List the regular dict attributes followed by the keys."""
        return dict.__dir__(self) + list(self.keys())

    def __getstate__(self):  # For pickle support
        """Return the content as a plain dict."""
        return dict(self)

    def __setstate__(self, state):  # For pickle support
        """Replace the current content with the items in `state`."""
        # Previously a no-op, which silently ignored the provided state.
        self.clear()
        self.update(state)

    def __deepcopy__(self, memo):  # For supporting deepcopy
        """Return a deep copy of self as a new AttrDict."""
        duplicate = AttrDict()
        # Register the copy before descending, such that values that refer
        # back to `self` resolve to the copy instead of recursing forever.
        memo[id(self)] = duplicate
        for k, v in self.items():
            duplicate[k] = deepcopy(v, memo)
        return duplicate

    def _pprint(self, obj=None, indent=0):
        """Help method for pretty printing.

        Arguments:
            obj: The mapping to format.  Defaults to `self`.
            indent: Number of spaces the closing bracket is indented
                with.  Items are indented two spaces more.

        Returns:
            String representation with one item per line.  Nested
            AttrDict values are formatted recursively, other values
            with `repr()`.
        """
        if obj is None:
            obj = self
        if not obj:
            return "AttrDict()"
        n = indent + 2
        s = ["AttrDict({"]
        for k, v in obj.items():
            val = self._pprint(v, n) if isinstance(v, AttrDict) else repr(v)
            s.append(f"{' '*n}{k!r}: {val},")
        s.append(" " * indent + "})")
        return "\n".join(s)

    def copy(self):
        """Return a shallow copy of self."""
        return AttrDict(self)

`copy(self)` ¶

Return a shallow copy of self.

Source code in tripper/utils.py

def copy(self):
    """Create a new AttrDict holding the same items as `self`.

    The copy is shallow: the values themselves are shared with `self`.
    """
    duplicate = AttrDict(self)
    return duplicate

`as_python(value)` ¶

Converts value to a native Python representation.

If value is a Literal, its Python representation will be returned. If value is a string, it will first be converted to a Literal, before its Python representation is returned. Otherwise, value will be returned as-is.

Source code in tripper/utils.py

def as_python(value: "Any") -> "Any":
    """Return the native Python representation of `value`.

    Arguments:
        value: A Literal, a string or any other Python object.

    Returns:
        - For a Literal: the result of its `to_python()` method.
        - For a string: the string is first parsed with `parse_literal()`
          and the Python representation of the result is returned.
        - For anything else: `value` itself, unchanged.
    """
    if not isinstance(value, (Literal, str)):
        return value
    # Literal is tested before str, such that literals are never re-parsed
    literal = value if isinstance(value, Literal) else parse_literal(value)
    return literal.to_python()

`bnode_iri(prefix='', source='', length=5)` ¶

Returns a new IRI for a blank node.

Parameters:

Name	Type	Description	Default
`prefix`	`str`	A prefix to insert between "_:" and the hash.	`''`
`source`	`str`	A unique string that the returned bnode will be a hash of. The default is to generate a random hash.	`''`
`length`	`int`	Length is the number of bytes in the hash. The default length of 5 is a compromise between readability (10 characters) and safety (corresponding to about 1e12 possibilities). You can change it to 16 to get 128 bits (corresponding to the uniqueness of UUIDs). It makes no sense to go beyond 32, because that is the maximum of the underlying shake_128 algorithm.	`5`

Returns:

Type	Description
`str`	A new bnode IRI of the form "_:<prefix><hash>", where `<prefix>` is `prefix` and `<hash>` is a hex-encoded hash of `source`.

Source code in tripper/utils.py

def bnode_iri(prefix: str = "", source: str = "", length: int = 5) -> str:
    """Returns a new IRI for a blank node.

    Parameters:
        prefix: A prefix to insert between "_:" and the hash.
        source: A unique string that the returned bnode will be a hash of.
            The default is to generate a random hash.
        length: Length is the number of bytes in the hash.  The default
            length of 5 is a compromise between readability (10 characters)
            and safety (corresponding to about 1e12 possibilities).  You can
            change it to 16 to get 128 bits (corresponding to the uniqueness
            of UUIDs).  It makes no sense to go beyond 32 bytes with the
            underlying shake_128 algorithm.

    Returns:
        A new bnode IRI of the form "_:<prefix><hash>", where `<prefix>`
        is `prefix` and `<hash>` is a hex-encoded hash of `source` (or
        random bytes if `source` is empty).  The hash always consists of
        exactly `2 * length` hexadecimal characters.
    """
    if source:
        digest = hashlib.shake_128(source.encode()).hexdigest(length)
    else:
        # From Python 3.9 we can use random.randbytes(length).hex()
        # Each byte is zero-padded to two hex digits.  Plain `hex()` would
        # drop the leading zero for values below 16 and give a too short
        # hash.
        digest = "".join(
            f"{random.randint(0, 255):02x}" for _ in range(length)  # nosec
        )
    return f"_:{prefix}{digest}"

`check_function(func, s, exceptions)` ¶

Help function returning true if func(s) does not raise an exception.

False is returned if func(s) raises an exception listed in exceptions. Otherwise the exception is propagated.

Source code in tripper/utils.py

def check_function(func: "Callable", s: str, exceptions) -> bool:
    """Tell whether `func(s)` can be called without raising.

    Arguments:
        func: Callable taking a single argument.
        s: The argument passed to `func`.
        exceptions: Exception class, or tuple of exception classes, that
            counts as a failed check.

    Returns:
        True if `func(s)` returns normally.  False if it raises one of
        `exceptions`.  Any other exception is propagated to the caller.
    """
    # Note that the missing type hint on `exceptions` is deliberate, see
    # https://peps.python.org/pep-0484/#exceptions
    try:
        func(s)
    except exceptions:
        succeeded = False
    else:
        succeeded = True
    return succeeded

`check_service_availability(url, timeout=5, interval=1)` ¶

Check whether the service with given URL is available.

Parameters:

Name	Type	Description	Default
`url`	`str`	URL of the service to check.	required
`timeout`	`float`	Total time in seconds to wait for a response.	`5`
`interval`	`float`	Internal time interval in seconds between checking if the service has responded.	`1`

Returns:

Type	Description
`bool`	Returns true if the service responds with code 200, otherwise false is returned.

Source code in tripper/utils.py

def check_service_availability(
    url: str, timeout: float = 5, interval: float = 1
) -> bool:
    """Check whether the service with given URL is available.

    The URL is polled with HTTP GET requests until it answers with status
    code 200 or until `timeout` seconds have passed.

    Arguments:
        url: URL of the service to check.
        timeout: Total time in seconds to wait for a response.
        interval: Internal time interval in seconds between checking if the
            service has responded.

    Returns:
        Returns true if the service responds with code 200,
        otherwise false is returned.
    """
    import time  # pylint: disable=import-outside-toplevel

    import requests  # pylint: disable=import-outside-toplevel

    # Interval should never be larger than timeout
    interval = min(interval, timeout)

    # Use a monotonic clock, such that adjustments of the system time
    # cannot shorten or prolong the waiting.
    deadline = time.monotonic() + timeout

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False

        try:
            # A single request is never allowed to wait longer than what
            # is left of the total timeout.
            response = requests.get(url, timeout=remaining)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            # Service not reachable (yet) - try again until the deadline
            pass

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Do not sleep past the deadline
        time.sleep(min(interval, remaining))

`en(value)` ¶

Convenience function that returns value as a plain english literal.

Equivalent to Literal(value, lang="en").

Source code in tripper/utils.py

def en(value) -> "Literal":  # pylint: disable=invalid-name
    """Shorthand for creating a plain literal tagged as English.

    Calling ``en(value)`` gives the same result as
    ``Literal(value, lang="en")``.
    """
    english = Literal(value, lang="en")
    return english

`expand_iri(iri, prefixes, strict=False)` ¶

Return the full IRI if iri is prefixed. Otherwise iri is returned.

Source code in tripper/utils.py

def expand_iri(iri: str, prefixes: dict, strict: bool = False) -> str:
    """Expand a prefixed IRI to a full IRI.

    Arguments:
        iri: The IRI to expand.
        prefixes: Dict mapping prefixes to their namespaces.
        strict: Whether to raise an exception if `iri` is prefixed with
            a prefix that is not a key in `prefixes`.

    Returns:
        The namespace of the prefix followed by the name, if `iri` is
        prefixed with a known prefix.  Otherwise `iri` is returned
        unchanged.

    Raises:
        NamespaceError: If `strict` is true and the prefix of `iri` is
            not in `prefixes`.
    """
    matched = re.match(MATCH_PREFIXED_IRI, iri)
    if matched is None:
        # Not a prefixed IRI - nothing to expand
        return iri

    prefix, name, _ = matched.groups()
    if prefix in prefixes:
        return f"{prefixes[prefix]}{name}"
    if strict:
        raise NamespaceError(f'Undefined prefix "{prefix}" in IRI: {iri}')
    return iri

`extend_namespace(namespace, triplestore, format=None)` ¶

Extend a namespace with additional known names.

This only makes sense if the namespace was created with label_annotations or check set to true.

Parameters:

Name	Type	Description	Default
`namespace`	`Namespace`	The namespace to extend.	required
`triplestore`	`Union[Triplestore, str, Path, dict]`	Source from which to extend the namespace. It can be of one of the following types: - Triplestore: triplestore object to read from - str: URL to a triplestore to read from. May also be a path to a local file to read from - Path: path to a local file to read from - dict: dict mapping new IRI names to their corresponding IRIs	required
`format`	`Optional[str]`	Format to use when loading from a triplestore.	`None`

Source code in tripper/utils.py

def extend_namespace(
    namespace: Namespace,
    triplestore: "Union[Triplestore, str, Path, dict]",
    format: "Optional[str]" = None,
):
    """Add more known names to an existing namespace.

    This only makes sense for namespaces that were created with
    `label_annotations` or `check` set to true.

    Arguments:
        namespace: The namespace to extend.
        triplestore: Source from which to extend the namespace. It can be
            of one of the following types:
              - Triplestore: triplestore object to read from
              - str: URL to a triplestore to read from.  May also be a
                path to a local file to read from
              - Path: path to a local file to read from
              - dict: dict mapping new IRI names to their corresponding IRIs
        format: Format to use when loading from a triplestore.

    Raises:
        TypeError: If the namespace has no table of known IRIs.
    """
    # pylint: disable=protected-access
    known_iris = namespace._iris
    if known_iris is None:
        raise TypeError(
            "only namespaces created with `label_annotations` or `check` set "
            "to true can be extend"
        )

    if not isinstance(triplestore, dict):
        # Let the namespace load the names from the triplestore/file
        namespace._update_iris(triplestore, reload=True, format=format)
        return

    # A dict is merged directly into the table of known IRIs
    known_iris.update(triplestore)

`function_id(func, length=4)` ¶

Return a checksum for function func.

The returned object is a string of hexadecimal digits.

length is the number of bytes in the returned checksum. Since the current implementation is based on the shake_128 algorithm, it makes no sense to set length larger than 32 bytes.

Source code in tripper/utils.py

def function_id(func: "Callable", length: int = 4) -> str:
    """Compute a checksum identifying the function `func`.

    The checksum is calculated with the shake_128 algorithm from the
    source code of `func` followed by its docstring (if it has any).

    Arguments:
        func: The function to calculate the checksum of.  Its source
            code must be retrievable with `inspect.getsource()`.
        length: Number of bytes in the returned checksum.  With
            shake_128 it makes no sense to go beyond 32 bytes.

    Returns:
        The checksum as a string of `2 * length` hexadecimal digits.
    """
    text = inspect.getsource(func)
    if func.__doc__:
        text += func.__doc__
    hasher = hashlib.shake_128(text.encode())
    # pylint: disable=too-many-function-args
    return hasher.hexdigest(length)

`get_entry_points(group)` ¶

Consistent interface to entry points for the given group.

Works for all supported versions of Python.

Examples:

get_entry_points("tripper.backends") # doctest: +SKIP [ EntryPoint( name='fuseki', value='pybacktrip.backends.fuseki', group='tripper.backends', ), EntryPoint( name='stardog', value='pybacktrip.backends.stardog', group='tripper.backends', ), ... ]

Source code in tripper/utils.py

def get_entry_points(group: str):
    """Return the entry points registered for `group`.

    Provides one consistent interface that works for all supported
    versions of Python.

    Arguments:
        group: Name of the entry point group to look up.

    Returns:
        The entry points in the given group.  The result is empty if no
        entry points are registered for `group`.

    Examples:

    >>> get_entry_points("tripper.backends")  # doctest: +SKIP
    [
        EntryPoint(
            name='fuseki',
            value='pybacktrip.backends.fuseki',
            group='tripper.backends',
        ),
        EntryPoint(
            name='stardog',
            value='pybacktrip.backends.stardog',
            group='tripper.backends',
       ),
       ...
    ]

    """
    # pylint: disable=no-member,unexpected-keyword-arg
    if sys.version_info >= (3, 10):
        # New entry_point interface from Python 3.10+
        return entry_points(group=group)

    # Fallback for Python < 3.10, where entry_points() returns a dict
    return entry_points().get(group, ())

`infer_iri(obj)` ¶

Return IRI of the individual that stands for Python object obj.

Valid Python objects are DLite and Pydantic instances.

References:

- DLite: https://github.com/SINTEF/dlite
- Pydantic: https://docs.pydantic.dev/

Source code in tripper/utils.py

def infer_iri(obj):
    """Infer the IRI of the individual that Python object `obj` stands for.

    Valid Python objects are strings, [DLite] and [Pydantic] instances.
    The following is tried, in the given order:

    1. `obj` is a string: it is the IRI.
    2. `obj.uri` is a string: it is the IRI.
    3. `obj.uuid` is set: its string representation is the IRI.
    4. `obj.schema` is callable (pydantic model):
         - `obj.identity` is a string: it is the IRI.
         - Otherwise the "uri", "identity" and "uuid" entries in the
           "properties" of the dict returned by `obj.schema()` are
           tried.

    Returns:
        The inferred IRI as a string.

    Raises:
        TypeError: If no IRI can be inferred from `obj`.

    References:

    [DLite]: https://github.com/SINTEF/dlite
    [Pydantic]: https://docs.pydantic.dev/
    """

    # Please note that tripper does not depend on neither DLite nor Pydantic.
    # Hence neither of these packages are imported.  However, due to duck-
    # typing, infer_iri() is still able to recognise DLite and Pydantic
    # objects and infer their IRIs.

    if isinstance(obj, str):
        return str(obj)

    # dlite.Metadata or dataclass (or instance with uri)
    uri = getattr(obj, "uri", None)
    if isinstance(uri, str):
        return str(uri)

    # dlite.Instance or dataclass
    uuid = getattr(obj, "uuid", None)
    if uuid:
        return str(uuid)

    # Everything below requires a pydantic.BaseModel-like object
    schema_method = getattr(obj, "schema", None)
    if not callable(schema_method):
        raise TypeError(f"cannot infer IRI from object: {obj!r}")

    # soft7 pydantic model
    identity = getattr(obj, "identity", None)
    if isinstance(identity, str):
        return str(identity)

    # pydantic instance
    properties = schema_method()["properties"]
    for key in ("uri", "identity"):
        if key in properties and isinstance(properties[key], str):
            return str(properties[key])
    if "uuid" in properties and properties["uuid"]:
        return str(properties["uuid"])

    raise TypeError(f"cannot infer IRI from pydantic object: {obj!r}")

`is_curie(curie, exclude_netloc=True)` ¶

Returns whether curie is a CURIE (compact URI).

Parameters:

Name	Type	Description	Default
`curie`	`str`	CURIE to validate.	required
`exclude_netloc`		Whether to exclude CURIEs with two slashes following the first colon. If true, the part before the colon should also correspond to a valid URI schema name.	`True`

Examples:

>>> is_curie("http://example.com")
False
>>> is_curie("rdf:type")
True

Source code in tripper/utils.py

def is_curie(curie: str, exclude_netloc=True) -> bool:
    """Tell whether `curie` is a CURIE (compact URI).

    A CURIE may optionally be enclosed in square brackets.  Strings
    shorter than three characters are never CURIEs.

    Arguments:
        curie: CURIE to validate.
        exclude_netloc: Whether to exclude CURIEs with two slashes following
            the first colon. If true, the part before the colon should also
            correspond to a valid URI schema name.

    Raises:
        NotImplementedError: If `exclude_netloc` is false.

    Example:

        >>> is_curie("http://example.com")
        False
        >>> is_curie("rdf:type")
        True
    """
    if len(curie) < 3:
        return False

    # Strip enclosing square brackets, if any
    bracketed = curie.startswith("[") and curie.endswith("]")
    candidate = curie[1:-1] if bracketed else curie

    if not exclude_netloc:
        raise NotImplementedError(
            "is_curie() with argument `exclude_netloc=False` isn't implemented"
        )

    # URI scheme name, colon, not followed by "//", no whitespace
    pattern = r"^[a-zA-Z][a-zA-Z0-9+.-]*:(?!//)\S*$"
    return re.match(pattern, candidate) is not None

`is_uri(uri, require_netloc=True, allow_unescaped=False, safe='%:~/?&;=#')` ¶

Returns true if uri is a valid URI, otherwise false.

Parameters:

Name	Type	Description	Default
`uri`	`str`	URI to validate.	required
`require_netloc`	`bool`	Whether to require `uri` to contain a network location. Setting this to true will exclude URNs, which are valid URIs. However, in most practical cases, you would expect the URI to contain a network location.	`True`
`allow_unescaped`	`bool`	Whether to allow `uri` to contain unescaped special characters. Any character not in `safe`, except for letters, digits and '_.-', is considered to be special. Escaping is expected to use standard URI %xx escape codes.	`False`
`safe`	`str`	Characters in addition to '_.-' that don't need to be escaped when `allow_unescaped` is false.	`'%:~/?&;=#'`

Source code in tripper/utils.py

def is_uri(
    uri: str,
    require_netloc: bool = True,
    allow_unescaped: bool = False,
    safe: str = "%:~/?&;=#",
):
    """Tell whether `uri` is a valid URI.

    Arguments:
        uri: URI to validate.
        require_netloc: Whether to require `uri` to contain a network
            location.  Setting this to true will exclude URNs, which are
            valid URIs.  However, in most practical cases, you would
            expect the URI to contain a network location.
        allow_unescaped: Whether to allow `uri` to contain unescaped special
            characters. Any character not in `safe`, except for letters,
            digits and '_.-', is considered to be special.
            Escaping is expected to use standard URI %xx escape codes.
        safe: Characters in addition to '_.-' that don't need to be
            escaped when `allow_unescaped` is false.

    Returns:
        True if `uri` has a scheme (and a network location, if
        `require_netloc` is true) and passes the escape check.
        Otherwise false.
    """
    if not allow_unescaped:
        # Quoting leaves a properly escaped URI unchanged
        if urllib.parse.quote(uri, safe=safe) != uri:
            return False

    try:
        parts = urllib.parse.urlparse(uri)
    except ValueError:
        return False

    if not parts.scheme:
        return False
    return bool(parts.netloc) if require_netloc else True

`openfile(url, mode='rt', timeout=3, **kwargs)` ¶

Like open(), but allows opening remote files using HTTP GET requests.

Should always be used in a with-statement.

Parameters:

Name	Type	Description	Default
`url`	`Union[str, Path, IO]`	File path, URL or stream to open.	required
`mode`	`str`	See `mode` argument of open().	`'rt'`
`timeout`	`float`	Timeout for accessing the file in seconds.	`3`
`kwargs`		Additional keyword arguments passed to open().	`{}`

Yields:

Type	Description
`Iterator[IO]`	A stream object returned by open().

Source code in tripper/utils.py

@contextmanager
def openfile(
    url: "Union[str, Path, IO]", mode: str = "rt", timeout: float = 3, **kwargs
) -> "Iterator[IO]":
    """Context manager like open(), that can also open remote files.

    Remote files (http:// and https:// URLs) are downloaded with a HTTP
    GET request to a temporary file, which is removed again when the
    context is left.  "file:" URLs and plain paths are opened directly.
    An already open stream is passed through as-is and not closed.

    Should always be used in a with-statement.

    Arguments:
        url: File path, URL or stream to open.
        mode: See `mode` argument of open().
        timeout: Timeout for accessing the file in seconds.
        kwargs: Additional keyword arguments passed to open().

    Yields:
        A stream object returned by open().

    Raises:
        IOError: If `url` starts with an unknown scheme.
    """
    if isinstance(url, (IO, io.IOBase)):
        # Already a stream - hand it back without opening or closing it
        yield url
        return

    location = str(url)
    lowered = location.lower()
    downloaded = False
    stream = None

    if lowered.startswith("file:"):
        # Skip "file://" (7 characters) or "file:" (5 characters)
        skip = 7 if lowered.startswith("file://") else 5
        path = location[skip:]

    elif lowered.startswith(("http://", "https://")):
        import requests  # pylint: disable=import-outside-toplevel

        downloaded = True
        response = requests.get(location, timeout=timeout)
        response.raise_for_status()
        # delete=False: the file must survive until it has been re-opened
        # with the requested mode below
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            path = tmp.name
            tmp.write(response.content)

    elif re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*://", location):
        raise IOError(f"unknown scheme: {location.split(':', 1)[0]}")

    else:
        path = location

    try:
        # pylint: disable=unspecified-encoding
        stream = open(path, mode=mode, **kwargs)  # type: ignore
        yield stream  # type:ignore
    finally:
        if stream is not None:
            stream.close()
        if downloaded:
            # Remove the temporary copy of the downloaded file
            Path(path).unlink()

`parse_literal(literal)` ¶

Parse Python object literal and return it as an instance of Literal.

The main difference between this function and the Literal constructor, is that this function correctly interprets n3-encoded literal strings.

Source code in tripper/utils.py

def parse_literal(literal: "Any") -> "Any":
    """Parse Python object `literal` and return it as an instance of Literal.

    The main difference between this function and the Literal constructor,
    is that this function correctly interprets n3-encoded literal strings.

    Arguments:
        literal: The object to parse.  May be a Literal (returned as-is),
            an object with a `value` attribute (e.g. a rdflib literal),
            an instance of one of the Python types in `Literal.datatypes`
            or a string, possibly n3-encoded.

    Returns:
        A Literal instance.

    Raises:
        TypeError: If `literal` is not a string, has no `value` attribute
            and is not an instance of any type in `Literal.datatypes`.
        ValueError: If a plain string cannot be converted to any of the
            types in `Literal.datatypes`.
    """
    # pylint: disable=invalid-name,too-many-branches,too-many-statements
    # pylint: disable=too-many-return-statements
    lang, datatype = None, None

    if isinstance(literal, Literal):
        return literal

    # Pick up language tag from foreign literal representations
    if hasattr(literal, "lang"):
        lang = literal.lang
    elif hasattr(literal, "language"):
        lang = literal.language

    # The datatype is only picked up when no language is given
    if (
        not lang
        and hasattr(literal, "datatype")
        and literal.datatype is not None
    ):
        datatype = str(literal.datatype)

    # This should handle rdflib literals correctly (and probably most other
    # literal representations as well)
    if hasattr(literal, "value"):
        # Note that in rdflib 6.3, the `value` attribute may be None for some
        # datatypes (like rdf:JSON) even though a non-empty value exists.
        # As a workaround, we use the string representation if the value
        # attribute is None.
        if literal.value is not None:
            return Literal(literal.value, lang=lang, datatype=datatype)
        return Literal(str(literal), lang=lang, datatype=datatype)

    # Non-string Python objects: infer the datatype from the Python type
    if not isinstance(literal, str):
        if isinstance(literal, tuple(Literal.datatypes)):
            if type(literal) in Literal.datatypes:
                # Exact type match: use the first datatype listed for it
                datatype = Literal.datatypes[type(literal)][0]
            else:
                # literal is in instance of a subclass of one of the datatypes
                for k, v in Literal.datatypes.items():
                    if isinstance(literal, k):
                        datatype = v[0]
                        break
                else:
                    assert False, "should never be reached"  # nosec
            return Literal(literal, lang=lang, datatype=datatype)
        raise TypeError(f"unsupported literal type: {type(literal)}")

    # From here on `literal` is a string (or an instance of a str subclass).
    # If it provides an `n3()` method, parse its n3 representation instead.
    if hasattr(literal, "n3") and callable(literal.n3):
        return parse_literal(literal.n3())

    # 1st attempt: quoted string only, i.e. "..." or """...""".
    # It is given datatype xsd:string.
    match = re.match(r'^\s*("""(.*)"""|"(.*)")\s*$', literal, flags=re.DOTALL)
    if match:
        _, v1, v2 = match.groups()
        value, datatype = v1 if v1 else v2, XSD.string
    else:
        # 2nd attempt: quoted string followed by ^^<datatype> or ^^datatype
        match = re.match(
            r'^\s*("""(.*)"""|"(.*)")\^\^(<([^>]+)>|([^<].*))\s*$',
            literal,
            flags=re.DOTALL,
        )
        if match:
            _, v1, v2, _, d1, d2 = match.groups()
            value = v1 if v1 else v2
            datatype = d1 if d1 else d2
        else:
            # 3rd attempt: quoted string followed by @lang
            match = re.match(
                r'^\s*("""(.*)"""|"(.*)")@(.*)\s*$', literal, flags=re.DOTALL
            )
            if match:
                _, v1, v2, lang = match.groups()
                value = v1 if v1 else v2
            else:
                # Not n3-encoded: use the string as it is
                value = literal

    if lang or datatype:
        if datatype:
            # Reverse lookup table mapping datatype to Python type
            types = {}
            for pytype, datatypes in Literal.datatypes.items():
                types.update({t: pytype for t in datatypes})
            type_ = types.get(datatype, str)
            # Handled separately, since bool() of any non-empty string is true
            if type_ is bool and value in ("False", "false", "0", 0, False):
                return Literal(False)
            # NOTE(review): only TypeError is suppressed here; a ValueError
            # raised by the conversion would propagate - confirm that this
            # is intended.
            try:
                value = type_(value)
            except TypeError:
                pass
        return Literal(value, lang=lang, datatype=datatype)

    # Plain string without language or datatype: try to convert it with each
    # of the Python types in Literal.datatypes (except bool), in order.  The
    # first successful conversion determines the datatype.
    for type_, datatypes in Literal.datatypes.items():
        if type_ is not bool:
            try:
                return Literal(
                    type_(literal), lang=lang, datatype=datatypes[0]
                )
            except (ValueError, TypeError):
                pass

    raise ValueError(f'cannot parse literal "{literal}"')

`parse_object(obj)` ¶

Applies heuristics to parse RDF object obj to an IRI or literal.

The following heuristics are applied (in the given order): - If obj is a Literal, it is returned. - If obj is a string and - starts with "_:", it is assumed to be a blank node and returned. - starts with a scheme, it is assumed to be an IRI and returned. - can be converted to a float, int or datetime, it is returned converted to a literal of the corresponding type. - it is a valid n3 representation, return it as the given type. - otherwise, return it as a xsd:string literal. - Otherwise, raise a ValueError.

Returns A string if obj is considered to be an IRI, otherwise a literal.

Source code in tripper/utils.py

def parse_object(obj: "Union[str, Literal]") -> "Union[str, Any]":
    """Use heuristics to interpret RDF object `obj` as an IRI or a literal.

    The following heuristics are applied (in the given order):
    - If `obj` is a Literal, it is returned.
    - If `obj` is a string and
      - starts with "_:", it is assumed to be a blank node and returned.
      - starts with a lower-case scheme followed by "://", it is assumed
        to be an IRI and returned.
      - is "true" or "false", it is returned as a xsd:boolean literal.
      - is an integer, a float or an ISO formatted datetime, it is
        returned as a literal of the corresponding datatype.
      - otherwise, the result of `parse_literal()` is returned.
    - Otherwise, a ValueError is raised.

    Returns
        A string if `obj` is considered to be an IRI, otherwise a
        literal.
    """
    if isinstance(obj, Literal):
        return obj
    if not isinstance(obj, str):
        raise ValueError("`obj` should be a literal or a string.")

    # Blank nodes and IRIs are returned as plain strings
    is_bnode = obj.startswith("_:")
    if is_bnode or re.match(r"^[a-z]+://", obj):
        return obj

    # Find the datatype from the form of the string.  The order matters,
    # since e.g. integers can also be converted to float.
    datatype = None
    if obj in ("true", "false"):
        datatype = XSD.boolean
    elif re.match(r"^\s*[+-]?\d+\s*$", obj):
        datatype = XSD.integer
    elif check_function(float, obj, ValueError):
        datatype = XSD.double
    elif check_function(datetime.datetime.fromisoformat, obj, ValueError):
        datatype = XSD.dateTime

    if datatype is None:
        return parse_literal(obj)
    return Literal(obj, datatype=datatype)

`prefix_iri(iri, prefixes, strict=False)` ¶

Return prefixed IRI.

This is the reverse of expand_iri().

If strict is true, a NamespaceError exception is raised if no prefix can be found.

Source code in tripper/utils.py

def prefix_iri(iri: str, prefixes: dict, strict: bool = False) -> str:
    """Shorten a full IRI to its prefixed form.

    This is the reverse of expand_iri().

    Arguments:
        iri: The IRI to shorten.
        prefixes: Dict mapping prefixes to their namespaces.
        strict: Whether to raise an exception if no namespace in
            `prefixes` matches the start of `iri`.

    Returns:
        `iri` prefixed with the first prefix in `prefixes` whose
        namespace `iri` starts with.  If `iri` already is prefixed or
        no namespace matches, it is returned unchanged.

    Raises:
        NamespaceError: If `strict` is true and no prefix can be found.

    """
    if re.match(MATCH_PREFIXED_IRI, iri):
        # Already prefixed
        return iri

    for prefix, ns in prefixes.items():
        namespace = str(ns)
        if iri.startswith(namespace):
            return f"{prefix}:{iri[len(namespace):]}"

    if strict:
        raise NamespaceError(f"No prefix defined for IRI: {iri}")
    return iri

`random_string(length=8)` ¶

Return a random string of the given length.

Source code in tripper/utils.py

def random_string(length=8):
    """Generate a random string with `length` characters.

    The characters are drawn from the ASCII letters (both cases) and
    digits.  Not intended for security purposes.
    """
    alphabet = string.ascii_letters + string.digits
    chars = []
    for _ in range(length):
        chars.append(random.choice(alphabet))  # nosec
    return "".join(chars)

`recursive_update(d, other, append=True, cls=None)` ¶

Recursively update dict d with dict other.

Parameters:

Name	Type	Description	Default
`d`	`dict`	Dict to update.	required
`other`	`Union[dict, List[Union[dict, list]]]`	The source to update `d` from.	required
`append`	`bool`	If `append` is true and `other` has a key that also exists in `d`, then the value in `d` will be converted to a list with the value from `other` appended to it. If `append` is false, the values in `d` will be replaced by corresponding values in `other`.	`True`
`cls`	`Optional[type]`	Dict subclass for new sub-dicts in `d`. Defaults to the class of `d`.	`None`

Examples:

>>> d = {"a": 1}
>>> recursive_update(d, {"a": 2})
>>> d
{'a': [1, 2]}

>>> d = {"a": 1}
>>> recursive_update(d, {"a": 2}, append=False)
>>> d
{'a': 2}

Source code in tripper/utils.py

def recursive_update(
    d: dict,
    other: "Union[dict, List[Union[dict, list]]]",
    append: bool = True,
    cls: "Optional[type]" = None,
):
    """Update dict `d` in-place with the content of `other`, recursively.

    Arguments:
        d: Dict to update.
        other: The source to update `d` from.
        append: If `append` is true and `other` has a key that also exists
            in `d`, then the value in `d` will be converted to a list with
            the value from `other` appended to it.
            If `append` is false, the values in `d` will be replaced by
            corresponding values in `other`.
        cls: Dict subclass for new sub-dicts in `d`. Defaults to the class
            of `d`.

    Example:

        >>> d = {"a": 1}
        >>> recursive_update(d, {"a": 2})
        >>> d
        {'a': [1, 2]}

        >>> d = {"a": 1}
        >>> recursive_update(d, {"a": 2}, append=False)
        >>> d
        {'a': 2}

    """
    dict_class = d.__class__ if cls is None else cls

    # The merge itself is done by the helper; `d` is updated with its result
    merged = _rec(d, other, append=append, cls=dict_class)
    d.update(merged)

`split_iri(iri)` ¶

Split iri into namespace and name parts and return them as a tuple.

Parameters:

Name	Type	Description	Default
`iri`	`str`	The IRI to be split.	required

Returns:

Type	Description
`Tuple[str, str]`	A split IRI. Split into namespace and name.

Source code in tripper/utils.py

def split_iri(iri: str) -> "Tuple[str, str]":
    """Divide `iri` into its namespace and name parts.

    The IRI is split after the last hash ("#") if it contains one,
    otherwise after the last slash ("/").

    Parameters:
        iri: The IRI to be split.

    Returns:
        A tuple `(namespace, name)`, where the namespace includes the
        separator it was split at.

    Raises:
        ValueError: If `iri` contains neither a hash nor a slash.

    """
    # A hash takes precedence over a slash
    for separator in ("#", "/"):
        namespace, found, name = iri.rpartition(separator)
        if found:
            return namespace + found, name

    raise ValueError("all IRIs should contain a slash")

`substitute_query(query, iris=None, literals=None, prefixes=None, iriquote='<>')` ¶

Substitute IRI and literal variables in a SPARQL query.

Parameters:

Name	Type	Description	Default
`query`	`str`	String with the SPARQL query.	required
`iris`	`Optional[dict]`	Dict used for query substitutions that maps IRI variables to IRIs. The IRIs may be provided as fully expanded or prefixed with the prefix defined in `prefixes`.	`None`
`literals`	`Optional[dict]`	Dict used for query substitutions that maps literal variables to literals. For common datatypes, like strings and numbers, the values can just be normal Python objects. For special cases or more control, provide the values as instances of `tripper.Literal`.	`None`
`prefixes`	`Optional[dict]`	Dict mapping prefixes to namespace URLs.	`None`
`iriquote`	`str`	Quote characters to use for IRIs. Should be a string of length 2, with the start and end quote.	`'<>'`

Notes

The query argument may contain variables for IRIs and literals, to be substituted using the iris and literals arguments. These variables are prefixed $. This makes them easy to distinguish from query variables, that are typically prefixed with ?.

The query substitutions may be useful when the query is constructed from user input, since they are properly escaped and will be inserted in the query as a single token. This may prevent sparql injection attacks.

Source code in tripper/utils.py

def substitute_query(
    query: str,
    iris: "Optional[dict]" = None,
    literals: "Optional[dict]" = None,
    prefixes: "Optional[dict]" = None,
    iriquote: str = "<>",
) -> str:
    """Fill in `$`-prefixed IRI and literal placeholders in a SPARQL query.

    Arguments:
        query: The SPARQL query, possibly containing `$name` placeholders.
        iris: Maps placeholder names to IRIs.  Each IRI is expanded with
            `expand_iri()` using `prefixes`, percent-quoted and wrapped
            in the `iriquote` characters.
        literals: Maps placeholder names to literal values.  Each value
            is passed through `Literal(...)` and inserted in its N3
            form.  A name that also occurs in `iris` gets the literal.
        prefixes: Dict mapping prefixes to namespace URLs.  Defaults to
            an empty dict when `iris` is given.
        iriquote: Start and end quote characters to put around IRIs.
            A single character is used for both ends.  An empty string
            disables quoting.

    Returns:
        The query with all known placeholders replaced.  Placeholders
        without a mapping are left as they are.

    Raises:
        ValueError: If `iriquote` is longer than two characters.

    Notes:
        Placeholders use `$` so that they do not collide with ordinary
        SPARQL query variables, which typically start with `?`.

        Since each substituted value is escaped and inserted as a
        single token, this function may help prevent SPARQL injection
        when a query is assembled from user input.

        A warning is issued if the end quote is alphanumeric or one of
        the unescaped special IRI characters.
    """
    # Special IRI characters that are left unescaped when quoting.
    safe = "-._~:/?#@+&;="

    if iriquote:
        if len(iriquote) > 2:
            raise ValueError(
                f"`iriquote` cannot be more than 2 characters: '{iriquote}'"
            )
        if len(iriquote) == 1:
            # One character given: use it as both start and end quote.
            iriquote = iriquote[0] * 2
        endquote = iriquote[1]
        if endquote.isalnum() or endquote in safe:
            warnings.warn(
                f"End quote '{endquote}' is alphanumeric or in '{safe}'"
            )

    mapping = {}

    if iris:
        if prefixes is None:
            prefixes = {}
        # The quotes are the same for every IRI, so unpack them once.
        start, end = iriquote if iriquote else ("", "")  # type: ignore[misc]
        for variable, iri in iris.items():
            escaped = urllib.parse.quote(
                expand_iri(iri, prefixes=prefixes), safe=safe
            )
            mapping[variable] = f"{start}{escaped}{end}"

    if literals:
        for variable, value in literals.items():
            mapping[variable] = Literal(value).n3()

    template = string.Template(query)
    return template.safe_substitute(mapping)

`tfilter(triples, subject=None, predicate=None, object=None)` ¶

Filters out non-matching triples.

Parameters:

Name	Type	Description	Default
`triples`	`Iterable[Triple]`	Triples to filter from.	required
`subject`	`Optional[Union[Iterable[str], str]]`	If given, only keep triples whose subject matches `subject`. Can be an iterable of subjects.	`None`
`predicate`	`Optional[Union[Iterable[str], str]]`	If given, only keep triples whose predicate matches `predicate`. Can be an iterable of predicates.	`None`
`object`	`Optional[Union[Iterable, str, Literal]]`	If given, only keep triples whose object matches `object`. Can be an iterable of objects.	`None`

Returns:

Type	Description
`Generator[Triple, None, None]`	A generator over matching triples.

Source code in tripper/utils.py

def tfilter(
    triples: "Iterable[Triple]",
    subject: "Optional[Union[Iterable[str], str]]" = None,
    predicate: "Optional[Union[Iterable[str], str]]" = None,
    object: "Optional[Union[Iterable, str, Literal]]" = None,
) -> "Generator[Triple, None, None]":
    """Yield only the triples that match the given criteria.

    A criterion that is `None` (or otherwise falsy) places no
    restriction on the corresponding position of the triple.

    Parameters:
        triples: Triples to filter from.
        subject: If given, only keep triples whose subject matches
            `subject`.  Can be an iterable of subjects.
        predicate: If given, only keep triples whose predicate matches
            `predicate`.  Can be an iterable of predicates.
        object: If given, only keep triples whose object matches `object`.
            Can be an iterable of objects.

    Returns:
        A generator over matching triples.
    """
    for s, p, o in triples:
        # A plain string is compared for equality; anything else is
        # treated as a collection and tested for membership.
        if subject:
            if isinstance(subject, str):
                if s != subject:
                    continue
            elif s not in subject:
                continue

        if predicate:
            if isinstance(predicate, str):
                if p != predicate:
                    continue
            elif p not in predicate:
                continue

        # For objects, a single Literal also counts as a scalar value.
        if object:
            if isinstance(object, (str, Literal)):
                if o != object:
                    continue
            elif o not in object:
                continue

        yield s, p, o

utils¶

AttrDict (dict) ¶

copy(self) ¶

as_python(value) ¶

bnode_iri(prefix='', source='', length=5) ¶

check_function(func, s, exceptions) ¶

check_service_availability(url, timeout=5, interval=1) ¶

en(value) ¶

expand_iri(iri, prefixes, strict=False) ¶

extend_namespace(namespace, triplestore, format=None) ¶

function_id(func, length=4) ¶

get_entry_points(group) ¶

infer_iri(obj) ¶

is_curie(curie, exclude_netloc=True) ¶

is_uri(uri, require_netloc=True, allow_unescaped=False, safe='%:~/?&;=#') ¶

openfile(url, mode='rt', timeout=3, **kwargs) ¶

parse_literal(literal) ¶

parse_object(obj) ¶

prefix_iri(iri, prefixes, strict=False) ¶

random_string(length=8) ¶

recursive_update(d, other, append=True, cls=None) ¶

split_iri(iri) ¶

substitute_query(query, iris=None, literals=None, prefixes=None, iriquote='<>') ¶

tfilter(triples, subject=None, predicate=None, object=None) ¶

`AttrDict (dict)` ¶

`copy(self)` ¶

`as_python(value)` ¶

`bnode_iri(prefix='', source='', length=5)` ¶

`check_function(func, s, exceptions)` ¶

`check_service_availability(url, timeout=5, interval=1)` ¶

`en(value)` ¶

`expand_iri(iri, prefixes, strict=False)` ¶

`extend_namespace(namespace, triplestore, format=None)` ¶

`function_id(func, length=4)` ¶

`get_entry_points(group)` ¶

`infer_iri(obj)` ¶

`is_curie(curie, exclude_netloc=True)` ¶

`is_uri(uri, require_netloc=True, allow_unescaped=False, safe='%:~/?&;=#')` ¶

`openfile(url, mode='rt', timeout=3, **kwargs)` ¶

`parse_literal(literal)` ¶

`parse_object(obj)` ¶

`prefix_iri(iri, prefixes, strict=False)` ¶

`random_string(length=8)` ¶

`recursive_update(d, other, append=True, cls=None)` ¶

`split_iri(iri)` ¶

`substitute_query(query, iris=None, literals=None, prefixes=None, iriquote='<>')` ¶

`tfilter(triples, subject=None, predicate=None, object=None)` ¶