keywords

Parse keyword definitions and generate context.

Keywords

A class representing all keywords within a theme.

Source code in tripper/datadoc/keywords.py
class Keywords:
    """A class representing all keywords within a theme."""

    # pylint: disable=too-many-public-methods

    rootdir = Path(__file__).absolute().parent.parent.parent.resolve()

    def __init__(
        self,
        theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
        yamlfile: "Optional[FileLoc]" = None,
        timeout: float = 3,
    ) -> None:
        """Initialises keywords object.

        Arguments:
            theme: IRI of one or more themes to load keywords for.
            yamlfile: A YAML file with keyword definitions to parse.  May also
                be a URI, in which case it will be accessed via HTTP GET.
                Deprecated. Use the `add_yaml()` or `add()` methods instead.
            timeout: Timeout in case `yamlfile` is a URI.

        Attributes:
            data: The dict loaded from the keyword yamlfile.
            keywords: A dict mapping keywords (name/prefixed/iri) to dicts
                describing the keywords.
            theme: IRI of a theme or scientific domain that the keywords
                belong to.
        """
        default_prefixes = AttrDict(ddoc=str(DDOC))
        self.theme = None  # theme for this object
        self.data = AttrDict(prefixes=default_prefixes, resources=AttrDict())

        # A "view" into `self.data`. A dict mapping short, prefixed
        # and expanded keyword names to corresponding value dicts in
        # self.data.
        self.keywords = AttrDict()

        # Themes and files that have been parsed
        self.parsed: "set" = set()

        if theme:
            self.add_theme(theme)

        if yamlfile:
            warnings.warn(
                "The `yamlfile` argument is deprecated. Use the `add_yaml()` "
                "or `add()` methods instead.",
                DeprecationWarning,
            )
            if isinstance(yamlfile, (str, Path)):
                self.load_yaml(yamlfile, timeout=timeout)
            else:
                for path in yamlfile:
                    self.load_yaml(path, timeout=timeout)

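    # Example (illustrative, assuming the default "ddoc:datadoc" theme):
    #
    #     keywords = Keywords()
    #     "title" in keywords        # -> True
    #     keywords["title"].iri      # -> 'dcterms:title'
    #     len(keywords)              # number of keywords (one per CURIE key)
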
    def __contains__(self, item):
        return item in self.keywords

    def __getitem__(self, key):
        return self.keywords[key]

    def __iter__(self):
        return iter(k for k in self.keywords if is_curie(k))

    def __len__(self):
        return len(list(self.__iter__()))

    def __dir__(self):
        return dir(Keywords) + ["data", "keywords", "theme"]

    def __eq__(self, other):
        return self.data == other.data and self.theme == other.theme

    def _set_keyword(self, keywords, keyword, value, redefine=False):
        """Add new keyword-value pair to `keywords` dict."""
        # value = AttrDict(value)
        expanded = expand_iri(value.iri, self.get_prefixes())
        prefixed = prefix_iri(expanded, self.get_prefixes())
        if redefine or keyword not in keywords:
            keywords[keyword] = value
        if redefine or prefixed not in keywords:
            keywords[prefixed] = value
        if redefine or expanded not in keywords:
            keywords[expanded] = value

    def _set_keywords(self, clear=True, redefine=False):
        """Update internal keywords attribute to data attribute.

        Arguments:
            clear: If false, only new keywords will be added, but nothing
                removed.
            redefine: Whether to redefine existing keywords.
        """
        if clear:
            self.keywords.clear()
        for clsvalue in self.data.get("resources", AttrDict()).values():
            for keyword, value in clsvalue.get("keywords", AttrDict()).items():
                self._set_keyword(
                    self.keywords, keyword, value, redefine=redefine
                )

    def copy(self):
        """Returns a copy of self."""
        new = Keywords(theme=None)
        new.theme = self.theme
        new.data = deepcopy(self.data)
        new.keywords = deepcopy(self.keywords)
        new.parsed = self.parsed.copy()
        return new

    def add(
        self,
        keywords: "Optional[KeywordsType]",
        format: "Optional[Union[str, Sequence]]" = None,
        timeout: float = 3,
        strict: bool = False,
        redefine: str = "raise",
    ) -> None:
        """Add `keywords` to this Keywords object.

        Arguments:
            keywords: Keywords definitions to add to this Keyword object.
                May be another Keyword object, path to a file, theme or a
                sequence of these.
            format: Format of `keywords`. Recognised formats include:
                yaml, csv, tsv, turtle, xml, json-ld, rdfa, ...
            timeout: Timeout when accessing remote files.
            strict: Whether to raise an `InvalidKeywordError` exception if
                `keywords` contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        if not isinstance(keywords, str) and isinstance(keywords, Sequence):
            if isinstance(format, str):
                format = [format] * len(keywords)
            elif format and len(format) != len(keywords):
                raise TypeError(
                    "If given, `format` must have the same length as "
                    "`keywords`"
                )

        def _add(kw, fmt):
            if kw is None:
                pass
            elif isinstance(kw, Keywords):
                self.theme = merge(self.theme, kw.theme)
                recursive_update(self.data, kw.data, cls=AttrDict)
                self._set_keywords(clear=False)
            elif isinstance(kw, dict):
                self._load_yaml(kw, strict=strict, redefine=redefine)
            elif not isinstance(kw, str) and isinstance(kw, Sequence):
                for i, e in enumerate(kw):
                    _add(e, fmt[i] if fmt else None)
            elif isinstance(kw, (str, Path, IOBase)):
                if (
                    isinstance(kw, str)
                    and ":" in kw
                    and not (
                        kw.startswith("/") or kw.startswith("./") or is_uri(kw)
                    )
                ):
                    self.add_theme(
                        kw,
                        timeout=timeout,
                        strict=strict,
                        redefine=redefine,
                    )
                else:
                    if not fmt:
                        name = kw.name if hasattr(kw, "name") else kw
                        fmt = Path(name).suffix
                    fmt = fmt.lstrip(".").lower()
                    # pylint:disable=consider-using-get
                    if fmt in RDFLIB_SUFFIX_FORMAT_MAP:
                        fmt = RDFLIB_SUFFIX_FORMAT_MAP[fmt]

                    if fmt in ("yaml", "yml"):
                        self.load_yaml(
                            kw,
                            timeout=timeout,
                            strict=strict,
                            redefine=redefine,
                        )
                    elif fmt in ("csv", "tsv", "xlsx", "excel"):
                        self.load_table(kw, format=fmt)
                    else:
                        self.load_rdffile(
                            kw,
                            format=fmt,
                            timeout=timeout,
                            strict=strict,
                            redefine=redefine,
                        )
            else:
                raise TypeError(
                    "`keywords` must be a KeywordsType object (Keywords "
                    "instance, dict, IO, Path, string or sequence). "
                    f"Got: {type(kw)}"
                )

        _add(keywords, format)

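    # Example (sketch): `add()` dispatches on its argument, so a theme IRI,
    # a YAML file and an RDF file are all handled (file names below are
    # hypothetical):
    #
    #     keywords.add("ddoc:datadoc")      # theme      -> add_theme()
    #     keywords.add("my_keywords.yaml")  # YAML file  -> load_yaml()
    #     keywords.add("vocab.ttl")         # RDF file   -> load_rdffile()
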
    def add_theme(
        self,
        theme: "Union[str, Sequence[str]]",
        timeout: float = 3,
        strict: bool = False,
        redefine: str = "raise",
    ) -> None:
        """Add keywords for `theme`, where `theme` is the IRI of a
        theme or scientific domain or a list of such IRIs.

        Arguments:
            theme: IRI (or list of IRIs) of a theme/scientific domain to load.
            timeout: Timeout when accessing remote files.
            strict: Whether to raise an `InvalidKeywordError` exception if the
                theme contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        if isinstance(theme, str):
            theme = [theme]

        parsedkey = (tuple(theme), strict, redefine)
        if parsedkey in self.parsed:
            return

        for name in theme:  # type: ignore
            expanded = expand_iri(name, self.get_prefixes())
            prefixed = prefix_iri(name, self.get_prefixes())
            add(
                self.data,
                "theme",
                prefixed,
            )
            for ep in get_entry_points("tripper.keywords"):
                if expand_iri(ep.value, self.get_prefixes()) == expanded:
                    package_name, path = ep.name.split("/", 1)
                    package = import_module(package_name)
                    fullpath = (
                        Path(package.__file__).parent / path  # type: ignore
                    )
                    self.add(
                        fullpath,
                        timeout=timeout,
                        strict=strict,
                        redefine=redefine,
                    )
                    break
            else:
                # Fallback in case the entry point is not installed
                if expanded == DDOC.datadoc:
                    self.load_yaml(
                        self.rootdir
                        / "tripper"
                        / "context"
                        / "0.3"
                        / "keywords.yaml",
                        timeout=timeout,
                        strict=strict,
                        redefine=redefine,
                    )
                else:
                    raise TypeError(f"Unknown theme: {name}")

        self.parsed.add(parsedkey)

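    # Example (sketch): a package may provide keywords for its own theme via
    # a "tripper.keywords" entry point, whose name is "<package>/<path>" and
    # whose value is the theme IRI.  In pyproject.toml this could look like
    # (package, path and theme are hypothetical):
    #
    #     [project.entry-points."tripper.keywords"]
    #     "mypackage/keywords.yaml" = "myns:mytheme"
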
    def load_yaml(
        self,
        yamlfile: "Union[Path, str]",
        timeout: float = 3,
        strict: bool = True,
        redefine: str = "raise",
    ) -> None:
        """Load YAML file with keyword definitions.

        Arguments:
            yamlfile: Path or URL to a YAML file to load.
            timeout: Timeout when accessing remote files.
            strict: Whether to raise an `InvalidKeywordError` exception if the
                file contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        parsedkey = (yamlfile, strict, redefine)
        if parsedkey in self.parsed:
            return

        with openfile(yamlfile, timeout=timeout, mode="rt") as f:
            d = yaml.safe_load(f)
        try:
            self._load_yaml(d, strict=strict, redefine=redefine)
        except Exception as exc:
            raise ParseError(f"error parsing '{yamlfile}'") from exc

        self.parsed.add(parsedkey)

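    # Example (sketch of the YAML layout accepted by `load_yaml()`, inferred
    # from `_load_yaml()` below; the names and IRIs are hypothetical):
    #
    #     prefixes:
    #       ex: http://example.com/ns#
    #     resources:
    #       MyClass:
    #         iri: ex:MyClass
    #         keywords:
    #           myKeyword:
    #             iri: ex:myKeyword
    #             range: rdfs:Literal
    #             conformance: optional
    #             description: A keyword defined for MyClass.
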
    def _load_yaml(
        self,
        d: dict,
        strict: bool = True,
        redefine: str = "raise",
    ) -> None:
        """Parse a dict with keyword definitions following the format of
        the YAML file.

        Arguments:
            d: Dict with keyword definitions following the YAML file format.
            strict: Whether to raise an `InvalidKeywordError` exception if `d`
                contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        # pylint: disable=too-many-nested-blocks,too-many-statements
        # pylint: disable=too-many-locals
        self.add(d.get("basedOn"))

        required_resource_keys = {"iri"}
        valid_resource_keys = {
            "iri",
            "subClassOf",
            "description",
            "usageNote",
            "keywords",
        }
        required_keywords = {"iri"}
        valid_keywords = {
            "name",
            "iri",
            "type",
            "subPropertyOf",
            "inverseOf",  # XXX - to be implemented
            "domain",
            "range",
            "datatype",
            "inverse",  # XXX - to be implemented (remove?)
            "unit",  # XXX - to be implemented
            "conformance",
            "description",
            "usageNote",
            "theme",
            "default",
        }
        iri_keywords = {
            "iri",
            "type",
            "subPropertyOf",
            "inverseOf",
            "domain",
            "range",
            "datatype",
        }
        valid_conformances = ["mandatory", "recommended", "optional"]

        def to_prefixed(x):
            """Help function that converts an IRI or list of IRIs to
            prefixed IRIs."""
            if isinstance(x, str):
                return self.prefixed(x, strict=False)
            return [to_prefixed(e) for e in x]

        # Create deep copies that we are updating
        prefixes = deepcopy(self.data.prefixes)
        resources = deepcopy(self.data.resources)
        keywords = deepcopy(self.keywords)

        # Prefixes
        for prefix, ns in d.get("prefixes", AttrDict()).items():
            if prefix in prefixes and ns != prefixes[prefix]:
                raise PrefixMismatchError(
                    f"prefix '{prefix}' is already mapped to "
                    f"'{prefixes[prefix]}'. Cannot redefine it to '{ns}'"
                )
            prefixes[prefix] = ns

        # Map keywords IRIs to keyword definitions
        iridefs = {}
        for defs in d.get("resources", {}).values():
            for kw, val in defs.get("keywords", {}).items():
                # Check that value has all the required keywords
                for k in required_keywords:
                    if k not in val:
                        raise MissingKeyError(f"no '{k}' in keyword '{kw}'")
                key = prefix_iri(val["iri"], prefixes)
                if len(val) > 1 or key not in iridefs:
                    iridefs[key] = val

        # Resources
        for cls, defs in d.get("resources", AttrDict()).items():
            resval = resources.get(cls, AttrDict())

            defs = AttrDict(defs).copy()
            for key in required_resource_keys:
                if key not in defs:
                    raise MissingKeyError(
                        f"missing required key '{key}' for resource '{cls}'"
                    )
            for key in defs:
                if strict and key not in valid_resource_keys:
                    raise InvalidDatadocError(f"invalid resource key: '{key}'")
            # TODO: Check for redefinition of existing class

            resval.iri = prefix_iri(defs.iri, prefixes)
            if "subClassOf" in defs:
                resval.subClassOf = to_prefixed(defs.subClassOf)
            if "description" in defs:
                resval.description = defs.description
            if "usageNote" in defs:
                resval.usageNote = defs.usageNote
            resval.setdefault("keywords", AttrDict())

            for keyword, value in defs.get("keywords", AttrDict()).items():

                # If a value only contains an IRI, replace it with a more
                # elaborate definition (if it exists)
                if len(value) == 1:
                    value = AttrDict(iridefs[value["iri"]])

                # Check conformance values
                if "conformance" in value:
                    c = value["conformance"]
                    if c not in valid_conformances:
                        raise DatadocValueError(f"invalid conformance: {c}")

                # If strict, check that all keys are known
                if strict:
                    for k in value.keys():
                        if k not in valid_keywords:
                            raise InvalidKeywordError(
                                f"keyword '{keyword}' has invalid key: {k}"
                            )

                # Normalise IRIs in values to prefixed IRIs
                value = AttrDict(value).copy()
                for k in iri_keywords:
                    if k in value:
                        value[k] = to_prefixed(value[k])

                # Add extra annotations to value
                if "name" not in value or ":" in value.name:
                    value.name = keyword
                if "theme" in d:
                    add(value, "theme", d["theme"])
                add(value, "domain", prefix_iri(defs.iri, prefixes))

                # Check whether we try to redefine an existing keyword
                skip = False
                if keyword in keywords:
                    for k, v in value.items():
                        oldval = keywords[keyword].get(k)
                        if k in ("iri", "domain") or v == oldval:
                            continue
                        oldiri = keywords[keyword].iri
                        if value.iri == oldiri:
                            raise RedefineError(
                                "Cannot redefine existing concept "
                                f"'{value.iri}'. Trying to change "
                                f"property '{k}' from '{oldval}' to "
                                f"'{v}'."
                            )
                        if redefine == "raise":
                            raise RedefineError(
                                f"Trying to redefine keyword "
                                f"'{keyword}' from '{oldiri}' "
                                f"to '{value.iri}'."
                            )
                        if redefine == "skip":
                            skip = True
                            warnings.warn(
                                f"Skip redefinition of keyword: {keyword}",
                                SkipRedefineKeywordWarning,
                            )
                        elif redefine == "allow":
                            warnings.warn(
                                f"Redefining keyword '{keyword}' from "
                                f"'{oldiri}' to '{value.iri}'.",
                                RedefineKeywordWarning,
                            )
                        else:
                            raise ValueError(
                                "Invalid value of `redefine` "
                                f'argument: "{redefine}".  Should be '
                                'one of "allow", "keep" or "raise".'
                            )
                        break
                if skip:
                    continue

                kw = resval.keywords
                if keyword in kw:
                    kw[keyword].update(value)
                else:
                    kw[keyword] = value

                self._set_keyword(keywords, keyword, value, redefine=True)

            if cls in resources:
                resources[cls].update(resval)
            else:
                resources[cls] = resval

        # Everything succeeded, update instance
        self.data.prefixes.update(prefixes)
        self.data.resources.update(resources)
        self.keywords.update(keywords)

        # Run an extra round and add keywords we have missed.
        self._set_keywords(clear=False, redefine=False)

    def save_yaml(
        self,
        yamlfile: "Union[Path, str]",
        keywords: "Optional[Sequence[str]]" = None,
        classes: "Optional[Union[str, Sequence[str]]]" = None,
        themes: "Optional[Union[str, Sequence[str]]]" = None,
    ) -> None:
        """Save YAML file with keyword definitions.

        Arguments:
            yamlfile: File to save keyword definitions to.
            keywords: Sequence of keywords to include.
            classes: Include keywords that have these classes in their domain.
            themes: Include keywords for these themes.

        """
        keywords, classes, themes = self._keywords_list(
            keywords, classes, themes
        )
        resources = {}
        for cls, clsval in self.data.resources.items():
            if self.prefixed(cls) in classes:
                resources[cls] = dict(clsval.copy())
                resources[cls]["keywords"] = {}
                for k, v in self.data.resources[cls].keywords.items():
                    if self.prefixed(k) in keywords:
                        resources[cls]["keywords"][k] = dict(v)
        data = dict(self.data.copy())
        del data["resources"]
        recursive_update(data, {}, cls=dict)
        data["resources"] = resources

        with open(yamlfile, "wt", encoding="utf-8") as f:
            yaml.safe_dump(data, f, sort_keys=False)

    def load_table(
        self,
        filename: "FileLoc",
        format: "Optional[str]" = None,  # pylint: disable=unused-argument
        prefixes: "Optional[dict]" = None,
        theme: "Optional[str]" = None,
        basedOn: "Optional[Union[str, List[str]]]" = None,
        **kwargs,
    ) -> None:
        """Load keywords from a csv file.

        Arguments:
            filename: File to load.
            format: File format. Unused.  Only csv is currently supported.
            prefixes: Dict with additional prefixes used in the table.
            theme: Theme defined by the table.
            basedOn: Theme(s) that the table is based on.
            kwargs: Keyword arguments passed on to TableDoc.parse_csv().
        """
        # pylint: disable=import-outside-toplevel
        from tripper.datadoc.tabledoc import TableDoc

        td = TableDoc.parse_csv(
            filename, type=None, prefixes=prefixes, **kwargs
        )
        dicts = td.asdicts()
        self.fromdicts(dicts, prefixes=prefixes, theme=theme, basedOn=basedOn)

    def save_table(
        self,
        filename: "FileLoc",
        format: "Optional[str]" = None,  # pylint: disable=unused-argument
        names: "Optional[Sequence]" = None,
        strip: bool = True,
        keymode: str = "name",
        **kwargs,
    ) -> None:
        # pylint: disable=line-too-long
        """Load keywords from a csv file.

        Arguments:
            filename: File to save to.
            format: File format. Unused.  Only csv is currently supported.
            names: A sequence of keyword or class names to save.  The
                default is to save all keywords.
            strip: Whether to strip leading and trailing whitespaces
                from cells.
            keymode: How to represent column headers.  Should be either
                "name", "prefixed" (CURIE) or "expanded" (full IRI).
            kwargs: Additional keyword arguments passed to the writer.
                For more details, see [write_csv()].

        References:
        [write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
        """
        # pylint: disable=import-outside-toplevel
        from tripper.datadoc.tabledoc import TableDoc

        dicts = self.asdicts(names, keymode=keymode)
        td = TableDoc.fromdicts(dicts, type=None, keywords=self, strip=strip)
        td.write_csv(filename, **kwargs)

    def keywordnames(self) -> "list":
        """Return a list with all keyword names defined in this instance."""
        return [k for k in self.keywords.keys() if ":" not in k]

    def classnames(self) -> "list":
        """Return a list with all class names defined in this instance."""
        return list(self.data.resources.keys())

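    # Example (illustrative; the actual names depend on the loaded themes):
    #
    #     keywords.keywordnames()  # e.g. ['title', 'description', ...]
    #     keywords.classnames()    # e.g. ['Dataset', 'Resource', ...]
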
    def asdicts(
        self,
        names: "Optional[Sequence]" = None,
        keymode: str = "prefixed",
    ) -> "List[dict]":
        """Return the content of this Keywords object as a list of JSON-LD
        dicts.

        Arguments:
            names: A sequence of keyword or class names.  The
                default is to return all keywords.
            keymode: How to represent keys.  Should be either "name",
                "prefixed" (CURIE) or "expanded" (full IRI).

        Returns:
            List of JSON-LD dicts corresponding to `names`.
        """
        keymodes = {
            "name": iriname,
            "prefixed": None,
            "expanded": self.expanded,
        }
        maps = {
            "subPropertyOf": "rdfs:subPropertyOf",
            "unit": "ddoc:unitSymbol",
            "description": "dcterms:description",
            "usageNote": "vann:usageNote",
            "theme": "dcat:theme",
        }

        def key(k):
            """Return key `k` accordig to `keymode`."""
            return keymodes[keymode](k) if keymodes[keymode] else k

        conformance_indv = {v: k for k, v in CONFORMANCE_MAPS.items()}
        if names is None:
            names = self.keywordnames()

        classes = []
        dicts = []
        for name in names:
            if name not in self.keywords:
                classes.append(name)
                continue
            d = self.keywords[name]
            if "range" in d and self.expanded(d.range) != RDFS.Literal:
                proptype = "owl:ObjectProperty"
                range = d.range
            elif (
                "datatype" in d and self.expanded(d.datatype) != RDF.langString
            ):
                proptype = "owl:DatatypeProperty"
                range = d.get("datatype")
            else:
                proptype = "owl:AnnotationProperty"
                range = d.get("datatype")

            dct = {
                "@id": d.iri,
                "@type": proptype,
                key("rdfs:label"): d.name,
            }
            if "domain" in d:
                dct[key("rdfs:domain")] = d.domain
            if range:
                dct[key("rdfs:range")] = range
            if "conformance" in d:
                dct[key("ddoc:conformance")] = conformance_indv.get(
                    d.conformance, d.conformance
                )
            for k, v in d.items():
                if k in maps:
                    dct[key(maps[k])] = v
            dicts.append(dct)

        if classes:
            classmaps = {}
            for k, v in self.data.resources.items():
                classmaps[k] = k
                classmaps[self.expanded(k)] = k
                classmaps[self.prefixed(k)] = k

            for name in classes:
                d = self.data.resources[classmaps[name]]
                dct = {"@id": d.iri, "@type": OWL.Class}
                if "subClassOf" in d:
                    dct[key("rdfs:subClassOf")] = d.subClassOf
                if "description" in d:
                    dct[key("dcterms:description")] = d.description
                if "usageNote" in d:
                    dct[key("vann:usageNote")] = d.usageNote
                dicts.append(dct)

        return dicts

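    # Example (illustrative output shape; the exact keys and values depend
    # on the loaded theme and on `keymode`):
    #
    #     keywords.asdicts(["title"])
    #     # -> [{'@id': 'dcterms:title',
    #     #      '@type': 'owl:AnnotationProperty',
    #     #      'rdfs:label': 'title',
    #     #      ...}]
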
    def fromdicts(
        self,
        dicts: "Sequence[dict]",
        prefixes: "Optional[dict]" = None,
        theme: "Optional[str]" = None,
        basedOn: "Optional[Union[str, List[str]]]" = None,
        strict: bool = False,
        redefine: str = "raise",
    ) -> None:
        """Populate this Keywords object from a sequence of dicts.

        Arguments:
            dicts: A sequence of JSON-LD dicts to populate this keywords object
                from.  Their format should follow what is returned by
                tripper.datadoc.acquire().
            prefixes: Dict with additional prefixes used by `dicts`.
            theme: Theme defined by `dicts`.
            basedOn: Theme(s) that `dicts` are based on.
            strict: Whether to raise an `InvalidKeywordError` exception if
                `dicts` contain an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword.
                  - "skip": Don't redefine existing keyword.
                  - "raise": Raise an RedefineError (default).

        """
        data = self._fromdicts(
            dicts,
            prefixes=prefixes,
            theme=theme,
            basedOn=basedOn,
        )
        self._load_yaml(data, strict=strict, redefine=redefine)

    def _fromdicts(
        self,
        dicts: "Sequence[dict]",
        prefixes: "Optional[dict]" = None,
        theme: "Optional[str]" = None,
        basedOn: "Optional[Union[str, List[str]]]" = None,
    ) -> dict:
        """Help method for `fromdicts()` that returns a dict with
        keyword definitions following the format of the YAML file.
        """
        # pylint: disable=too-many-locals,too-many-statements

        def to_prefixed(x, prefixes, strict=True):
            """Help function that converts an IRI or list of IRIs to
            prefixed IRIs."""
            if isinstance(x, str):
                return prefix_iri(x, prefixes, strict=strict)
            return [to_prefixed(e, prefixes, strict=strict) for e in x]

        # Prefixes (merged with self.data.prefixes)
        p = self.get_prefixes().copy()
        if prefixes:
            for prefix, ns in prefixes.items():
                if prefix in p and p[prefix] != ns:
                    raise PrefixMismatchError(
                        f"adding prefix `{prefix}: {ns}` but it is already "
                        f"defined to '{p[prefix]}'"
                    )
            p.update({k: str(v) for k, v in prefixes.items()})

        def isproperty(v):
            if "@type" not in v:
                return False
            types = [v["@type"]] if isinstance(v["@type"], str) else v["@type"]
            for t in types:
                exp = expand_iri(t, p, strict=True)
                if exp in (
                    OWL.AnnotationProperty,
                    OWL.ObjectProperty,
                    OWL.DatatypeProperty,
                    RDF.Property,
                ):
                    return True
            return False

        entities = {expand_iri(d["@id"], p): d for d in dicts}
        properties = {k: v for k, v in entities.items() if isproperty(v)}
        classes = {k: v for k, v in entities.items() if k not in properties}

        data = AttrDict()
        if theme:
            data.theme = theme
        if basedOn:
            data.basedOn = basedOn
        data.prefixes = p
        data.resources = AttrDict()
        resources = data.resources

        # Add classes
        clslabels = {}
        for k, v in classes.items():
            d = AttrDict(iri=prefix_iri(k, p))
            for kk, vv in v.items():
                if kk in ("description", "usageNote"):
                    d[kk] = vv
                if kk == "subClassOf":
                    if isinstance(vv, str):
                        d[kk] = to_prefixed(vv, p, strict=True)
            d.setdefault("keywords", AttrDict())
            label = v["label"] if "label" in v else iriname(k)
            resources[label] = d
            clslabels[d.iri] = label

        # Add properties
        for propname, value in properties.items():
            name = iriname(propname)
            label = value["label"] if "label" in value else name
            d = AttrDict(iri=value["@id"])
            if "@type" in value:
                d.type = to_prefixed(value["@type"], p)
            d.domain = value.get("domain", RDFS.Resource)

            for domain in asseq(d.domain):
                dlabel = prefix_iri(domain, p, strict=True)
                domainname = clslabels.get(dlabel, iriname(domain))
                if domainname not in resources:
                    if domainname not in self.data.resources:
                        if domainname not in ("Resource",):
                            logger.info(
                                f"Adding undefined domain '{domain}' for "
                                f"keyword '{label}'"
                            )
                        r = AttrDict(
                            iri=prefix_iri(domain, p),
                            keywords=AttrDict(),
                        )
                    else:
                        r = self.data.resources[domainname].copy()
                    resources[domainname] = r
                    r.keywords[label] = d
                else:
                    resources[domainname].keywords[label] = d
            if "range" in value:
                _types = asseq(d.get("type", OWL.AnnotationProperty))
                types = [expand_iri(t, p) for t in _types]
                if OWL.ObjectProperty in types:
                    d.range = value["range"]
                else:
                    d.range = "rdfs:Literal"
                    if "range" in value:
                        d.datatype = value["range"]
            else:
                d.range = "rdfs:Literal"
                # TODO: Define if we accept missing datatype for literals
            if "conformance" in value:
                d.conformance = CONFORMANCE_MAPS[value["conformance"]]
            if "unitSymbol" in value:
                d.unit = value["unitSymbol"]
            for k, v in value.items():
                if (
                    k not in ("@id", "@type", "domain", "label", "name")
                    and k not in d
                ):
                    d[k] = v

        return data

    def missing_keywords(
        self,
        ts: "Triplestore",
        include_classes: bool = False,
        return_existing: bool = False,
    ) -> "Union[list, Tuple[list, list]]":
        """List keywords not defined in triplestore `ts`.

        Arguments:
            ts: Triplestore object to check.
            include_classes: Also return missing classes.
            return_existing: If true, two lists are returned:
                - list of keywords missing in `ts`
                - list of keywords existing in `ts`

        Returns:
            List with the names of keywords in this instance that are
            not defined in triplestore `ts`.
        """
        expanded = {k for k in self.keywords.keys() if "://" in k}
        if include_classes:
            expanded.update(self.expanded(c) for c in self.classnames())

        if not expanded:
            return []

        query = f"""
        SELECT ?s WHERE {{
          VALUES ?s {{ { ' '.join(f'<{iri}>' for iri in expanded) } }}
          ?s a ?o
        }}
        """
        existing = {r[0] for r in ts.query(query)}
        missing = expanded.difference(existing)
        missing_names = [self.shortname(k) for k in missing]

        if return_existing:
            existing_names = [self.keywords[k].name for k in existing]
            return missing_names, existing_names
        return missing_names

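    # Example (sketch): check which keywords are not yet defined in a
    # triplestore.
    #
    #     from tripper import Triplestore
    #     ts = Triplestore("rdflib")
    #     keywords.missing_keywords(ts)  # names of keywords missing in `ts`
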
    def _load_rdf(
        self, ts: "Triplestore", iris: "Optional[Sequence[str]]" = None
    ) -> "Sequence[dict]":
        """Help method for load_rdf(). Returns dicts loaded from triplestore
        `ts`.

        If `iris` is not given, all OWL properties in `ts` will be loaded.
        """
        # pylint: disable=import-outside-toplevel,too-many-nested-blocks
        # pylint: disable=too-many-locals
        from tripper.datadoc.context import Context
        from tripper.datadoc.dataset import acquire

        context = Context(self.get_context())

        if iris is None:
            query = """
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT DISTINCT ?s WHERE {
              VALUES ?o {
                owl:DatatypeProperty owl:ObjectProperty owl:AnnotationProperty
                rdf:Property
              }
              ?s a ?o .
            }
            """
            iris = [iri[0] for iri in ts.query(query)]

        prefixes = self.data.prefixes
        for prefix, ns in ts.namespaces.items():
            self.add_prefix(prefix, ns)

        # Maps JSON-LD key name to keyword
        names = {
            DDOC.unitSymbol: "unit",
            "ddoc:unitSymbol": "unit",
        }

        dicts = []
        for iri in iris:
            d = AttrDict()
            for k, v in acquire(ts, iri, context=context).items():
                d[names.get(k, k)] = v
            dicts.append(d)

        dct = {expand_iri(d["@id"], prefixes): d for d in dicts}

        # FIXME: Add domain and range to returned dicts
        # Add domain and range to dicts
        seen = set()
        for d in list(dct.values()):
            for ref in ("domain", "range"):
                if ref in d:
                    for domain in asseq(d[ref]):
                        expanded = expand_iri(domain, prefixes)
                        if expanded.startswith(str(XSD)):
                            continue
                        if expanded not in seen:
                            seen.add(expanded)
                            acquired = acquire(ts, expanded, context=context)
                            if acquired:
                                dct[expanded] = acquired  # type: ignore

        newdicts = list(dct.values())
        return newdicts

    def save_rdf(self, ts: "Triplestore") -> dict:
        """Save to triplestore."""
        # pylint: disable=import-outside-toplevel,cyclic-import
        from tripper.datadoc.dataset import store

        for prefix, ns in self.get_prefixes().items():
            ts.bind(prefix, ns)

        # Ensure that the schema for properties is stored
        load_datadoc_schema(ts)

        # Store all keywords that are not already in the triplestore
        missing = self.missing_keywords(ts, include_classes=True)
        dicts = self.asdicts(missing)
        return store(ts, dicts)

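    # Example (sketch, continuing the `missing_keywords()` example above):
    #
    #     keywords.save_rdf(ts)  # binds prefixes and stores missing keywords
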
    def load_rdf(
        self,
        ts: "Triplestore",
        iris: "Optional[Sequence[str]]" = None,
        strict: bool = False,
        redefine: str = "raise",
    ) -> None:
        """Populate this Keyword object from a triplestore.

        Arguments:
            ts: Triplestore to load keywords from.
            iris: IRIs to load. The default is to load IRIs corresponding to
                all properties and classes.
            strict: Whether to raise an `InvalidKeywordError` exception if the
                loaded data contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        dicts = self._load_rdf(ts, iris)
        self.fromdicts(
            dicts,
            prefixes=ts.namespaces,
            strict=strict,
            redefine=redefine,
        )

    def load_rdffile(
        self,
        rdffile: "FileLoc",
        format: "Optional[str]" = None,
        timeout: float = 3,
        iris: "Optional[Sequence[str]]" = None,
        strict: bool = False,
        redefine: str = "raise",
    ) -> None:
        """Load RDF from file or URL.

        Arguments:
            rdffile: File to load.
            format: Any format supported by rdflib.Graph.parse().
            timeout: Timeout in case `rdffile` is a URI.
            iris: IRIs to load. The default is to load IRIs corresponding to
                all properties and classes.
            strict: Whether to raise an `InvalidKeywordError` exception if the
                loaded data contains an unknown key.
            redefine: Determine how to handle redefinition of existing
                keywords.  Should be one of the following strings:
                  - "allow": Allow redefining a keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "skip": Don't redefine existing keyword. Emits a
                    `RedefineKeywordWarning`.
                  - "raise": Raise an RedefineError (default).

        """
        if format is None:
            format = guess_rdf_format(rdffile)

        ts = Triplestore("rdflib")
        with openfile(rdffile, timeout=timeout, mode="rt") as f:
            ts.parse(f, format=format)
        self.load_rdf(ts, iris=iris, strict=strict, redefine=redefine)

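    # Example (sketch; the file name is hypothetical):
    #
    #     keywords.load_rdffile("vocab.ttl")  # format guessed from suffix
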
    def isnested(self, keyword: str) -> bool:
        """Returns whether the keyword corresponds to an object property."""
        d = self.keywords[keyword]
        if "datatype" in d or d.range == "rdfs:Literal":
            return False
        return True

    def expanded(self, keyword: str, strict: bool = True) -> str:
        """Return the keyword expanded to its full IRI."""
        if keyword in self.keywords:
            iri = self.keywords[keyword].iri
        elif "resources" in self.data and keyword in self.data.resources:
            iri = self.data.resources[keyword].iri
        elif ":" in keyword or not strict:
            iri = keyword
        else:
            raise InvalidKeywordError(keyword)
        return expand_iri(iri, self.get_prefixes(), strict=strict)

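    # Example (illustrative; with the default theme, "title" maps to
    # dcterms:title):
    #
    #     keywords.expanded("title")
    #     # -> 'http://purl.org/dc/terms/title'
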
    def range(self, keyword: str) -> str:
        """Return the range of the keyword."""
        return self.keywords[keyword].range

    def superclasses(self, cls: str) -> "Union[str, list]":
        """Return a list with `cls` and it superclasses prefixed.

        Example:

        >>> keywords = Keywords()
        >>> keywords.superclasses("Dataset")
        ... # doctest: +NORMALIZE_WHITESPACE
        ['dcat:Dataset',
         'dcat:Resource',
         'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

        >>> keywords.superclasses("dcat:Dataset")
        ... # doctest: +NORMALIZE_WHITESPACE
        ['dcat:Dataset',
         'dcat:Resource',
         'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

        """
        if cls in self.data.resources:
            r = self.data.resources[cls]
        else:
            cls = prefix_iri(cls, self.get_prefixes())
            rlst = [r for r in self.data.resources.values() if cls == r.iri]
            if not rlst:
                raise NoSuchTypeError(cls)
            if len(rlst) > 1:
                raise RuntimeError(
                    f"{cls} matches more than one resource: "
                    f"{', '.join(r.iri for r in rlst)}"
                )
            r = rlst[0]

        if "subClassOf" in r:
            if isinstance(r.subClassOf, str):
                return [r.iri, r.subClassOf]
            return [r.iri] + r.subClassOf
        return r.iri

    def keywordname(self, keyword: str) -> str:
        """Return the short name of `keyword`."""
        warnings.warn(
            "Keywords.keywordname() is deprecated. Use Keywords.shortname() "
            "instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        if keyword not in self.keywords:
            raise InvalidKeywordError(keyword)
        return self.keywords[keyword].name

    def shortname(self, iri: str) -> str:
        """Return the short name of `iri`.

        Example:

        >>> keywords = Keywords()
        >>> keywords.shortname("dcterms:title")
        'title'

        """
        if iri in self.keywords:
            return self.keywords[iri].name
        if iri in self.data.resources.keys():
            return iri
        expanded = self.expanded(iri)
        for k, v in self.data.resources.items():
            if expanded == self.expanded(v.iri):
                return k
        raise InvalidKeywordError(iri)

    def prefixed(self, name: str, strict: bool = True) -> str:
        """Return prefixed name or `name`.

        Example:

        >>> keywords = Keywords()
        >>> keywords.prefixed("title")
        'dcterms:title'
        """
        if name in self.keywords:
            return prefix_iri(self.keywords[name].iri, self.get_prefixes())
        if name in self.data.resources:
            return prefix_iri(
                self.data.resources[name].iri,
                self.get_prefixes(),
                strict=strict,
            )
        if is_curie(name):
            return name
        return prefix_iri(name, self.get_prefixes(), strict=strict)

    def typename(self, type) -> str:
        """Return the short name of `type`.

        Example:

        >>> keywords = Keywords()
        >>> keywords.typename("dcat:Dataset")
        'Dataset'

        """
        if type in self.data.resources:
            return type
        prefixed = prefix_iri(type, self.get_prefixes())
        for name, r in self.data.resources.items():
            if prefixed == r.iri:
                return name
        raise NoSuchTypeError(type)

    def get_prefixes(self) -> dict:
        """Return prefixes dict."""
        return self.data.get("prefixes", {})

    def add_prefix(self, prefix, namespace, replace=False):
        """Bind `prefix` to `namespace`.

        If `namespace` is None, the prefix is removed.

        If `replace` is true, an existing namespace will be overridden.
        """
        if namespace is None:
            del self.data.prefixes[str(prefix)]
        elif replace:
            self.data.prefixes[str(prefix)] = str(namespace)
        else:
            self.data.prefixes.setdefault(str(prefix), str(namespace))

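    # Example (sketch):
    #
    #     keywords.add_prefix("ex", "http://example.com/ns#")
    #     keywords.get_prefixes()["ex"]  # -> 'http://example.com/ns#'
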
    def get_context(self) -> dict:
        """Return JSON-LD context as a dict.

        Note: The returned dict corresponds to the value of the "@context"
        keyword in a JSON-LD document.
        """
        ctx = {}
        ctx["@version"] = 1.1

        # Add prefixes to context
        prefixes = self.data.get("prefixes", {})
        for prefix, ns in prefixes.items():
            ctx[prefix] = ns

        resources = self.data.get("resources", {})

        # Translate datatypes
        translate = {"rdf:JSON": "@json"}

        # Add keywords (properties) to context
        for resource in resources.values():
            for k, v in resource.get("keywords", {}).items():
                iri = v["iri"]
                if "datatype" in v:
                    dt = v["datatype"]
                    if isinstance(dt, str):
                        dt = translate.get(dt, dt)
                    else:
                        dt = [translate.get(t, t) for t in dt]

                    d = {}
                    if v.get("reverse", "").lower() == "true":
                        d["@reverse"] = iri
                    else:
                        d["@id"] = iri

                    if dt == "rdf:langString" or "language" in v:
                        d["@language"] = v.get("language", "en")
                    else:
                        d["@type"] = dt

                    ctx[k] = d  # type: ignore
                elif v.get("range", "rdfs:Literal") == "rdfs:Literal":
                    ctx[k] = iri
                else:
                    ctx[k] = {  # type: ignore
                        "@id": iri,
                        "@type": "@id",
                    }

        # Add resources (classes) to context
        for k, v in resources.items():
            ctx.setdefault(k, v.iri)

        return ctx

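    # Example (sketch): the returned dict may be used as the "@context" of
    # a JSON-LD document (the document content is hypothetical):
    #
    #     doc = {"@context": keywords.get_context(), "title": "My dataset"}
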
    def save_context(self, outfile: "FileLoc", indent: int = 2) -> None:
        """Save JSON-LD context file.

        Arguments:
            outfile: File to save the JSON-LD context to.
            indent: Indentation level. Defaults to two.
        """
        context = {"@context": self.get_context()}
        with open(outfile, "wt", encoding="utf-8") as f:
            json.dump(context, f, indent=indent)
            f.write(os.linesep)

    def _keywords_list(
        self,
        keywords: "Optional[Sequence[str]]" = None,
        classes: "Optional[Union[str, Sequence[str]]]" = None,
        themes: "Optional[Union[str, Sequence[str]]]" = None,
    ) -> "Tuple[Set[str], Set[str], Set[str]]":
        """Help function returning a list of keywords corresponding to arguments
        `keywords`, `classes` and `themes`. See also save_markdown_table().

        Arguments:
            keywords: Sequence of keywords to include.
            classes: Include keywords that have these classes in their domain.
            themes: Include keywords for these themes.

        Returns:
            keywordset: Set with all included keywords.
            classset: Set with all included classes.
            themeset: Set with all included themes.
        """
        keywords = (
            set(self.prefixed(k) for k in asseq(keywords))
            if keywords
            else set()
        )
        classes = (
            set(self.prefixed(d) for d in asseq(classes)) if classes else set()
        )
        themes = (
            set(self.prefixed(t) for t in asseq(themes)) if themes else set()
        )
        orig_classes = classes.copy()
        orig_themes = themes.copy()

        if not keywords and not classes and not themes:
            keywords.update(self.prefixed(k) for k in self.keywordnames())

        for value in self.data.resources.values():
            for k, v in value.get("keywords", {}).items():
                vdomain = [
                    self.prefixed(d) for d in asseq(v.get("domain", ()))
                ]
                vtheme = [self.prefixed(t) for t in asseq(v.get("theme", ()))]
                if orig_classes:
                    for domain in vdomain:
                        prefixed = self.prefixed(domain)
                        if prefixed in orig_classes:
                            keywords.add(k)
                if orig_themes:
                    for theme in vtheme:
                        prefixed = self.prefixed(theme)
                        if prefixed in orig_themes:
                            keywords.add(k)

        for k in keywords:
            v = self.keywords[k]
            vdomain = [self.prefixed(d) for d in asseq(v.get("domain", ()))]
            vtheme = [self.prefixed(t) for t in asseq(v.get("theme", ()))]
            if vdomain and not classes.intersection(vdomain):
                classes.add(vdomain[0])
            if vtheme and not themes.intersection(vtheme):
                themes.add(vtheme[0])

        return keywords, classes, themes

    def _keywords_table(
        self,
        keywords: "Sequence[str]",
    ) -> "List[str]":
        """Help function for save_markdown_table().

        Returns a list of strings forming a Markdown table that documents
        the provided sequence of keywords.
        """
        # pylint: disable=too-many-locals,too-many-branches
        header = [
            "Keyword",
            "Range",
            "Conformance",
            "Definition",
            "Usage note",
        ]
        order = {"mandatory": 1, "recommended": 2, "optional": 3}
        refs = []
        table = []
        for keyword in keywords:
            d = self.keywords[keyword]
            rangestr = f"[{d.range}]" if "range" in d else ""
            if "datatype" in d:
                rangestr += (
                    ", " + ", ".join(d.datatype)
                    if isinstance(d.datatype, list)
                    else f"<br>({d.datatype})"
                )
            table.append(
                [
                    f"[{d.name}]",
                    rangestr,
                    f"{d.conformance}" if "conformance" in d else "",
                    f"{d.description}" if "description" in d else "",
                    f"{d.usageNote}" if "usageNote" in d else "",
                ]
            )
            refs.append(f"[{d.name}]: {self.expanded(d.iri)}")
            if "range" in d:
                refs.append(f"[{d.range}]: {self.expanded(d.range)}")
        table.sort(key=lambda row: order.get(row[2], 10))

        out = self._to_table(header, table)
        out.append("")
        out.extend(refs)
        out.append("")
        out.append("")
        return out

    def save_markdown_table(
        self, outfile: "FileLoc", keywords: "Sequence[str]"
    ) -> None:
        """Save markdown file with documentation of the keywords."""
        table = self._keywords_table(keywords)
        with open(outfile, "wt", encoding="utf-8") as f:
            f.write(os.linesep.join(table) + os.linesep)

    def save_markdown(
        self,
        outfile: "FileLoc",
        keywords: "Optional[Sequence[str]]" = None,
        classes: "Optional[Union[str, Sequence[str]]]" = None,
        themes: "Optional[Union[str, Sequence[str]]]" = None,
        explanation: bool = False,
        special: bool = False,
    ) -> None:
        """Save markdown file with documentation of the keywords.

        Arguments:
            outfile: File to save the markdown documentation to.
            keywords: Sequence of keywords to include.
            classes: Include keywords that have these classes in their domain.
            themes: Include keywords for these themes.
            explanation: Whether to include explanations of the column labels.
            special: Whether to generate documentation of special
                JSON-LD keywords.

        """
        # pylint: disable=too-many-locals,too-many-branches
        keywords, classes, themes = self._keywords_list(
            keywords, classes, themes
        )
        ts = Triplestore("rdflib")
        for prefix, ns in self.data.get("prefixes", {}).items():
            ts.bind(prefix, ns)

        out = [
            "<!-- Do not edit! This file is generated with Tripper. "
            "Edit the keywords.yaml file instead. -->",
            "",
            f"# Keywords{f' for theme: {themes}' if themes else ''}",
            (
                f"The tables below lists the keywords for the theme {themes}."
                if themes
                else ""
            ),
            "",
        ]
        column_explanations = [
            "The meaning of the columns are as follows:",
            "",
            "- **Keyword**: The keyword referring to a property used for "
            "the data documentation.",
            "- **Range**: Refer to the class for the values of the keyword.",
            "- **Conformance**: Whether the keyword is mandatory, recommended "
            "or optional when documenting the given type of resources.",
            "- **Definition**: The definition of the keyword.",
            "- **Usage note**: Notes about how to use the keyword.",
            "",
        ]
        special_keywords = [
            "## Special keywords (from JSON-LD)",
            "See the [JSON-LD specification] for more details.",
            "",
            # pylint: disable=line-too-long
            "| Keyword    | Range         | Conformance | Definition                                                              | Usage note |",
            "|------------|---------------|-------------|-------------------------------------------------------------------------|------------|",
            "| [@id]      | IRI           | mandatory   | IRI identifying the resource to document.                               |            |",
            "| [@type]    | IRI           | recommended | Ontological class defining the class of a node.                         |            |",
            "| [@context] | dict&#124list | optional    | Context defining namespace prefixes and additional keywords.            |            |",
            "| [@base]    | namespace     | optional    | Base IRI against which relative IRIs are resolved.                      |            |",
            "| [@vocab]   | namespace     | optional    | Used to expand properties and values in @type with a common prefix IRI. |            |",
            "| [@graph]   | list          | optional    | Used for documenting multiple resources.                                |            |",
            "",
        ]
        if explanation:
            out.extend(column_explanations)
        if special:
            out.extend(special_keywords)
        refs = []

        for cls in sorted(classes):
            name = self.prefixed(cls)
            shortname = iriname(name)
            if shortname in self.data.resources:
                resource = self.data.resources[shortname]
            else:
                for rname, resource in self.data.resources.items():
                    if self.prefixed(resource.iri) == name:
                        shortname = rname
                        break
                else:
                    raise MissingKeyError(cls)

            out.append("")
            out.append(f"## Properties on [{shortname}]")
            if "description" in resource:
                out.append(resource.description)
            if "subClassOf" in resource:
                out.append("")
                subcl = (
                    [resource.subClassOf]
                    if isinstance(resource.subClassOf, str)
                    else resource.subClassOf
                )
                out.append(
                    f"- subClassOf: {', '.join(f'[{sc}]' for sc in subcl)}"
                )
                for sc in subcl:
                    refs.append(f"[{sc}]: {ts.expand_iri(sc)}")
            if "iri" in resource:
                refs.append(f"[{shortname}]: {ts.expand_iri(resource.iri)}")
            included_keywords = [
                k
                for k, v in self.keywords.items()
                if name in v.domain and is_curie(k)
            ]
            out.extend(
                self._keywords_table(keywords=sorted(included_keywords))
            )
            out.append("")

        # References
        extra_refs = [
            # pylint: disable=line-too-long
            "[@id]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
            "[@type]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
            "[@context]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
            "[@base]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
            "[@vocab]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
            "[@graph]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        ]
        refs.extend(extra_refs)
        out.append("")
        out.append("")
        out.append("")
        out.extend(refs)
        with open(outfile, "wt", encoding="utf-8") as f:
            f.write("\n".join(out) + "\n")

    def save_markdown_prefixes(self, outfile: "FileLoc") -> None:
        """Save markdown file with documentation of the prefixes."""
        out = [
            "# Predefined prefixes",
            (
                "All namespace prefixes listed on this page are defined in "
                "the [default JSON-LD context]."
            ),
            (
                "See [User-defined prefixes] for how to extend this list "
                "with additional namespace prefixes."
            ),
        ]
        rows = [
            [prefix, ns]
            for prefix, ns in self.data.get("prefixes", {}).items()
        ]
        out.extend(self._to_table(["Prefix", "Namespace"], rows))
        out.append("")
        out.append("")
        out.extend(
            [
                # pylint: disable=line-too-long
                "[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.3/context.json",
                "[User-defined prefixes]: customisation.md/#user-defined-prefixes",
            ]
        )
        with open(outfile, "wt", encoding="utf-8") as f:
            f.write("\n".join(out) + "\n")

    def _to_table(self, header: "Sequence", rows: "Iterable") -> list:
        """Return header and rows as a ."""

        widths = [len(h) for h in header]
        for row in rows:
            for i, col in enumerate(row):
                n = len(col)
                if n > widths[i]:
                    widths[i] = n

        lines = []
        empty = ""
        if rows:
            lines.append("")
            lines.append(
                "| "
                + " | ".join(
                    f"{head:{widths[i]}}" for i, head in enumerate(header)
                )
                + " |"
            )
            lines.append(
                "| "
                + " | ".join(
                    f"{empty:-<{widths[i]}}" for i in range(len(header))
                )
                + " |"
            )
            for row in rows:
                lines.append(
                    "| "
                    + " | ".join(
                        f"{col:{widths[i]}}" for i, col in enumerate(row)
                    )
                    + " |"
                )

        return lines

__init__(self, theme='ddoc:datadoc', yamlfile=None, timeout=3) special

Initialises keywords object.

Parameters:

- theme (Optional[Union[str, Sequence[str]]], default: 'ddoc:datadoc'):
  IRI of one or more themes to load keywords for.

- yamlfile (Optional[FileLoc], default: None):
  A YAML file with keyword definitions to parse. May also be a URI, in
  which case it will be accessed via HTTP GET. Deprecated. Use the
  add_yaml() or add() methods instead.

- timeout (float, default: 3):
  Timeout in case yamlfile is a URI.

Attributes:

- data: The dict loaded from the keyword yamlfile.
- keywords: A dict mapping keywords (name/prefixed/iri) to dicts
  describing the keywords.
- theme: IRI of a theme or scientific domain that the keywords belong to.

Source code in tripper/datadoc/keywords.py
def __init__(
    self,
    theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
    yamlfile: "Optional[FileLoc]" = None,
    timeout: float = 3,
) -> None:
    """Initialises keywords object.

    Arguments:
        theme: IRI of one or more themes to load keywords for.
        yamlfile: A YAML file with keyword definitions to parse.  May also
            be a URI in which case it will be accessed via HTTP GET.
            Deprecated. Use the `add_yaml()` or `add()` methods instead.
        timeout: Timeout in case `yamlfile` is a URI.

    Attributes:
        data: The dict loaded from the keyword yamlfile.
        keywords: A dict mapping keywords (name/prefixed/iri) to dicts
            describing the keywords.
        theme: IRI of a theme or scientific domain that the keywords
            belong to.
    """
    default_prefixes = AttrDict(ddoc=str(DDOC))
    self.theme = None  # theme for this object
    self.data = AttrDict(prefixes=default_prefixes, resources=AttrDict())

    # A "view" into `self.data`. A dict mapping short, prefixed
    # and expanded keyword names to corresponding value dicts in
    # self.data.
    self.keywords = AttrDict()

    # Themes and files that have been parsed
    self.parsed: "set" = set()

    if theme:
        self.add_theme(theme)

    if yamlfile:
        warnings.warn(
            "The `yamlfile` argument is deprecated. Use the `add_yaml()` "
            "or `add()` methods instead.",
            DeprecationWarning,
        )
        if isinstance(yamlfile, (str, Path)):
            self.load_yaml(yamlfile, timeout=timeout)
        else:
            for path in yamlfile:
                self.load_yaml(path, timeout=timeout)
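
A minimal usage sketch of the constructor (the import path follows the
source location shown above):

    from tripper.datadoc.keywords import Keywords

    # Load keywords for the default "ddoc:datadoc" theme.
    keywords = Keywords()

    # Keywords are indexed by short name, prefixed name (CURIE) and full IRI.
    assert "title" in keywords
    assert keywords.prefixed("title") == "dcterms:title"

    # Iterating yields the prefixed (CURIE) keyword names.
    curies = list(keywords)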

add(self, keywords, format=None, timeout=3, strict=False, redefine='raise')

Add keywords to this Keywords object.

Parameters:

- keywords (Optional[KeywordsType], required):
  Keywords definitions to add to this Keywords object. May be another
  Keywords object, a path to a file, a theme or a sequence of these.

- format (Optional[Union[str, Sequence]], default: None):
  Format of keywords. Recognised formats include: yaml, csv, tsv,
  turtle, xml, json-ld, rdfa, ...

- timeout (float, default: 3):
  Timeout when accessing remote files.

- strict (bool, default: False):
  Whether to raise an InvalidKeywordError exception if d contains an
  unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword. Emits a RedefineKeywordWarning.
  - "skip": Don't redefine existing keyword. Emits a RedefineKeywordWarning.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def add(
    self,
    keywords: "Optional[KeywordsType]",
    format: "Optional[Union[str, Sequence]]" = None,
    timeout: float = 3,
    strict: bool = False,
    redefine: str = "raise",
) -> None:
    """Add `keywords` to this Keywords object.

    Arguments:
        keywords: Keywords definitions to add to this Keywords object.
            May be another Keywords object, path to a file, theme or a
            sequence of these.
        format: Format of `keywords`. Recognised formats include:
            yaml, csv, tsv, turtle, xml, json-ld, rdfa, ...
        timeout: Timeout when accessing remote files.
        strict: Whether to raise an `InvalidKeywordError` exception if `d`
            contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword. Emits a
                `RedefineKeywordWarning`.
              - "skip": Don't redefine existing keyword. Emits a
                `RedefineKeywordWarning`.
              - "raise": Raise an RedefineError (default).

    """
    if not isinstance(keywords, str) and isinstance(keywords, Sequence):
        if isinstance(format, str):
            format = [format] * len(keywords)
        elif format and len(format) != len(keywords):
            raise TypeError(
                "If given, `format` must have the same length as "
                "`keywords`"
            )

    def _add(kw, fmt):
        if kw is None:
            pass
        elif isinstance(kw, Keywords):
            self.theme = merge(self.theme, kw.theme)
            recursive_update(self.data, kw.data, cls=AttrDict)
            self._set_keywords(clear=False)
        elif isinstance(kw, dict):
            self._load_yaml(kw, strict=strict, redefine=redefine)
        elif not isinstance(kw, str) and isinstance(kw, Sequence):
            for i, e in enumerate(kw):
                _add(e, fmt[i] if fmt else None)
        elif isinstance(kw, (str, Path, IOBase)):
            if (
                isinstance(kw, str)
                and ":" in kw
                and not (
                    kw.startswith("/") or kw.startswith("./") or is_uri(kw)
                )
            ):
                self.add_theme(
                    kw,
                    timeout=timeout,
                    strict=strict,
                    redefine=redefine,
                )
            else:
                if not fmt:
                    name = kw.name if hasattr(kw, "name") else kw
                    fmt = Path(name).suffix
                fmt = fmt.lstrip(".").lower()
                # pylint:disable=consider-using-get
                if fmt in RDFLIB_SUFFIX_FORMAT_MAP:
                    fmt = RDFLIB_SUFFIX_FORMAT_MAP[fmt]

                if fmt in ("yaml", "yml"):
                    self.load_yaml(
                        kw,
                        timeout=timeout,
                        strict=strict,
                        redefine=redefine,
                    )
                elif fmt in ("csv", "tsv", "xlsx", "excel"):
                    self.load_table(kw, format=fmt)
                else:
                    self.load_rdffile(
                        kw,
                        format=fmt,
                        timeout=timeout,
                        strict=strict,
                        redefine=redefine,
                    )
        else:
            raise TypeError(
                "`keywords` must be a KeywordsType object (Keywords "
                "instance, dict, IO, Path, string or sequence). "
                f"Got: {type(kw)}"
            )

    _add(keywords, format)
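
A short sketch of typical add() calls. The file names are hypothetical
and only illustrate how formats are dispatched:

    from tripper.datadoc.keywords import Keywords

    kw = Keywords()

    # A path is dispatched on its extension (here: load_yaml()).
    kw.add("my_keywords.yaml")

    # Several sources at once, with explicit per-source formats.
    kw.add(["extra.ttl", "extra.csv"], format=["turtle", "csv"])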

add_prefix(self, prefix, namespace, replace=False)

Bind prefix to namespace.

If namespace is None, the prefix is removed.

If replace is true, an existing namespace will be overridden.

Source code in tripper/datadoc/keywords.py
def add_prefix(self, prefix, namespace, replace=False):
    """Bind `prefix` to `namespace`.

    If `namespace` is None, the prefix is removed.

    If `replace` is true, an existing namespace will be overridden.
    """
    if namespace is None:
        del self.data.prefixes[str(prefix)]
    elif replace:
        self.data.prefixes[str(prefix)] = str(namespace)
    else:
        self.data.prefixes.setdefault(str(prefix), str(namespace))
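
For example (the "ex" prefix and namespace are made up for illustration):

    from tripper.datadoc.keywords import Keywords

    kw = Keywords()
    kw.add_prefix("ex", "http://example.com/vocab#")
    assert kw.get_prefixes()["ex"] == "http://example.com/vocab#"

    # Passing None removes the binding again.
    kw.add_prefix("ex", None)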

add_theme(self, theme, timeout=3, strict=False, redefine='raise')

Add keywords for theme, where theme is the IRI of a theme or scientific domain or a list of such IRIs.

Parameters:

- theme (Union[str, Sequence[str]], required):
  IRI (or list of IRIs) of a theme/scientific domain to load.

- timeout (float, default: 3):
  Timeout when accessing remote files.

- strict (bool, default: False):
  Whether to raise an InvalidKeywordError exception if the theme
  contains an unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword. Emits a RedefineKeywordWarning.
  - "skip": Don't redefine existing keyword. Emits a RedefineKeywordWarning.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def add_theme(
    self,
    theme: "Union[str, Sequence[str]]",
    timeout: float = 3,
    strict: bool = False,
    redefine: str = "raise",
) -> None:
    """Add keywords for `theme`, where `theme` is the IRI of a
    theme or scientific domain or a list of such IRIs.

    Arguments:
        theme: IRI (or list of IRIs) of a theme/scientific domain to load.
        timeout: Timeout when accessing remote files.
        strict: Whether to raise an `InvalidKeywordError` exception if the
            theme contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword. Emits a
                `RedefineKeywordWarning`.
              - "skip": Don't redefine existing keyword. Emits a
                `RedefineKeywordWarning`.
              - "raise": Raise an RedefineError (default).

    """
    if isinstance(theme, str):
        theme = [theme]

    parsedkey = (tuple(theme), strict, redefine)
    if parsedkey in self.parsed:
        return

    for name in theme:  # type: ignore
        expanded = expand_iri(name, self.get_prefixes())
        prefixed = prefix_iri(name, self.get_prefixes())
        add(
            self.data,
            "theme",
            prefixed,
        )
        for ep in get_entry_points("tripper.keywords"):
            if expand_iri(ep.value, self.get_prefixes()) == expanded:
                package_name, path = ep.name.split("/", 1)
                package = import_module(package_name)
                fullpath = (
                    Path(package.__file__).parent / path  # type: ignore
                )
                self.add(
                    fullpath,
                    timeout=timeout,
                    strict=strict,
                    redefine=redefine,
                )
                break
        else:
            # Fallback in case the entry point is not installed
            if expanded == DDOC.datadoc:
                self.load_yaml(
                    self.rootdir
                    / "tripper"
                    / "context"
                    / "0.3"
                    / "keywords.yaml",
                    timeout=timeout,
                    strict=strict,
                    redefine=redefine,
                )
            else:
                raise TypeError(f"Unknown theme: {name}")

    self.parsed.add(parsedkey)

asdicts(self, names=None, keymode='prefixed')

Return the content of this Keywords object as a list of JSON-LD dicts.

Parameters:

- names (Optional[Sequence], default: None):
  A sequence of keyword or class names. The default is to return all
  keywords.

- keymode (str, default: 'prefixed'):
  How to represent keys. Should be either "name", "prefixed" (CURIE)
  or "expanded" (full IRI).

Returns:

- List[dict]: List of JSON-LD dicts corresponding to names.

Source code in tripper/datadoc/keywords.py
def asdicts(
    self,
    names: "Optional[Sequence]" = None,
    keymode: str = "prefixed",
) -> "List[dict]":
    """Return the content of this Keywords object as a list of JSON-LD
    dicts.

    Arguments:
        names: A sequence of keyword or class names.  The
            default is to return all keywords.
        keymode: How to represent keys.  Should be either "name",
            "prefixed" (CURIE) or "expanded" (full IRI).

    Returns:
        List of JSON-LD dicts corresponding to `names`.
    """
    keymodes = {
        "name": iriname,
        "prefixed": None,
        "expanded": self.expanded,
    }
    maps = {
        "subPropertyOf": "rdfs:subPropertyOf",
        "unit": "ddoc:unitSymbol",
        "description": "dcterms:description",
        "usageNote": "vann:usageNote",
        "theme": "dcat:theme",
    }

    def key(k):
        """Return key `k` accordig to `keymode`."""
        return keymodes[keymode](k) if keymodes[keymode] else k

    conformance_indv = {v: k for k, v in CONFORMANCE_MAPS.items()}
    if names is None:
        names = self.keywordnames()

    classes = []
    dicts = []
    for name in names:
        if name not in self.keywords:
            classes.append(name)
            continue
        d = self.keywords[name]
        if "range" in d and self.expanded(d.range) != RDFS.Literal:
            proptype = "owl:ObjectProperty"
            range = d.range
        elif (
            "datatype" in d and self.expanded(d.datatype) != RDF.langString
        ):
            proptype = "owl:DatatypeProperty"
            range = d.get("datatype")
        else:
            proptype = "owl:AnnotationProperty"
            range = d.get("datatype")

        dct = {
            "@id": d.iri,
            "@type": proptype,
            key("rdfs:label"): d.name,
        }
        if "domain" in d:
            dct[key("rdfs:domain")] = d.domain
        if range:
            dct[key("rdfs:range")] = range
        if "conformance" in d:
            dct[key("ddoc:conformance")] = conformance_indv.get(
                d.conformance, d.conformance
            )
        for k, v in d.items():
            if k in maps:
                dct[key(maps[k])] = v
        dicts.append(dct)

    if classes:
        classmaps = {}
        for k, v in self.data.resources.items():
            classmaps[k] = k
            classmaps[self.expanded(k)] = k
            classmaps[self.prefixed(k)] = k

        for name in classes:
            d = self.data.resources[classmaps[name]]
            dct = {"@id": d.iri, "@type": OWL.Class}
            if "subClassOf" in d:
                dct[key("rdfs:subClassOf")] = d.subClassOf
            if "description" in d:
                dct[key("dcterms:description")] = d.description
            if "usageNote" in d:
                dct[key("vann:usageNote")] = d.usageNote
            dicts.append(dct)

    return dicts
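
A sketch of how the returned dicts may be inspected (assuming "title"
is defined by the loaded default theme, as in the examples below):

    from tripper.datadoc.keywords import Keywords

    kw = Keywords()
    for d in kw.asdicts(names=["title"], keymode="prefixed"):
        # Each dict is JSON-LD: "@id" holds the property IRI and
        # "@type" the OWL property type inferred from range/datatype.
        print(d["@id"], d["@type"])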

classnames(self)

Return a list with all class names defined in this instance.

Source code in tripper/datadoc/keywords.py
def classnames(self) -> "list":
    """Return a list with all class names defined in this instance."""
    return list(self.data.resources.keys())

copy(self)

Returns a copy of self.

Source code in tripper/datadoc/keywords.py
def copy(self):
    """Returns a copy of self."""
    new = Keywords(theme=None)
    new.theme = self.theme
    new.data = deepcopy(self.data)
    new.keywords = deepcopy(self.keywords)
    new.parsed = self.parsed.copy()
    return new

expanded(self, keyword, strict=True)

Return the keyword expanded to its full IRI.

Source code in tripper/datadoc/keywords.py
def expanded(self, keyword: str, strict: bool = True) -> str:
    """Return the keyword expanded to its full IRI."""
    if keyword in self.keywords:
        iri = self.keywords[keyword].iri
    elif "resources" in self.data and keyword in self.data.resources:
        iri = self.data.resources[keyword].iri
    elif ":" in keyword or not strict:
        iri = keyword
    else:
        raise InvalidKeywordError(keyword)
    return expand_iri(iri, self.get_prefixes(), strict=strict)

fromdicts(self, dicts, prefixes=None, theme=None, basedOn=None, strict=False, redefine='raise')

Populate this Keywords object from a sequence of dicts.

Parameters:

- dicts (Sequence[dict], required):
  A sequence of JSON-LD dicts to populate this Keywords object from.
  Their format should follow what is returned by tripper.datadoc.acquire().

- prefixes (Optional[dict], default: None):
  Dict with additional prefixes used by dicts.

- theme (Optional[str], default: None):
  Theme defined by dicts.

- basedOn (Optional[Union[str, List[str]]], default: None):
  Theme(s) that dicts are based on.

- strict (bool, default: False):
  Whether to raise an InvalidKeywordError exception if d contains an
  unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword.
  - "skip": Don't redefine existing keyword.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def fromdicts(
    self,
    dicts: "Sequence[dict]",
    prefixes: "Optional[dict]" = None,
    theme: "Optional[str]" = None,
    basedOn: "Optional[Union[str, List[str]]]" = None,
    strict: bool = False,
    redefine: str = "raise",
) -> None:
    """Populate this Keywords object from a sequence of dicts.

    Arguments:
        dicts: A sequence of JSON-LD dicts to populate this keywords object
            from.  Their format should follow what is returned by
            tripper.datadoc.acquire().
        prefixes: Dict with additional prefixes used by `dicts`.
        theme: Theme defined by `dicts`.
        basedOn: Theme(s) that `dicts` are based on.
        strict: Whether to raise an `InvalidKeywordError` exception if `d`
            contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword.
              - "skip": Don't redefine existing keyword.
              - "raise": Raise an RedefineError (default).

    """
    data = self._fromdicts(
        dicts,
        prefixes=prefixes,
        theme=theme,
        basedOn=basedOn,
    )
    self._load_yaml(data, strict=strict, redefine=redefine)

get_context(self)

Return JSON-LD context as a dict.

Note: The returned dict corresponds to the value of the "@context" keyword in a JSON-LD document.

Source code in tripper/datadoc/keywords.py
def get_context(self) -> dict:
    """Return JSON-LD context as a dict.

    Note: The returned dict corresponds to the value of the "@context"
    keyword in a JSON-LD document.
    """
    ctx = {}
    ctx["@version"] = 1.1

    # Add prefixes to context
    prefixes = self.data.get("prefixes", {})
    for prefix, ns in prefixes.items():
        ctx[prefix] = ns

    resources = self.data.get("resources", {})

    # Translate datatypes
    translate = {"rdf:JSON": "@json"}

    # Add keywords (properties) to context
    for resource in resources.values():
        for k, v in resource.get("keywords", {}).items():
            iri = v["iri"]
            if "datatype" in v:
                dt = v["datatype"]
                if isinstance(dt, str):
                    dt = translate.get(dt, dt)
                else:
                    dt = [translate.get(t, t) for t in dt]

                d = {}
                if v.get("reverse", "").lower() == "true":
                    d["@reverse"] = iri
                else:
                    d["@id"] = iri

                if dt == "rdf:langString" or "language" in v:
                    d["@language"] = v.get("language", "en")
                else:
                    d["@type"] = dt

                ctx[k] = d  # type: ignore
            elif v.get("range", "rdfs:Literal") == "rdfs:Literal":
                ctx[k] = iri
            else:
                ctx[k] = {  # type: ignore
                    "@id": iri,
                    "@type": "@id",
                }

    # Add resources (classes) to context
    for k, v in resources.items():
        ctx.setdefault(k, v.iri)

    return ctx
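
A minimal sketch wrapping the returned dict into a complete JSON-LD
context document:

    import json

    from tripper.datadoc.keywords import Keywords

    kw = Keywords()
    context = {"@context": kw.get_context()}
    print(json.dumps(context, indent=2))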

get_prefixes(self)

Return prefixes dict.

Source code in tripper/datadoc/keywords.py
def get_prefixes(self) -> dict:
    """Return prefixes dict."""
    return self.data.get("prefixes", {})

isnested(self, keyword)

Returns whether the keyword corresponds to an object property.

Source code in tripper/datadoc/keywords.py
def isnested(self, keyword: str) -> bool:
    """Returns whether the keyword corresponds to an object property."""
    d = self.keywords[keyword]
    if "datatype" in d or d.range == "rdfs:Literal":
        return False
    return True

keywordname(self, keyword)

Return the short name of keyword.

Source code in tripper/datadoc/keywords.py
def keywordname(self, keyword: str) -> str:
    """Return the short name of `keyword`."""
    warnings.warn(
        "Keywords.keywordname() is deprecated. Use Keywords.shortname() "
        "instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    if keyword not in self.keywords:
        raise InvalidKeywordError(keyword)
    return self.keywords[keyword].name

keywordnames(self)

Return a list with all keyword names defined in this instance.

Source code in tripper/datadoc/keywords.py
def keywordnames(self) -> "list":
    """Return a list with all keyword names defined in this instance."""
    return [k for k in self.keywords.keys() if ":" not in k]

load_rdf(self, ts, iris=None, strict=False, redefine='raise')

Populate this Keyword object from a triplestore.

Parameters:

- ts (Triplestore, required):
  Triplestore to load keywords from.

- iris (Optional[Sequence[str]], default: None):
  IRIs to load. The default is to load IRIs corresponding to all
  properties and classes.

- strict (bool, default: False):
  Whether to raise an InvalidKeywordError exception if d contains an
  unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword. Emits a RedefineKeywordWarning.
  - "skip": Don't redefine existing keyword. Emits a RedefineKeywordWarning.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def load_rdf(
    self,
    ts: "Triplestore",
    iris: "Optional[Sequence[str]]" = None,
    strict: bool = False,
    redefine: str = "raise",
) -> None:
    """Populate this Keyword object from a triplestore.

    Arguments:
        ts: Triplestore to load keywords from.
        iris: IRIs to load. The default is to load IRIs corresponding to all
            properties and classes.
        strict: Whether to raise an `InvalidKeywordError` exception if `d`
            contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword. Emits a
                `RedefineKeywordWarning`.
              - "skip": Don't redefine existing keyword. Emits a
                `RedefineKeywordWarning`.
              - "raise": Raise an RedefineError (default).

    """
    dicts = self._load_rdf(ts, iris)
    self.fromdicts(
        dicts,
        prefixes=ts.namespaces,
        strict=strict,
        redefine=redefine,
    )

load_rdffile(self, rdffile, format=None, timeout=3, iris=None, strict=False, redefine='raise')

Load RDF from file or URL.

Parameters:

- rdffile (FileLoc, required):
  File to load.

- format (Optional[str], default: None):
  Any format supported by rdflib.Graph.parse().

- timeout (float, default: 3):
  Timeout in case rdffile is a URI.

- iris (Optional[Sequence[str]], default: None):
  IRIs to load. The default is to load IRIs corresponding to all
  properties and classes.

- strict (bool, default: False):
  Whether to raise an InvalidKeywordError exception if d contains an
  unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword. Emits a RedefineKeywordWarning.
  - "skip": Don't redefine existing keyword. Emits a RedefineKeywordWarning.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def load_rdffile(
    self,
    rdffile: "FileLoc",
    format: "Optional[str]" = None,
    timeout: float = 3,
    iris: "Optional[Sequence[str]]" = None,
    strict: bool = False,
    redefine: str = "raise",
) -> None:
    """Load RDF from file or URL.

    Arguments:
        rdffile: File to load.
        format: Any format supported by rdflib.Graph.parse().
        timeout: Timeout in case `rdffile` is a URI.
        iris: IRIs to load. The default is to load IRIs corresponding to
            all properties and classes.
        strict: Whether to raise an `InvalidKeywordError` exception if `d`
            contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword. Emits a
                `RedefineKeywordWarning`.
              - "skip": Don't redefine existing keyword. Emits a
                `RedefineKeywordWarning`.
              - "raise": Raise an RedefineError (default).

    """
    if format is None:
        format = guess_rdf_format(rdffile)

    ts = Triplestore("rdflib")
    with openfile(rdffile, timeout=timeout, mode="rt") as f:
        ts.parse(f, format=format)
    self.load_rdf(ts, iris=iris, strict=strict, redefine=redefine)

load_table(self, filename, format=None, prefixes=None, theme=None, basedOn=None, **kwargs)

Load keywords from a csv file.

Parameters:

- filename (FileLoc, required):
  File to load.

- format (Optional[str], default: None):
  File format. Unused. Only csv is currently supported.

- prefixes (Optional[dict], default: None):
  Dict with additional prefixes used in the table.

- theme (Optional[str], default: None):
  Theme defined by the table.

- basedOn (Optional[Union[str, List[str]]], default: None):
  Theme(s) that the table is based on.

- kwargs (default: {}):
  Keyword arguments passed on to TableDoc.parse_csv().
Source code in tripper/datadoc/keywords.py
def load_table(
    self,
    filename: "FileLoc",
    format: "Optional[str]" = None,  # pylint: disable=unused-argument
    prefixes: "Optional[dict]" = None,
    theme: "Optional[str]" = None,
    basedOn: "Optional[Union[str, List[str]]]" = None,
    **kwargs,
) -> None:
    """Load keywords from a csv file.

    Arguments:
        filename: File to load.
        format: File format. Unused.  Only csv is currently supported.
        prefixes: Dict with additional prefixes used in the table.
        theme: Theme defined by the table.
        basedOn: Theme(s) that the table is based on.
        kwargs: Keyword arguments passed on to TableDoc.parse_csv().
    """
    # pylint: disable=import-outside-toplevel
    from tripper.datadoc.tabledoc import TableDoc

    td = TableDoc.parse_csv(
        filename, type=None, prefixes=prefixes, **kwargs
    )
    dicts = td.asdicts()
    self.fromdicts(dicts, prefixes=prefixes, theme=theme, basedOn=basedOn)

load_yaml(self, yamlfile, timeout=3, strict=True, redefine='raise')

Load YAML file with keyword definitions.

Parameters:

- yamlfile (Union[Path, str], required):
  Path or URL to a YAML file to load.

- timeout (float, default: 3):
  Timeout when accessing remote files.

- strict (bool, default: True):
  Whether to raise an InvalidKeywordError exception if d contains an
  unknown key.

- redefine (str, default: 'raise'):
  Determine how to handle redefinition of existing keywords. Should be
  one of the following strings:
  - "allow": Allow redefining a keyword. Emits a RedefineKeywordWarning.
  - "skip": Don't redefine existing keyword. Emits a RedefineKeywordWarning.
  - "raise": Raise a RedefineError (default).
Source code in tripper/datadoc/keywords.py
def load_yaml(
    self,
    yamlfile: "Union[Path, str]",
    timeout: float = 3,
    strict: bool = True,
    redefine: str = "raise",
) -> None:
    """Load YAML file with keyword definitions.

    Arguments:
        yamlfile: Path or URL to a YAML file to load.
        timeout: Timeout when accessing remote files.
        strict: Whether to raise an `InvalidKeywordError` exception if `d`
            contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords.  Should be one of the following strings:
              - "allow": Allow redefining a keyword. Emits a
                `RedefineKeywordWarning`.
              - "skip": Don't redefine existing keyword. Emits a
                `RedefineKeywordWarning`.
              - "raise": Raise an RedefineError (default).

    """
    parsedkey = (yamlfile, strict, redefine)
    if parsedkey in self.parsed:
        return

    with openfile(yamlfile, timeout=timeout, mode="rt") as f:
        d = yaml.safe_load(f)
    try:
        self._load_yaml(d, strict=strict, redefine=redefine)
    except Exception as exc:
        raise ParseError(f"error parsing '{yamlfile}'") from exc

    self.parsed.add(parsedkey)
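
A usage sketch (both locations are hypothetical):

    from tripper.datadoc.keywords import Keywords

    # Start without the default theme and load definitions explicitly.
    kw = Keywords(theme=None)
    kw.load_yaml("my_keywords.yaml")
    kw.load_yaml("https://example.com/keywords.yaml", timeout=10)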

missing_keywords(self, ts, include_classes=False, return_existing=False)

List keywords not defined in triplestore ts.

Parameters:

- ts (Triplestore, required):
  Triplestore object to check.

- include_classes (bool, default: False):
  Also return missing classes.

- return_existing (bool, default: False):
  If true, two lists are returned:
  - list of keywords missing in ts
  - list of keywords existing in ts

Returns:

- Union[list, Tuple[list, list]]: List with the names of keywords in
  this instance that are not defined in triplestore ts.

Source code in tripper/datadoc/keywords.py
def missing_keywords(
    self,
    ts: "Triplestore",
    include_classes: bool = False,
    return_existing: bool = False,
) -> "Union[list, Tuple[list, list]]":
    """List keywords not defined in triplestore `ts`.

    Arguments:
        ts: Triplestore object to check.
        include_classes: Also return missing classes.
        return_existing: If true, two lists are returned:
            - list of keywords missing in `ts`
            - list of keywords existing in `ts`

    Returns:
        List with the names of keywords in this instance that are
        not defined in triplestore `ts`.
    """
    expanded = {k for k in self.keywords.keys() if "://" in k}
    if include_classes:
        expanded.update(self.expanded(c) for c in self.classnames())

    if not expanded:
        return []

    query = f"""
    SELECT ?s WHERE {{
      VALUES ?s {{ { ' '.join(f'<{iri}>' for iri in expanded) } }}
      ?s a ?o
    }}
    """
    existing = {r[0] for r in ts.query(query)}
    missing = expanded.difference(existing)
    missing_names = [self.shortname(k) for k in missing]

    if return_existing:
        existing_names = [self.keywords[k].name for k in existing]
        return missing_names, existing_names
    return missing_names
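
A sketch using an empty in-memory triplestore (Triplestore is assumed
to be imported from the top-level tripper package, as used elsewhere
in this module):

    from tripper import Triplestore
    from tripper.datadoc.keywords import Keywords

    kw = Keywords()
    ts = Triplestore("rdflib")

    # Nothing is stored yet, so all keywords and classes are missing.
    missing = kw.missing_keywords(ts, include_classes=True)
    print(f"{len(missing)} keywords/classes not in the store")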

prefixed(self, name, strict=True)

Return prefixed name or name.

Examples:

>>> keywords = Keywords()
>>> keywords.prefixed("title")
'dcterms:title'

Source code in tripper/datadoc/keywords.py
def prefixed(self, name: str, strict: bool = True) -> str:
    """Return prefixed name or `name`.

    Example:

    >>> keywords = Keywords()
    >>> keywords.prefixed("title")
    'dcterms:title'
    """
    if name in self.keywords:
        return prefix_iri(self.keywords[name].iri, self.get_prefixes())
    if name in self.data.resources:
        return prefix_iri(
            self.data.resources[name].iri,
            self.get_prefixes(),
            strict=strict,
        )
    if is_curie(name):
        return name
    return prefix_iri(name, self.get_prefixes(), strict=strict)

range(self, keyword)

Return the range of the keyword.

Source code in tripper/datadoc/keywords.py
def range(self, keyword: str) -> str:
    """Return the range of the keyword."""
    return self.keywords[keyword].range

save_context(self, outfile, indent=2)

Save JSON-LD context file.

Parameters:

- outfile (FileLoc, required):
  File to save the JSON-LD context to.

- indent (int, default: 2):
  Indentation level. Defaults to two.
Source code in tripper/datadoc/keywords.py
def save_context(self, outfile: "FileLoc", indent: int = 2) -> None:
    """Save JSON-LD context file.

    Arguments:
        outfile: File to save the JSON-LD context to.
        indent: Indentation level. Defaults to two.
    """
    context = {"@context": self.get_context()}
    with open(outfile, "wt", encoding="utf-8") as f:
        json.dump(context, f, indent=indent)
        f.write(os.linesep)

save_markdown(self, outfile, keywords=None, classes=None, themes=None, explanation=False, special=False)

Save markdown file with documentation of the keywords.

Parameters:

- outfile (FileLoc, required):
  File to save the markdown documentation to.

- keywords (Optional[Sequence[str]], default: None):
  Sequence of keywords to include.

- classes (Optional[Union[str, Sequence[str]]], default: None):
  Include keywords that have these classes in their domain.

- themes (Optional[Union[str, Sequence[str]]], default: None):
  Include keywords for these themes.

- explanation (bool, default: False):
  Whether to include an explanation of the column labels.

- special (bool, default: False):
  Whether to generate documentation of special JSON-LD keywords.
Source code in tripper/datadoc/keywords.py
def save_markdown(
    self,
    outfile: "FileLoc",
    keywords: "Optional[Sequence[str]]" = None,
    classes: "Optional[Union[str, Sequence[str]]]" = None,
    themes: "Optional[Union[str, Sequence[str]]]" = None,
    explanation: bool = False,
    special: bool = False,
) -> None:
    """Save markdown file with documentation of the keywords.

    Arguments:
        outfile: File to save the markdown documentation to.
        keywords: Sequence of keywords to include.
        classes: Include keywords that have these classes in their domain.
        themes: Include keywords for these themes.
        explanation: Whether to include an explanation of the column labels.
        special: Whether to generate documentation of special
            JSON-LD keywords.

    """
    # pylint: disable=too-many-locals,too-many-branches
    keywords, classes, themes = self._keywords_list(
        keywords, classes, themes
    )
    ts = Triplestore("rdflib")
    for prefix, ns in self.data.get("prefixes", {}).items():
        ts.bind(prefix, ns)

    out = [
        "<!-- Do not edit! This file is generated with Tripper. "
        "Edit the keywords.yaml file instead. -->",
        "",
        f"# Keywords{f' for theme: {themes}' if themes else ''}",
        (
            f"The tables below lists the keywords for the theme {themes}."
            if themes
            else ""
        ),
        "",
    ]
    column_explanations = [
        "The meaning of the columns are as follows:",
        "",
        "- **Keyword**: The keyword referring to a property used for "
        "the data documentation.",
        "- **Range**: Refer to the class for the values of the keyword.",
        "- **Conformance**: Whether the keyword is mandatory, recommended "
        "or optional when documenting the given type of resources.",
        "- **Definition**: The definition of the keyword.",
        "- **Usage note**: Notes about how to use the keyword.",
        "",
    ]
    special_keywords = [
        "## Special keywords (from JSON-LD)",
        "See the [JSON-LD specification] for more details.",
        "",
        # pylint: disable=line-too-long
        "| Keyword    | Range         | Conformance | Definition                                                              | Usage note |",
        "|------------|---------------|-------------|-------------------------------------------------------------------------|------------|",
        "| [@id]      | IRI           | mandatory   | IRI identifying the resource to document.                               |            |",
        "| [@type]    | IRI           | recommended | Ontological class defining the class of a node.                         |            |",
        "| [@context] | dict&#124list | optional    | Context defining namespace prefixes and additional keywords.            |            |",
        "| [@base]    | namespace     | optional    | Base IRI against which relative IRIs are resolved.                      |            |",
        "| [@vocab]   | namespace     | optional    | Used to expand properties and values in @type with a common prefix IRI. |            |",
        "| [@graph]   | list          | optional    | Used for documenting multiple resources.                                |            |",
        "",
    ]
    if explanation:
        out.extend(column_explanations)
    if special:
        out.extend(special_keywords)
    refs = []

    for cls in sorted(classes):
        name = self.prefixed(cls)
        shortname = iriname(name)
        if shortname in self.data.resources:
            resource = self.data.resources[shortname]
        else:
            for rname, resource in self.data.resources.items():
                if self.prefixed(resource.iri) == name:
                    shortname = rname
                    break
            else:
                raise MissingKeyError(cls)

        out.append("")
        out.append(f"## Properties on [{shortname}]")
        if "description" in resource:
            out.append(resource.description)
        if "subClassOf" in resource:
            out.append("")
            subcl = (
                [resource.subClassOf]
                if isinstance(resource.subClassOf, str)
                else resource.subClassOf
            )
            out.append(
                f"- subClassOf: {', '.join(f'[{sc}]' for sc in subcl)}"
            )
            for sc in subcl:
                refs.append(f"[{sc}]: {ts.expand_iri(sc)}")
        if "iri" in resource:
            refs.append(f"[{shortname}]: {ts.expand_iri(resource.iri)}")
        included_keywords = [
            k
            for k, v in self.keywords.items()
            if name in v.domain and is_curie(k)
        ]
        out.extend(
            self._keywords_table(keywords=sorted(included_keywords))
        )
        out.append("")

    # References
    extra_refs = [
        # pylint: disable=line-too-long
        "[@id]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        "[@type]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        "[@context]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        "[@base]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        "[@vocab]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
        "[@graph]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
    ]
    refs.extend(extra_refs)
    out.append("")
    out.append("")
    out.append("")
    out.extend(refs)
    with open(outfile, "wt", encoding="utf-8") as f:
        f.write("\n".join(out) + "\n")

save_markdown_prefixes(self, outfile)

Save markdown file with documentation of the prefixes.

Source code in tripper/datadoc/keywords.py
def save_markdown_prefixes(self, outfile: "FileLoc") -> None:
    """Save markdown file with documentation of the prefixes."""
    out = [
        "# Predefined prefixes",
        (
            "All namespace prefixes listed on this page are defined in "
            "the [default JSON-LD context]."
        ),
        (
            "See [User-defined prefixes] for how to extend this list "
            "with additional namespace prefixes."
        ),
    ]
    rows = [
        [prefix, ns]
        for prefix, ns in self.data.get("prefixes", {}).items()
    ]
    out.extend(self._to_table(["Prefix", "Namespace"], rows))
    out.append("")
    out.append("")
    out.extend(
        [
            # pylint: disable=line-too-long
            "[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.3/context.json",
            "[User-defined prefixes]: customisation.md/#user-defined-prefixes",
        ]
    )
    with open(outfile, "wt", encoding="utf-8") as f:
        f.write("\n".join(out) + "\n")

save_markdown_table(self, outfile, keywords)

Save markdown file with documentation of the keywords.

Source code in tripper/datadoc/keywords.py
def save_markdown_table(
    self, outfile: "FileLoc", keywords: "Sequence[str]"
) -> None:
    """Save markdown file with documentation of the keywords."""
    table = self._keywords_table(keywords)
    with open(outfile, "wt", encoding="utf-8") as f:
        f.write(os.linesep.join(table) + os.linesep)

save_rdf(self, ts)

Save to triplestore.

Source code in tripper/datadoc/keywords.py
def save_rdf(self, ts: "Triplestore") -> dict:
    """Save to triplestore."""
    # pylint: disable=import-outside-toplevel,cyclic-import
    from tripper.datadoc.dataset import store

    for prefix, ns in self.get_prefixes().items():
        ts.bind(prefix, ns)

    # Ensure that the schema for properties is stored
    load_datadoc_schema(ts)

    # Store all keywords that are not already in the triplestore
    missing = self.missing_keywords(ts, include_classes=True)
    dicts = self.asdicts(missing)
    return store(ts, dicts)
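
A sketch storing the keyword definitions in an in-memory triplestore
(again assuming Triplestore from the top-level tripper package):

    from tripper import Triplestore
    from tripper.datadoc.keywords import Keywords

    kw = Keywords()
    ts = Triplestore("rdflib")

    # Binds prefixes, loads the data-documentation schema and stores
    # all keywords and classes that are not already present.
    kw.save_rdf(ts)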

save_table(self, filename, format=None, names=None, strip=True, keymode='name', **kwargs)

Save keywords to a csv file.

Parameters:

- filename (FileLoc, required):
  File to save to.

- format (Optional[str], default: None):
  File format. Unused. Only csv is currently supported.

- names (Optional[Sequence], default: None):
  A sequence of keyword or class names to save. The default is to save
  all keywords.

- strip (bool, default: True):
  Whether to strip leading and trailing whitespaces from cells.

- keymode (str, default: 'name'):
  How to represent column headers. Should be either "name", "prefixed"
  (CURIE) or "expanded" (full IRI).

- kwargs (default: {}):
  Additional keyword arguments passed to the writer. For more details,
  see [write_csv()].

References:

[write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
Source code in tripper/datadoc/keywords.py
def save_table(
    self,
    filename: "FileLoc",
    format: "Optional[str]" = None,  # pylint: disable=unused-argument
    names: "Optional[Sequence]" = None,
    strip: bool = True,
    keymode: str = "name",
    **kwargs,
) -> None:
    # pylint: disable=line-too-long
    """Load keywords from a csv file.

    Arguments:
        filename: File to load.
        format: File format. Unused.  Only csv is currently supported.
        names: A sequence of keyword or class names to save.  The
            default is to save all keywords.
        strip: Whether to strip leading and trailing whitespaces
            from cells.
        keymode: How to represent column headers.  Should be either
            "name", "prefixed" (CURIE) or "expanded" (full IRI).
        kwargs: Additional keyword arguments passed to the writer.
            For more details, see [write_csv()].

    References:
    [write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
    """
    # pylint: disable=import-outside-toplevel
    from tripper.datadoc.tabledoc import TableDoc

    dicts = self.asdicts(names, keymode=keymode)
    td = TableDoc.fromdicts(dicts, type=None, keywords=self, strip=strip)
    td.write_csv(filename, **kwargs)
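
A usage sketch (the output path is an example):

    from tripper.datadoc.keywords import Keywords

    kw = Keywords()

    # Export all keyword definitions with short names as column headers.
    kw.save_table("keywords.csv", keymode="name")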

save_yaml(self, yamlfile, keywords=None, classes=None, themes=None)

Save YAML file with keyword definitions.

Parameters:

- yamlfile (Union[Path, str], required):
  File to save keyword definitions to.

- keywords (Optional[Sequence[str]], default: None):
  Sequence of keywords to include.

- classes (Optional[Union[str, Sequence[str]]], default: None):
  Include keywords that have these classes in their domain.

- themes (Optional[Union[str, Sequence[str]]], default: None):
  Include keywords for these themes.
Source code in tripper/datadoc/keywords.py
def save_yaml(
    self,
    yamlfile: "Union[Path, str]",
    keywords: "Optional[Sequence[str]]" = None,
    classes: "Optional[Union[str, Sequence[str]]]" = None,
    themes: "Optional[Union[str, Sequence[str]]]" = None,
) -> None:
    """Save YAML file with keyword definitions.

    Arguments:
        yamlfile: File to save keyword definitions to.
        keywords: Sequence of keywords to include.
        classes: Include keywords that have these classes in their domain.
        themes: Include keywords for these themes.

    """
    keywords, classes, themes = self._keywords_list(
        keywords, classes, themes
    )
    resources = {}
    for cls, clsval in self.data.resources.items():
        if self.prefixed(cls) in classes:
            resources[cls] = dict(clsval.copy())
            resources[cls]["keywords"] = {}
            for k, v in self.data.resources[cls].keywords.items():
                if self.prefixed(k) in keywords:
                    resources[cls]["keywords"][k] = dict(v)
    data = dict(self.data.copy())
    del data["resources"]
    recursive_update(data, {}, cls=dict)
    data["resources"] = resources

    with open(yamlfile, "wt", encoding="utf-8") as f:
        yaml.safe_dump(data, f, sort_keys=False)

shortname(self, iri)

Return the short name of iri.

Examples:

>>> keywords = Keywords()
>>> keywords.shortname("dcterms:title")
'title'

Source code in tripper/datadoc/keywords.py
def shortname(self, iri: str) -> str:
    """Return the short name of `iri`.

    Example:

    >>> keywords = Keywords()
    >>> keywords.shortname("dcterms:title")
    'title'

    """
    if iri in self.keywords:
        return self.keywords[iri].name
    if iri in self.data.resources.keys():
        return iri
    expanded = self.expanded(iri)
    for k, v in self.data.resources.items():
        if expanded == self.expanded(v.iri):
            return k
    raise InvalidKeywordError(iri)

superclasses(self, cls)

Return a list with cls and its superclasses prefixed.

Examples:

>>> keywords = Keywords()
>>> keywords.superclasses("Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
 'dcat:Resource',
 'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

>>> keywords.superclasses("dcat:Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
 'dcat:Resource',
 'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

Source code in tripper/datadoc/keywords.py
def superclasses(self, cls: str) -> "Union[str, list]":
    """Return a list with `cls` and it superclasses prefixed.

    Example:

    >>> keywords = Keywords()
    >>> keywords.superclasses("Dataset")
    ... # doctest: +NORMALIZE_WHITESPACE
    ['dcat:Dataset',
     'dcat:Resource',
     'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

    >>> keywords.superclasses("dcat:Dataset")
    ... # doctest: +NORMALIZE_WHITESPACE
    ['dcat:Dataset',
     'dcat:Resource',
     'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']

    """
    if cls in self.data.resources:
        r = self.data.resources[cls]
    else:
        cls = prefix_iri(cls, self.get_prefixes())
        rlst = [r for r in self.data.resources.values() if cls == r.iri]
        if not rlst:
            raise NoSuchTypeError(cls)
        if len(rlst) > 1:
            raise RuntimeError(
                f"{cls} matches more than one resource: "
                f"{', '.join(r.iri for r in rlst)}"
            )
        r = rlst[0]

    if "subClassOf" in r:
        if isinstance(r.subClassOf, str):
            return [r.iri, r.subClassOf]
        return [r.iri] + r.subClassOf
    return r.iri

typename(self, type)

Return the short name of type.

Examples:

>>> keywords = Keywords()
>>> keywords.typename("dcat:Dataset")
'Dataset'

Source code in tripper/datadoc/keywords.py
def typename(self, type) -> str:
    """Return the short name of `type`.

    Example:

    >>> keywords = Keywords()
    >>> keywords.typename("dcat:Dataset")
    'Dataset'

    """
    if type in self.data.resources:
        return type
    prefixed = prefix_iri(type, self.get_prefixes())
    for name, r in self.data.resources.items():
        if prefixed == r.iri:
            return name
    raise NoSuchTypeError(type)

get_keywords(keywords=None, format=None, theme='ddoc:datadoc', yamlfile=None, timeout=3)

A convenience function that returns a Keywords instance.

Parameters:

- keywords (Optional[KeywordsType], default: None):
  Optional existing keywords object.

- format (Optional[str], default: None):
  Format of input if keywords refers to a file that can be loaded.

- theme (Optional[Union[str, Sequence[str]]], default: 'ddoc:datadoc'):
  IRI of one or more themes to load keywords for.

- yamlfile (Optional[FileLoc], default: None):
  YAML file with keyword definitions to parse. May also be a URI, in
  which case it will be accessed via HTTP GET. Deprecated. Use the
  add_yaml() or add() methods instead.

- timeout (float, default: 3):
  Timeout in case yamlfile is a URI.
Source code in tripper/datadoc/keywords.py
def get_keywords(
    keywords: "Optional[KeywordsType]" = None,
    format: "Optional[str]" = None,
    theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
    yamlfile: "Optional[FileLoc]" = None,
    timeout: float = 3,
) -> "Keywords":
    """A convenient function that returns a Context instance.

    Arguments:
        keywords: Optional existing keywords object.
        format: Format of input if `keywords` refers to a file that can be
            loaded.
        theme: IRI of one or more themes to load keywords for.
        yamlfile: YAML file with keyword definitions to parse.  May also
            be a URI in which case it will be accessed via HTTP GET.
            Deprecated. Use the `add_yaml()` or `add()` methods instead.
        timeout: Timeout in case `yamlfile` is a URI.
    """
    if isinstance(keywords, Keywords):
        kw = keywords
        if theme:
            kw.add_theme(theme, timeout=timeout)
    else:
        kw = Keywords(theme=theme)
        if keywords:
            kw.add(keywords, format=format, timeout=timeout)

    if yamlfile:
        warnings.warn(
            "The `yamlfile` argument is deprecated. Use the `add_yaml()` or "
            "`add()` methods instead.",
            DeprecationWarning,
        )
        kw.load_yaml(yamlfile, timeout=timeout)

    return kw
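
A sketch of the pass-through behaviour:

    from tripper.datadoc.keywords import get_keywords

    # Create a new Keywords instance for the default theme...
    kw1 = get_keywords()

    # ...or pass an existing instance through unchanged.
    kw2 = get_keywords(keywords=kw1)
    assert kw2 is kw1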

load_datadoc_schema(ts)

Load schema for data documentation to triplestore ts.

It is safe to call this function more than once.

Source code in tripper/datadoc/keywords.py
def load_datadoc_schema(ts: "Triplestore") -> None:
    """Load schema for data documentation to triplestore `ts`.

    It is safe to call this function more than once.
    """
    if not ts.query(f"ASK WHERE {{ <{DDOC()}> a <{OWL.Ontology}> }}"):
        ts.bind("ddoc", DDOC)
        path = Path(tripper.__file__).parent / "context" / "datadoc.ttl"
        ts.parse(path)

main(argv=None)

Main function providing CLI access to keywords.

Source code in tripper/datadoc/keywords.py
def main(argv=None):
    """Main function providing CLI access to keywords."""
    import argparse  # pylint: disable=import-outside-toplevel

    parser = argparse.ArgumentParser(
        description=(
            "Tool for generation of JSON-LD context and documentation from "
            "keyword definitions."
        )
    )
    parser.add_argument(
        "--input",
        "-i",
        metavar="FILENAME",
        default=[],
        action="append",
        help="Load keywords from this file. May be given multiple times.",
    )
    parser.add_argument(
        "--format",
        "-f",
        metavar="FORMAT",
        nargs="?",
        action="append",
        help=(
            "Formats of --input. Default format is inferred from the file "
            "name extension.  If given, this option must be provided the "
            "same number of times as --input."
        ),
    )
    parser.add_argument(
        "--theme",
        "-t",
        metavar="NAME",
        nargs="?",
        default=[],
        action="append",
        help="Load keywords from this theme.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Whether to raise an exception of input contains an unknown key.",
    )
    parser.add_argument(
        "--redefine",
        default="raise",
        choices=["raise", "allow", "skip"],
        help="How to handle redifinition of existing keywords.",
    )
    parser.add_argument(
        "--context",
        "-c",
        metavar="FILENAME",
        help="Generate JSON-LD context file.",
    )
    parser.add_argument(
        "--keywords",
        "-k",
        metavar="FILENAME",
        help="Generate keywords Markdown documentation.",
    )
    parser.add_argument(
        "--explanation",
        "-e",
        action="store_true",
        help="Whether to include explanation in generated documentation.",
    )
    parser.add_argument(
        "--special-keywords",
        "-s",
        action="store_true",
        help="Whether to include special keywords in generated documentation.",
    )
    parser.add_argument(
        "--kw",
        metavar="KW1,KW2,...",
        help=(
            "Comma-separated list of keywords to include in generated table. "
            "Implies --keywords."
        ),
    )
    parser.add_argument(
        "--classes",
        metavar="C1,C2,...",
        help=(
            "Generate keywords Markdown documentation for any keywords who's "
            "domain is in the comma-separated list CLASSES. "
            "Implies --keywords."
        ),
    )
    parser.add_argument(
        "--themes",
        metavar="T1,T2,...",
        help=(
            "Generate keywords Markdown documentation for any keywords that "
            "belong to one of the themes in the comma-separated list THEMES. "
            "Implies --keywords."
        ),
    )
    parser.add_argument(
        "--prefixes",
        "-p",
        metavar="FILENAME",
        help="Generate prefixes Markdown documentation.",
    )
    parser.add_argument(
        "--list-themes",
        action="store_true",
        help="List installed themes and exit.",
    )

    args = parser.parse_args(argv)

    if args.list_themes:
        themes = [ep.value for ep in get_entry_points("tripper.keywords")]
        parser.exit(message=os.linesep.join(themes) + os.linesep)

    if args.format and len(args.format) != len(args.input):
        parser.error(
            "The number of --format options must match the number "
            "of --input options."
        )

    if args.theme:
        default_theme = None if None in args.theme else args.theme[0]
    else:
        default_theme = "ddoc:datadoc"

    kw = Keywords(theme=default_theme)

    for theme in args.theme[1:]:
        if theme:
            kw.add_theme(theme, strict=args.strict, redefine=args.redefine)

    kw.add(args.input, args.format, strict=args.strict, redefine=args.redefine)

    if args.context:
        kw.save_context(args.context)

    if args.keywords or args.kw or args.classes or args.themes:
        kw.save_markdown(
            args.keywords,
            keywords=args.kw.split(",") if args.kw else None,
            classes=args.classes.split(",") if args.classes else None,
            themes=args.themes.split(",") if args.themes else None,
            explanation=args.explanation,
            special=args.special_keywords,
        )

    if args.prefixes:
        kw.save_markdown_prefixes(args.prefixes)
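
For reference, the CLI can also be driven programmatically by passing
an argument list to main(); the output file names below are examples:

    from tripper.datadoc.keywords import main

    main([
        "--context", "context.json",
        "--keywords", "keywords.md",
        "--prefixes", "prefixes.md",
        "--explanation",
        "--special-keywords",
    ])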