keywords¶
Parse keyword definitions and generate context.
Keywords
¶
A class representing all keywords within a theme.
Source code in tripper/datadoc/keywords.py
class Keywords:
"""A class representing all keywords within a theme."""
# pylint: disable=too-many-public-methods
rootdir = Path(__file__).absolute().parent.parent.parent.resolve()
def __init__(
self,
theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
yamlfile: "Optional[FileLoc]" = None,
timeout: float = 3,
) -> None:
"""Initialises keywords object.
Arguments:
theme: IRI of one or more themes to load keywords for.
yamlfile: A YAML file with keyword definitions to parse. May also
be a URI, in which case it will be accessed via HTTP GET.
Deprecated. Use the `add_yaml()` or `add()` methods instead.
timeout: Timeout in case `yamlfile` is a URI.
Attributes:
data: The dict loaded from the keyword yamlfile.
keywords: A dict mapping keywords (name/prefixed/iri) to dicts
describing the keywords.
theme: IRI of a theme or scientific domain that the keywords
belong to.
"""
default_prefixes = AttrDict(ddoc=str(DDOC))
self.theme = None # theme for this object
self.data = AttrDict(prefixes=default_prefixes, resources=AttrDict())
# A "view" into `self.data`. A dict mapping short, prefixed
# and expanded keyword names to corresponding value dicts in
# self.data.
self.keywords = AttrDict()
# Themes and files that have been parsed
self.parsed: "set" = set()
if theme:
self.add_theme(theme)
if yamlfile:
warnings.warn(
"The `yamlfile` argument is deprecated. Use the `add_yaml()` "
"or `add()` methods instead.",
DeprecationWarning,
)
if isinstance(yamlfile, (str, Path)):
self.load_yaml(yamlfile, timeout=timeout)
else:
for path in yamlfile:
self.load_yaml(path, timeout=timeout)
def __contains__(self, item):
return item in self.keywords
def __getitem__(self, key):
return self.keywords[key]
def __iter__(self):
return iter(k for k in self.keywords if is_curie(k))
def __len__(self):
return len(list(self.__iter__()))
def __dir__(self):
return dir(Keywords) + ["data", "keywords", "theme"]
def __eq__(self, other):
return self.data == other.data and self.theme == other.theme
def _set_keyword(self, keywords, keyword, value, redefine=False):
"""Add new keyword-value pair to `keywords` dict."""
# value = AttrDict(value)
expanded = expand_iri(value.iri, self.get_prefixes())
prefixed = prefix_iri(expanded, self.get_prefixes())
if redefine or keyword not in keywords:
keywords[keyword] = value
if redefine or prefixed not in keywords:
keywords[prefixed] = value
if redefine or expanded not in keywords:
keywords[expanded] = value
def _set_keywords(self, clear=True, redefine=False):
"""Update internal keywords attribute to data attribute.
Arguments:
clear: If false, only new keywords will be added, but nothing
removed.
redefine: Whether to redefine existing keywords.
"""
if clear:
self.keywords.clear()
for clsvalue in self.data.get("resources", AttrDict()).values():
for keyword, value in clsvalue.get("keywords", AttrDict()).items():
self._set_keyword(
self.keywords, keyword, value, redefine=redefine
)
def copy(self):
"""Returns a copy of self."""
new = Keywords(theme=None)
new.theme = self.theme
new.data = deepcopy(self.data)
new.keywords = deepcopy(self.keywords)
new.parsed = self.parsed.copy()
return new
def add(
self,
keywords: "Optional[KeywordsType]",
format: "Optional[Union[str, Sequence]]" = None,
timeout: float = 3,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Add `keywords` to this Keywords object.
Arguments:
keywords: Keywords definitions to add to this Keywords object.
May be another Keywords object, a path to a file, a theme or a
sequence of these.
format: Format of `keywords`. Recognised formats include:
yaml, csv, tsv, turtle, xml, json-ld, rdfa, ...
timeout: Timeout when accessing remote files.
strict: Whether to raise an `InvalidKeywordError` exception if
`keywords` contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
if not isinstance(keywords, str) and isinstance(keywords, Sequence):
if isinstance(format, str):
format = [format] * len(keywords)
elif format and len(format) != len(keywords):
raise TypeError(
"If given, `format` must have the same length as "
"`keywords`"
)
def _add(kw, fmt):
if kw is None:
pass
elif isinstance(kw, Keywords):
self.theme = merge(self.theme, kw.theme)
recursive_update(self.data, kw.data, cls=AttrDict)
self._set_keywords(clear=False)
elif isinstance(kw, dict):
self._load_yaml(kw, strict=strict, redefine=redefine)
elif not isinstance(kw, str) and isinstance(kw, Sequence):
for i, e in enumerate(kw):
_add(e, fmt[i] if fmt else None)
elif isinstance(kw, (str, Path, IOBase)):
if (
isinstance(kw, str)
and ":" in kw
and not (
kw.startswith("/") or kw.startswith("./") or is_uri(kw)
)
):
self.add_theme(
kw,
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
if not fmt:
name = kw.name if hasattr(kw, "name") else kw
fmt = Path(name).suffix
fmt = fmt.lstrip(".").lower()
# pylint:disable=consider-using-get
if fmt in RDFLIB_SUFFIX_FORMAT_MAP:
fmt = RDFLIB_SUFFIX_FORMAT_MAP[fmt]
if fmt in ("yaml", "yml"):
self.load_yaml(
kw,
timeout=timeout,
strict=strict,
redefine=redefine,
)
elif fmt in ("csv", "tsv", "xlsx", "excel"):
self.load_table(kw, format=fmt)
else:
self.load_rdffile(
kw,
format=fmt,
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
raise TypeError(
"`keywords` must be a KeywordsType object (Keywords "
"instance, dict, IO, Path, string or sequence). "
f"Got: {type(kw)}"
)
_add(keywords, format)
def add_theme(
self,
theme: "Union[str, Sequence[str]]",
timeout: float = 3,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Add keywords for `theme`, where `theme` is the IRI of a
theme or scientific domain or a list of such IRIs.
Arguments:
theme: IRI (or list of IRIs) of a theme/scientific domain to load.
timeout: Timeout when accessing remote files.
strict: Whether to raise an `InvalidKeywordError` exception if the
theme contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
if isinstance(theme, str):
theme = [theme]
parsedkey = (tuple(theme), strict, redefine)
if parsedkey in self.parsed:
return
for name in theme: # type: ignore
expanded = expand_iri(name, self.get_prefixes())
prefixed = prefix_iri(name, self.get_prefixes())
add(
self.data,
"theme",
prefixed,
)
for ep in get_entry_points("tripper.keywords"):
if expand_iri(ep.value, self.get_prefixes()) == expanded:
package_name, path = ep.name.split("/", 1)
package = import_module(package_name)
fullpath = (
Path(package.__file__).parent / path # type: ignore
)
self.add(
fullpath,
timeout=timeout,
strict=strict,
redefine=redefine,
)
break
else:
# Fallback in case the entry point is not installed
if expanded == DDOC.datadoc:
self.load_yaml(
self.rootdir
/ "tripper"
/ "context"
/ "0.3"
/ "keywords.yaml",
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
raise TypeError(f"Unknown theme: {name}")
self.parsed.add(parsedkey)
def load_yaml(
self,
yamlfile: "Union[Path, str]",
timeout: float = 3,
strict: bool = True,
redefine: str = "raise",
) -> None:
"""Load YAML file with keyword definitions.
Arguments:
yamlfile: Path or URL to a YAML file to load.
timeout: Timeout when accessing remote files.
strict: Whether to raise an `InvalidKeywordError` exception if the
file contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
parsedkey = (yamlfile, strict, redefine)
if parsedkey in self.parsed:
return
with openfile(yamlfile, timeout=timeout, mode="rt") as f:
d = yaml.safe_load(f)
try:
self._load_yaml(d, strict=strict, redefine=redefine)
except Exception as exc:
raise ParseError(f"error parsing '{yamlfile}'") from exc
self.parsed.add(parsedkey)
def _load_yaml(
self,
d: dict,
strict: bool = True,
redefine: str = "raise",
) -> None:
"""Parse a dict with keyword definitions following the format of
the YAML file.
Arguments:
d: Dict defining a keyword following the YAML file format.
strict: Whether to raise an `InvalidKeywordError` exception if `d`
contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
# pylint: disable=too-many-nested-blocks,too-many-statements
# pylint: disable=too-many-locals
self.add(d.get("basedOn"))
required_resource_keys = {"iri"}
valid_resource_keys = {
"iri",
"subClassOf",
"description",
"usageNote",
"keywords",
}
required_keywords = {"iri"}
valid_keywords = {
"name",
"iri",
"type",
"subPropertyOf",
"inverseOf", # XXX - to be implemented
"domain",
"range",
"datatype",
"inverse", # XXX - to be implemented (remove?)
"unit", # XXX - to be implemented
"conformance",
"description",
"usageNote",
"theme",
"default",
}
iri_keywords = {
"iri",
"type",
"subPropertyOf",
"inverseOf",
"domain",
"range",
"datatype",
}
valid_conformances = ["mandatory", "recommended", "optional"]
def to_prefixed(x):
"""Help function that converts an IRI or list of IRIs to
prefixed IRIs."""
if isinstance(x, str):
return self.prefixed(x, strict=False)
return [to_prefixed(e) for e in x]
# Create deep copies that we will update
prefixes = deepcopy(self.data.prefixes)
resources = deepcopy(self.data.resources)
keywords = deepcopy(self.keywords)
# Prefixes
for prefix, ns in d.get("prefixes", AttrDict()).items():
if prefix in prefixes and ns != prefixes[prefix]:
raise PrefixMismatchError(
f"prefix '{prefix}' is already mapped to "
f"'{prefixes[prefix]}'. Cannot redefine it to '{ns}'"
)
prefixes[prefix] = ns
# Map keywords IRIs to keyword definitions
iridefs = {}
for defs in d.get("resources", {}).values():
for kw, val in defs.get("keywords", {}).items():
# Check that value has all the required keywords
for k in required_keywords:
if k not in val:
raise MissingKeyError(f"no '{k}' in keyword '{kw}'")
key = prefix_iri(val["iri"], prefixes)
if len(val) > 1 or key not in iridefs:
iridefs[key] = val
# Resources
for cls, defs in d.get("resources", AttrDict()).items():
resval = resources.get(cls, AttrDict())
defs = AttrDict(defs).copy()
for key in required_resource_keys:
if key not in defs:
raise MissingKeyError(
f"missing required key '{key}' for resource '{cls}'"
)
for key in defs:
if strict and key not in valid_resource_keys:
raise InvalidDatadocError(f"invalid resource key: '{key}'")
# TODO: Check for redefinition of existing class
resval.iri = prefix_iri(defs.iri, prefixes)
if "subClassOf" in defs:
resval.subClassOf = to_prefixed(defs.subClassOf)
if "description" in defs:
resval.description = defs.description
if "usageNote" in defs:
resval.usageNote = defs.usageNote
resval.setdefault("keywords", AttrDict())
for keyword, value in defs.get("keywords", AttrDict()).items():
# If a value only contains an IRI, replace it with a more
# elaborate definition (if it exists)
if len(value) == 1:
value = AttrDict(iridefs[value["iri"]])
# Check conformance values
if "conformance" in value:
c = value["conformance"]
if c not in valid_conformances:
raise DatadocValueError(f"invalid conformance: {c}")
# If strict, check that all keys are known
if strict:
for k in value.keys():
if k not in valid_keywords:
raise InvalidKeywordError(
f"keyword '{keyword}' has invalid key: {k}"
)
# Normalise IRIs in values to prefixed IRIs
value = AttrDict(value).copy()
for k in iri_keywords:
if k in value:
value[k] = to_prefixed(value[k])
# Add extra annotations to value
if "name" not in value or ":" in value.name:
value.name = keyword
if "theme" in d:
add(value, "theme", d["theme"])
add(value, "domain", prefix_iri(defs.iri, prefixes))
# Check whether we try to redefine an existing keyword
skip = False
if keyword in keywords:
for k, v in value.items():
oldval = keywords[keyword].get(k)
if k in ("iri", "domain") or v == oldval:
continue
oldiri = keywords[keyword].iri
if value.iri == oldiri:
raise RedefineError(
"Cannot redefine existing concept "
f"'{value.iri}'. Trying to change "
f"property '{k}' from '{oldval}' to "
f"'{v}'."
)
if redefine == "raise":
raise RedefineError(
f"Trying to redefine keyword "
f"'{keyword}' from '{oldiri}' "
f"to '{value.iri}'."
)
if redefine == "skip":
skip = True
warnings.warn(
f"Skip redefinition of keyword: {keyword}",
SkipRedefineKeywordWarning,
)
elif redefine == "allow":
warnings.warn(
f"Redefining keyword '{keyword}' from "
f"'{oldiri}' to '{value.iri}'.",
RedefineKeywordWarning,
)
else:
raise ValueError(
"Invalid value of `redefine` "
f'argument: "{redefine}". Should be '
'one of "allow", "skip" or "raise".'
)
break
if skip:
continue
kw = resval.keywords
if keyword in kw:
kw[keyword].update(value)
else:
kw[keyword] = value
self._set_keyword(keywords, keyword, value, redefine=True)
if cls in resources:
resources[cls].update(resval)
else:
resources[cls] = resval
# Everything succeeded, update instance
self.data.prefixes.update(prefixes)
self.data.resources.update(resources)
self.keywords.update(keywords)
# Run an extra round and add keywords we have missed.
self._set_keywords(clear=False, redefine=False)
def save_yaml(
self,
yamlfile: "Union[Path, str]",
keywords: "Optional[Sequence[str]]" = None,
classes: "Optional[Union[str, Sequence[str]]]" = None,
themes: "Optional[Union[str, Sequence[str]]]" = None,
) -> None:
"""Save YAML file with keyword definitions.
Arguments:
yamlfile: File to save keyword definitions to.
keywords: Sequence of keywords to include.
classes: Include keywords that have these classes in their domain.
themes: Include keywords for these themes.
"""
keywords, classes, themes = self._keywords_list(
keywords, classes, themes
)
resources = {}
for cls, clsval in self.data.resources.items():
if self.prefixed(cls) in classes:
resources[cls] = dict(clsval.copy())
resources[cls]["keywords"] = {}
for k, v in self.data.resources[cls].keywords.items():
if self.prefixed(k) in keywords:
resources[cls]["keywords"][k] = dict(v)
data = dict(self.data.copy())
del data["resources"]
recursive_update(data, {}, cls=dict)
data["resources"] = resources
with open(yamlfile, "wt", encoding="utf-8") as f:
yaml.safe_dump(data, f, sort_keys=False)
def load_table(
self,
filename: "FileLoc",
format: "Optional[str]" = None, # pylint: disable=unused-argument
prefixes: "Optional[dict]" = None,
theme: "Optional[str]" = None,
basedOn: "Optional[Union[str, List[str]]]" = None,
**kwargs,
) -> None:
"""Load keywords from a csv file.
Arguments:
filename: File to load.
format: File format. Unused. Only csv is currently supported.
prefixes: Dict with additional prefixes used in the table.
theme: Theme defined by the table.
basedOn: Theme(s) that the table is based on.
kwargs: Keyword arguments passed on to TableDoc.parse_csv().
"""
# pylint: disable=import-outside-toplevel
from tripper.datadoc.tabledoc import TableDoc
td = TableDoc.parse_csv(
filename, type=None, prefixes=prefixes, **kwargs
)
dicts = td.asdicts()
self.fromdicts(dicts, prefixes=prefixes, theme=theme, basedOn=basedOn)
def save_table(
self,
filename: "FileLoc",
format: "Optional[str]" = None, # pylint: disable=unused-argument
names: "Optional[Sequence]" = None,
strip: bool = True,
keymode: str = "name",
**kwargs,
) -> None:
# pylint: disable=line-too-long
"""Load keywords from a csv file.
Arguments:
filename: File to write to.
format: File format. Unused. Only csv is currently supported.
names: A sequence of keyword or class names to save. The
default is to save all keywords.
strip: Whether to strip leading and trailing whitespaces
from cells.
keymode: How to represent column headers. Should be either
"name", "prefixed" (CURIE) or "expanded" (full IRI).
kwargs: Additional keyword arguments passed to the writer.
For more details, see [write_csv()].
References:
[write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
"""
# pylint: disable=import-outside-toplevel
from tripper.datadoc.tabledoc import TableDoc
dicts = self.asdicts(names, keymode=keymode)
td = TableDoc.fromdicts(dicts, type=None, keywords=self, strip=strip)
td.write_csv(filename, **kwargs)
def keywordnames(self) -> "list":
"""Return a list with all keyword names defined in this instance."""
return [k for k in self.keywords.keys() if ":" not in k]
def classnames(self) -> "list":
"""Return a list with all class names defined in this instance."""
return list(self.data.resources.keys())
def asdicts(
self,
names: "Optional[Sequence]" = None,
keymode: str = "prefixed",
) -> "List[dict]":
"""Return the content of this Keywords object as a list of JSON-LD
dicts.
Arguments:
names: A sequence of keyword or class names. The
default is to return all keywords.
keymode: How to represent keys. Should be either "name",
"prefixed" (CURIE) or "expanded" (full IRI).
Returns:
List of JSON-LD dicts corresponding to `names`.
"""
keymodes = {
"name": iriname,
"prefixed": None,
"expanded": self.expanded,
}
maps = {
"subPropertyOf": "rdfs:subPropertyOf",
"unit": "ddoc:unitSymbol",
"description": "dcterms:description",
"usageNote": "vann:usageNote",
"theme": "dcat:theme",
}
def key(k):
"""Return key `k` accordig to `keymode`."""
return keymodes[keymode](k) if keymodes[keymode] else k
conformance_indv = {v: k for k, v in CONFORMANCE_MAPS.items()}
if names is None:
names = self.keywordnames()
classes = []
dicts = []
for name in names:
if name not in self.keywords:
classes.append(name)
continue
d = self.keywords[name]
if "range" in d and self.expanded(d.range) != RDFS.Literal:
proptype = "owl:ObjectProperty"
range = d.range
elif (
"datatype" in d and self.expanded(d.datatype) != RDF.langString
):
proptype = "owl:DatatypeProperty"
range = d.get("datatype")
else:
proptype = "owl:AnnotationProperty"
range = d.get("datatype")
dct = {
"@id": d.iri,
"@type": proptype,
key("rdfs:label"): d.name,
}
if "domain" in d:
dct[key("rdfs:domain")] = d.domain
if range:
dct[key("rdfs:range")] = range
if "conformance" in d:
dct[key("ddoc:conformance")] = conformance_indv.get(
d.conformance, d.conformance
)
for k, v in d.items():
if k in maps:
dct[key(maps[k])] = v
dicts.append(dct)
if classes:
classmaps = {}
for k, v in self.data.resources.items():
classmaps[k] = k
classmaps[self.expanded(k)] = k
classmaps[self.prefixed(k)] = k
for name in classes:
d = self.data.resources[classmaps[name]]
dct = {"@id": d.iri, "@type": OWL.Class}
if "subClassOf" in d:
dct[key("rdfs:subClassOf")] = d.subClassOf
if "description" in d:
dct[key("dcterms:description")] = d.description
if "usageNote" in d:
dct[key("vann:usageNote")] = d.usageNote
dicts.append(dct)
return dicts
def fromdicts(
self,
dicts: "Sequence[dict]",
prefixes: "Optional[dict]" = None,
theme: "Optional[str]" = None,
basedOn: "Optional[Union[str, List[str]]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Populate this Keywords object from a sequence of dicts.
Arguments:
dicts: A sequence of JSON-LD dicts to populate this keywords object
from. Their format should follow what is returned by
tripper.datadoc.acquire().
prefixes: Dict with additional prefixes used by `dicts`.
theme: Theme defined by `dicts`.
basedOn: Theme(s) that `dicts` are based on.
strict: Whether to raise an `InvalidKeywordError` exception if `dicts`
contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword.
- "skip": Don't redefine existing keyword.
- "raise": Raise an RedefineError (default).
"""
data = self._fromdicts(
dicts,
prefixes=prefixes,
theme=theme,
basedOn=basedOn,
)
self._load_yaml(data, strict=strict, redefine=redefine)
def _fromdicts(
self,
dicts: "Sequence[dict]",
prefixes: "Optional[dict]" = None,
theme: "Optional[str]" = None,
basedOn: "Optional[Union[str, List[str]]]" = None,
) -> dict:
"""Help method for `fromdicts()` that returns a dict with
keyword definitions following the format of the YAML file.
"""
# pylint: disable=too-many-locals,too-many-statements
def to_prefixed(x, prefixes, strict=True):
"""Help function that converts an IRI or list of IRIs to
prefixed IRIs."""
if isinstance(x, str):
return prefix_iri(x, prefixes, strict=strict)
return [to_prefixed(e, prefixes, strict=strict) for e in x]
# Prefixes (merged with self.data.prefixes)
p = self.get_prefixes().copy()
if prefixes:
for prefix, ns in prefixes.items():
if prefix in p and p[prefix] != ns:
raise PrefixMismatchError(
f"adding prefix `{prefix}: {ns}` but it is already "
f"defined to '{p[prefix]}'"
)
p.update({k: str(v) for k, v in prefixes.items()})
def isproperty(v):
if "@type" not in v:
return False
types = [v["@type"]] if isinstance(v["@type"], str) else v["@type"]
for t in types:
exp = expand_iri(t, p, strict=True)
if exp in (
OWL.AnnotationProperty,
OWL.ObjectProperty,
OWL.DatatypeProperty,
RDF.Property,
):
return True
return False
entities = {expand_iri(d["@id"], p): d for d in dicts}
properties = {k: v for k, v in entities.items() if isproperty(v)}
classes = {k: v for k, v in entities.items() if k not in properties}
data = AttrDict()
if theme:
data.theme = theme
if basedOn:
data.basedOn = basedOn
data.prefixes = p
data.resources = AttrDict()
resources = data.resources
# Add classes
clslabels = {}
for k, v in classes.items():
d = AttrDict(iri=prefix_iri(k, p))
for kk, vv in v.items():
if kk in ("description", "usageNote"):
d[kk] = vv
if kk == "subClassOf":
if isinstance(vv, str):
d[kk] = to_prefixed(vv, p, strict=True)
d.setdefault("keywords", AttrDict())
label = v["label"] if "label" in v else iriname(k)
resources[label] = d
clslabels[d.iri] = label
# Add properties
for propname, value in properties.items():
name = iriname(propname)
label = value["label"] if "label" in value else name
d = AttrDict(iri=value["@id"])
if "@type" in value:
d.type = to_prefixed(value["@type"], p)
d.domain = value.get("domain", RDFS.Resource)
for domain in asseq(d.domain):
dlabel = prefix_iri(domain, p, strict=True)
domainname = clslabels.get(dlabel, iriname(domain))
if domainname not in resources:
if domainname not in self.data.resources:
if domainname not in ("Resource",):
logger.info(
f"Adding undefined domain '{domain}' for "
f"keyword '{label}'"
)
r = AttrDict(
iri=prefix_iri(domain, p),
keywords=AttrDict(),
)
else:
r = self.data.resources[domainname].copy()
resources[domainname] = r
r.keywords[label] = d
else:
resources[domainname].keywords[label] = d
if "range" in value:
_types = asseq(d.get("type", OWL.AnnotationProperty))
types = [expand_iri(t, p) for t in _types]
if OWL.ObjectProperty in types:
d.range = value["range"]
else:
d.range = "rdfs:Literal"
if "range" in value:
d.datatype = value["range"]
else:
d.range = "rdfs:Literal"
# TODO: Define if we accept missing datatype for literals
if "conformance" in value:
d.conformance = CONFORMANCE_MAPS[value["conformance"]]
if "unitSymbol" in value:
d.unit = value["unitSymbol"]
for k, v in value.items():
if (
k not in ("@id", "@type", "domain", "label", "name")
and k not in d
):
d[k] = v
return data
def missing_keywords(
self,
ts: "Triplestore",
include_classes: bool = False,
return_existing: bool = False,
) -> "Union[list, Tuple[list, list]]":
"""List keywords not defined in triplestore `ts`.
Arguments:
ts: Triplestore object to check.
include_classes: Also return missing classes.
return_existing: If true, two lists are returned:
- list of keywords missing in `ts`
- list of keywords existing in `ts`
Returns:
List with the names of keywords in this instance that are
not defined in triplestore `ts`.
"""
expanded = {k for k in self.keywords.keys() if "://" in k}
if include_classes:
expanded.update(self.expanded(c) for c in self.classnames())
if not expanded:
return []
query = f"""
SELECT ?s WHERE {{
VALUES ?s {{ { ' '.join(f'<{iri}>' for iri in expanded) } }}
?s a ?o
}}
"""
existing = {r[0] for r in ts.query(query)}
missing = expanded.difference(existing)
missing_names = [self.shortname(k) for k in missing]
if return_existing:
existing_names = [self.keywords[k].name for k in existing]
return missing_names, existing_names
return missing_names
def _load_rdf(
self, ts: "Triplestore", iris: "Optional[Sequence[str]]" = None
) -> "Sequence[dict]":
"""Help method for load_rdf(). Returns dicts loaded from triplestore
`ts`.
If `iris` is not given, all OWL properties in `ts` will be loaded.
"""
# pylint: disable=import-outside-toplevel,too-many-nested-blocks
# pylint: disable=too-many-locals
from tripper.datadoc.context import Context
from tripper.datadoc.dataset import acquire
context = Context(self.get_context())
if iris is None:
query = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?s WHERE {
VALUES ?o {
owl:DatatypeProperty owl:ObjectProperty owl:AnnotationProperty
rdf:Property
}
?s a ?o .
}
"""
iris = [iri[0] for iri in ts.query(query)]
prefixes = self.data.prefixes
for prefix, ns in ts.namespaces.items():
self.add_prefix(prefix, ns)
# Maps JSON-LD key name to keyword
names = {
DDOC.unitSymbol: "unit",
"ddoc:unitSymbol": "unit",
}
dicts = []
for iri in iris:
d = AttrDict()
for k, v in acquire(ts, iri, context=context).items():
d[names.get(k, k)] = v
dicts.append(d)
dct = {expand_iri(d["@id"], prefixes): d for d in dicts}
# FIXME: Add domain and range to returned dicts
# Add domain and range to dicts
seen = set()
for d in list(dct.values()):
for ref in ("domain", "range"):
if ref in d:
for domain in asseq(d[ref]):
expanded = expand_iri(domain, prefixes)
if expanded.startswith(str(XSD)):
continue
if expanded not in seen:
seen.add(expanded)
acquired = acquire(ts, expanded, context=context)
if acquired:
dct[expanded] = acquired # type: ignore
newdicts = list(dct.values())
return newdicts
def save_rdf(self, ts: "Triplestore") -> dict:
"""Save to triplestore."""
# pylint: disable=import-outside-toplevel,cyclic-import
from tripper.datadoc.dataset import store
for prefix, ns in self.get_prefixes().items():
ts.bind(prefix, ns)
# Ensure that the schema for properties is stored
load_datadoc_schema(ts)
# Store all keywords that are not already in the triplestore
missing = self.missing_keywords(ts, include_classes=True)
dicts = self.asdicts(missing)
return store(ts, dicts)
def load_rdf(
self,
ts: "Triplestore",
iris: "Optional[Sequence[str]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Populate this Keyword object from a triplestore.
Arguments:
ts: Triplestore to load keywords from.
iris: IRIs to load. The default is to load IRIs corresponding to all
properties and classes.
strict: Whether to raise an `InvalidKeywordError` exception if the
data contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
dicts = self._load_rdf(ts, iris)
self.fromdicts(
dicts,
prefixes=ts.namespaces,
strict=strict,
redefine=redefine,
)
def load_rdffile(
self,
rdffile: "FileLoc",
format: "Optional[str]" = None,
timeout: float = 3,
iris: "Optional[Sequence[str]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Load RDF from file or URL.
Arguments:
rdffile: File to load.
format: Any format supported by rdflib.Graph.parse().
timeout: Timeout in case `rdffile` is a URI.
iris: IRIs to load. The default is to load IRIs corresponding to
all properties and classes.
strict: Whether to raise an `InvalidKeywordError` exception if the
data contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
if format is None:
format = guess_rdf_format(rdffile)
ts = Triplestore("rdflib")
with openfile(rdffile, timeout=timeout, mode="rt") as f:
ts.parse(f, format=format)
self.load_rdf(ts, iris=iris, strict=strict, redefine=redefine)
def isnested(self, keyword: str) -> bool:
"""Returns whether the keyword corresponds to an object property."""
d = self.keywords[keyword]
if "datatype" in d or d.range == "rdfs:Literal":
return False
return True
def expanded(self, keyword: str, strict: bool = True) -> str:
"""Return the keyword expanded to its full IRI."""
if keyword in self.keywords:
iri = self.keywords[keyword].iri
elif "resources" in self.data and keyword in self.data.resources:
iri = self.data.resources[keyword].iri
elif ":" in keyword or not strict:
iri = keyword
else:
raise InvalidKeywordError(keyword)
return expand_iri(iri, self.get_prefixes(), strict=strict)
def range(self, keyword: str) -> str:
"""Return the range of the keyword."""
return self.keywords[keyword].range
def superclasses(self, cls: str) -> "Union[str, list]":
"""Return a list with `cls` and it superclasses prefixed.
Example:
>>> keywords = Keywords()
>>> keywords.superclasses("Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
'dcat:Resource',
'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
>>> keywords.superclasses("dcat:Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
'dcat:Resource',
'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
"""
if cls in self.data.resources:
r = self.data.resources[cls]
else:
cls = prefix_iri(cls, self.get_prefixes())
rlst = [r for r in self.data.resources.values() if cls == r.iri]
if not rlst:
raise NoSuchTypeError(cls)
if len(rlst) > 1:
raise RuntimeError(
f"{cls} matches more than one resource: "
f"{', '.join(r.iri for r in rlst)}"
)
r = rlst[0]
if "subClassOf" in r:
if isinstance(r.subClassOf, str):
return [r.iri, r.subClassOf]
return [r.iri] + r.subClassOf
return r.iri
def keywordname(self, keyword: str) -> str:
"""Return the short name of `keyword`."""
warnings.warn(
"Keywords.keywordname() is deprecated. Use Keywords.shortname() "
"instead.",
DeprecationWarning,
stacklevel=2,
)
if keyword not in self.keywords:
raise InvalidKeywordError(keyword)
return self.keywords[keyword].name
def shortname(self, iri: str) -> str:
"""Return the short name of `iri`.
Example:
>>> keywords = Keywords()
>>> keywords.shortname("dcterms:title")
'title'
"""
if iri in self.keywords:
return self.keywords[iri].name
if iri in self.data.resources.keys():
return iri
expanded = self.expanded(iri)
for k, v in self.data.resources.items():
if expanded == self.expanded(v.iri):
return k
raise InvalidKeywordError(iri)
def prefixed(self, name: str, strict: bool = True) -> str:
"""Return prefixed name or `name`.
Example:
>>> keywords = Keywords()
>>> keywords.prefixed("title")
'dcterms:title'
"""
if name in self.keywords:
return prefix_iri(self.keywords[name].iri, self.get_prefixes())
if name in self.data.resources:
return prefix_iri(
self.data.resources[name].iri,
self.get_prefixes(),
strict=strict,
)
if is_curie(name):
return name
return prefix_iri(name, self.get_prefixes(), strict=strict)
def typename(self, type) -> str:
"""Return the short name of `type`.
Example:
>>> keywords = Keywords()
>>> keywords.typename("dcat:Dataset")
'Dataset'
"""
if type in self.data.resources:
return type
prefixed = prefix_iri(type, self.get_prefixes())
for name, r in self.data.resources.items():
if prefixed == r.iri:
return name
raise NoSuchTypeError(type)
def get_prefixes(self) -> dict:
"""Return prefixes dict."""
return self.data.get("prefixes", {})
def add_prefix(self, prefix, namespace, replace=False):
"""Bind `prefix` to `namespace`.
If `namespace` is None, the prefix is removed.
If `replace` is true, an existing namespace will be overridden.
"""
if namespace is None:
del self.data.prefixes[str(prefix)]
elif replace:
self.data.prefixes[str(prefix)] = str(namespace)
else:
self.data.prefixes.setdefault(str(prefix), str(namespace))
def get_context(self) -> dict:
"""Return JSON-LD context as a dict.
Note: The returned dict corresponds to the value of the "@context"
keyword in a JSON-LD document.
"""
ctx = {}
ctx["@version"] = 1.1
# Add prefixes to context
prefixes = self.data.get("prefixes", {})
for prefix, ns in prefixes.items():
ctx[prefix] = ns
resources = self.data.get("resources", {})
# Translate datatypes
translate = {"rdf:JSON": "@json"}
# Add keywords (properties) to context
for resource in resources.values():
for k, v in resource.get("keywords", {}).items():
iri = v["iri"]
if "datatype" in v:
dt = v["datatype"]
if isinstance(dt, str):
dt = translate.get(dt, dt)
else:
dt = [translate.get(t, t) for t in dt]
d = {}
if v.get("reverse", "").lower() == "true":
d["@reverse"] = iri
else:
d["@id"] = iri
if dt == "rdf:langString" or "language" in v:
d["@language"] = v.get("language", "en")
else:
d["@type"] = dt
ctx[k] = d # type: ignore
elif v.get("range", "rdfs:Literal") == "rdfs:Literal":
ctx[k] = iri
else:
ctx[k] = { # type: ignore
"@id": iri,
"@type": "@id",
}
# Add resources (classes) to context
for k, v in resources.items():
ctx.setdefault(k, v.iri)
return ctx
def save_context(self, outfile: "FileLoc", indent: int = 2) -> None:
"""Save JSON-LD context file.
Arguments:
outfile: File to save the JSON-LD context to.
indent: Indentation level. Defaults to two.
"""
context = {"@context": self.get_context()}
with open(outfile, "wt", encoding="utf-8") as f:
json.dump(context, f, indent=indent)
f.write(os.linesep)
def _keywords_list(
self,
keywords: "Optional[Sequence[str]]" = None,
classes: "Optional[Union[str, Sequence[str]]]" = None,
themes: "Optional[Union[str, Sequence[str]]]" = None,
) -> "Tuple[Set[str], Set[str], Set[str]]":
"""Help function returning a list of keywords corresponding to arguments
`keywords`, `classes` and `themes`. See also save_markdown_table().
Arguments:
keywords: Sequence of keywords to include.
classes: Include keywords that have these classes in their domain.
themes: Include keywords for these themes.
Returns:
keywordset: Set with all included keywords.
classet: Set with all included classes.
themeset: Set with all included themes.
"""
keywords = (
set(self.prefixed(k) for k in asseq(keywords))
if keywords
else set()
)
classes = (
set(self.prefixed(d) for d in asseq(classes)) if classes else set()
)
themes = (
set(self.prefixed(t) for t in asseq(themes)) if themes else set()
)
orig_classes = classes.copy()
orig_themes = themes.copy()
if not keywords and not classes and not themes:
keywords.update(self.prefixed(k) for k in self.keywordnames())
for value in self.data.resources.values():
for k, v in value.get("keywords", {}).items():
vdomain = [
self.prefixed(d) for d in asseq(v.get("domain", ()))
]
vtheme = [self.prefixed(t) for t in asseq(v.get("theme", ()))]
if orig_classes:
for domain in vdomain:
prefixed = self.prefixed(domain)
if prefixed in orig_classes:
keywords.add(k)
if orig_themes:
for theme in vtheme:
prefixed = self.prefixed(theme)
if prefixed in orig_themes:
keywords.add(k)
for k in keywords:
v = self.keywords[k]
vdomain = [self.prefixed(d) for d in asseq(v.get("domain", ()))]
vtheme = [self.prefixed(t) for t in asseq(v.get("theme", ()))]
if vdomain and not classes.intersection(vdomain):
classes.add(vdomain[0])
if vtheme and not themes.intersection(vtheme):
themes.add(vtheme[0])
return keywords, classes, themes
def _keywords_table(
self,
keywords: "Sequence[str]",
) -> "List[str]":
"""Help function for save_markdown_table().
Returns a list with Markdown table documenting the provided
sequence of keywords.
"""
# pylint: disable=too-many-locals,too-many-branches
header = [
"Keyword",
"Range",
"Conformance",
"Definition",
"Usage note",
]
order = {"mandatory": 1, "recommended": 2, "optional": 3}
refs = []
table = []
for keyword in keywords:
d = self.keywords[keyword]
rangestr = f"[{d.range}]" if "range" in d else ""
if "datatype" in d:
rangestr += (
", " + ", ".join(d.datatype)
if isinstance(d.datatype, list)
else f"<br>({d.datatype})"
)
table.append(
[
f"[{d.name}]",
rangestr,
f"{d.conformance}" if "conformance" in d else "",
f"{d.description}" if "description" in d else "",
f"{d.usageNote}" if "usageNote" in d else "",
]
)
refs.append(f"[{d.name}]: {self.expanded(d.iri)}")
if "range" in d:
refs.append(f"[{d.range}]: {self.expanded(d.range)}")
table.sort(key=lambda row: order.get(row[2], 10))
out = self._to_table(header, table)
out.append("")
out.extend(refs)
out.append("")
out.append("")
return out
def save_markdown_table(
self, outfile: "FileLoc", keywords: "Sequence[str]"
) -> None:
"""Save markdown file with documentation of the keywords."""
table = self._keywords_table(keywords)
with open(outfile, "wt", encoding="utf-8") as f:
f.write(os.linesep.join(table) + os.linesep)
def save_markdown(
self,
outfile: "FileLoc",
keywords: "Optional[Sequence[str]]" = None,
classes: "Optional[Union[str, Sequence[str]]]" = None,
themes: "Optional[Union[str, Sequence[str]]]" = None,
explanation: bool = False,
special: bool = False,
) -> None:
"""Save markdown file with documentation of the keywords.
Arguments:
outfile: File to save the markdown documentation to.
keywords: Sequence of keywords to include.
classes: Include keywords that have these classes in their domain.
themes: Include keywords for these themes.
explanation: Whether to include explanation of columns labels.
special: Whether to generate documentation of special
JSON-LD keywords.
"""
# pylint: disable=too-many-locals,too-many-branches
keywords, classes, themes = self._keywords_list(
keywords, classes, themes
)
ts = Triplestore("rdflib")
for prefix, ns in self.data.get("prefixes", {}).items():
ts.bind(prefix, ns)
out = [
"<!-- Do not edit! This file is generated with Tripper. "
"Edit the keywords.yaml file instead. -->",
"",
f"# Keywords{f' for theme: {themes}' if themes else ''}",
(
f"The tables below lists the keywords for the theme {themes}."
if themes
else ""
),
"",
]
column_explanations = [
"The meaning of the columns are as follows:",
"",
"- **Keyword**: The keyword referring to a property used for "
"the data documentation.",
"- **Range**: Refer to the class for the values of the keyword.",
"- **Conformance**: Whether the keyword is mandatory, recommended "
"or optional when documenting the given type of resources.",
"- **Definition**: The definition of the keyword.",
"- **Usage note**: Notes about how to use the keyword.",
"",
]
special_keywords = [
"## Special keywords (from JSON-LD)",
"See the [JSON-LD specification] for more details.",
"",
# pylint: disable=line-too-long
"| Keyword | Range | Conformance | Definition | Usage note |",
"|------------|---------------|-------------|-------------------------------------------------------------------------|------------|",
"| [@id] | IRI | mandatory | IRI identifying the resource to document. | |",
"| [@type] | IRI | recommended | Ontological class defining the class of a node. | |",
"| [@context] | dict|list | optional | Context defining namespace prefixes and additional keywords. | |",
"| [@base] | namespace | optional | Base IRI against which relative IRIs are resolved. | |",
"| [@vocab] | namespace | optional | Used to expand properties and values in @type with a common prefix IRI. | |",
"| [@graph] | list | optional | Used for documenting multiple resources. | |",
"",
]
if explanation:
out.extend(column_explanations)
if special:
out.extend(special_keywords)
refs = []
for cls in sorted(classes):
name = self.prefixed(cls)
shortname = iriname(name)
if shortname in self.data.resources:
resource = self.data.resources[shortname]
else:
for rname, resource in self.data.resources.items():
if self.prefixed(resource.iri) == name:
shortname = rname
break
else:
raise MissingKeyError(cls)
out.append("")
out.append(f"## Properties on [{shortname}]")
if "description" in resource:
out.append(resource.description)
if "subClassOf" in resource:
out.append("")
subcl = (
[resource.subClassOf]
if isinstance(resource.subClassOf, str)
else resource.subClassOf
)
out.append(
f"- subClassOf: {', '.join(f'[{sc}]' for sc in subcl)}"
)
for sc in subcl:
refs.append(f"[{sc}]: {ts.expand_iri(sc)}")
if "iri" in resource:
refs.append(f"[{shortname}]: {ts.expand_iri(resource.iri)}")
included_keywords = [
k
for k, v in self.keywords.items()
if name in v.domain and is_curie(k)
]
out.extend(
self._keywords_table(keywords=sorted(included_keywords))
)
out.append("")
# References
extra_refs = [
# pylint: disable=line-too-long
"[@id]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@type]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@context]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@base]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@vocab]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@graph]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
]
refs.extend(extra_refs)
out.append("")
out.append("")
out.append("")
out.extend(refs)
with open(outfile, "wt", encoding="utf-8") as f:
f.write("\n".join(out) + "\n")
def save_markdown_prefixes(self, outfile: "FileLoc") -> None:
"""Save markdown file with documentation of the prefixes."""
out = [
"# Predefined prefixes",
(
"All namespace prefixes listed on this page are defined in "
"the [default JSON-LD context]."
),
(
"See [User-defined prefixes] for how to extend this list "
"with additional namespace prefixes."
),
]
rows = [
[prefix, ns]
for prefix, ns in self.data.get("prefixes", {}).items()
]
out.extend(self._to_table(["Prefix", "Namespace"], rows))
out.append("")
out.append("")
out.extend(
[
# pylint: disable=line-too-long
"[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.3/context.json",
"[User-defined prefixes]: customisation.md/#user-defined-prefixes",
]
)
with open(outfile, "wt", encoding="utf-8") as f:
f.write("\n".join(out) + "\n")
def _to_table(self, header: "Sequence", rows: "Iterable") -> list:
"""Return header and rows as a ."""
widths = [len(h) for h in header]
for row in rows:
for i, col in enumerate(row):
n = len(col)
if n > widths[i]:
widths[i] = n
lines = []
empty = ""
if rows:
lines.append("")
lines.append(
"| "
+ " | ".join(
f"{head:{widths[i]}}" for i, head in enumerate(header)
)
+ " |"
)
lines.append(
"| "
+ " | ".join(
f"{empty:-<{widths[i]}}" for i in range(len(header))
)
+ " |"
)
for row in rows:
lines.append(
"| "
+ " | ".join(
f"{col:{widths[i]}}" for i, col in enumerate(row)
)
+ " |"
)
return lines
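A minimal usage sketch (assuming the default "ddoc:datadoc" theme that ships with tripper is available): create a Keywords object and convert a keyword between its short, prefixed and expanded forms.

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords()  # loads the default theme
>>> "title" in keywords
True
>>> keywords.prefixed("title")
'dcterms:title'
>>> keywords.expanded("title")
'http://purl.org/dc/terms/title'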
__init__(self, theme='ddoc:datadoc', yamlfile=None, timeout=3)
¶
Initialises keywords object.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| theme | Optional[Union[str, Sequence[str]]] | IRI of one or more themes to load keywords for. | 'ddoc:datadoc' |
| yamlfile | Optional[FileLoc] | A YAML file with keyword definitions to parse. May also be a URI, in which case it will be accessed via HTTP GET. Deprecated. Use the `add_yaml()` or `add()` methods instead. | None |
| timeout | float | Timeout in case `yamlfile` is a URI. | 3 |
Attributes:
| Name | Type | Description |
|---|---|---|
| data | | The dict loaded from the keyword yamlfile. |
| keywords | | A dict mapping keywords (name/prefixed/iri) to dicts describing the keywords. |
| theme | | IRI of a theme or scientific domain that the keywords belong to. |
Source code in tripper/datadoc/keywords.py
def __init__(
self,
theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
yamlfile: "Optional[FileLoc]" = None,
timeout: float = 3,
) -> None:
"""Initialises keywords object.
Arguments:
theme: IRI of one or more themes to load keywords for.
yamlfile: A YAML file with keyword definitions to parse. May also
be a URI, in which case it will be accessed via HTTP GET.
Deprecated. Use the `add_yaml()` or `add()` methods instead.
timeout: Timeout in case `yamlfile` is a URI.
Attributes:
data: The dict loaded from the keyword yamlfile.
keywords: A dict mapping keywords (name/prefixed/iri) to dicts
describing the keywords.
theme: IRI of a theme or scientific domain that the keywords
belong to.
"""
default_prefixes = AttrDict(ddoc=str(DDOC))
self.theme = None # theme for this object
self.data = AttrDict(prefixes=default_prefixes, resources=AttrDict())
# A "view" into `self.data`. A dict mapping short, prefixed
# and expanded keyword names to corresponding value dicts in
# self.data.
self.keywords = AttrDict()
# Themes and files that have been parsed
self.parsed: "set" = set()
if theme:
self.add_theme(theme)
if yamlfile:
warnings.warn(
"The `yamlfile` argument is deprecated. Use the `add_yaml()` "
"or `add()` methods instead.",
DeprecationWarning,
)
if isinstance(yamlfile, (str, Path)):
self.load_yaml(yamlfile, timeout=timeout)
else:
for path in yamlfile:
self.load_yaml(path, timeout=timeout)
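Passing theme=None gives an empty object that can be populated later with add(), add_theme() or fromdicts(). A small sketch (the keyword count of the default theme is illustrative only):

>>> from tripper.datadoc.keywords import Keywords
>>> empty = Keywords(theme=None)  # no keywords loaded yet
>>> len(empty)
0
>>> keywords = Keywords(theme="ddoc:datadoc")  # same as Keywords()
>>> len(keywords) > 0
True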
add(self, keywords, format=None, timeout=3, strict=False, redefine='raise')
¶
Add keywords to this Keywords object.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| keywords | Optional[KeywordsType] | Keywords definitions to add to this Keywords object. May be another Keywords object, a path to a file, a theme or a sequence of these. | required |
| format | Optional[Union[str, Sequence]] | Format of `keywords`. Recognised formats include: yaml, csv, tsv, turtle, xml, json-ld, rdfa, ... | None |
| timeout | float | Timeout when accessing remote files. | 3 |
| strict | bool | Whether to raise an `InvalidKeywordError` exception if `keywords` contains an unknown key. | False |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (redefine the keyword, emitting a `RedefineKeywordWarning`), "skip" (keep the existing keyword, emitting a `RedefineKeywordWarning`) or "raise" (raise a `RedefineError`, default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def add(
self,
keywords: "Optional[KeywordsType]",
format: "Optional[Union[str, Sequence]]" = None,
timeout: float = 3,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Add `keywords` to this Keywords object.
Arguments:
keywords: Keywords definitions to add to this Keywords object.
May be another Keywords object, a path to a file, a theme or a
sequence of these.
format: Format of `keywords`. Recognised formats include:
yaml, csv, tsv, turtle, xml, json-ld, rdfa, ...
timeout: Timeout when accessing remote files.
strict: Whether to raise an `InvalidKeywordError` exception if
`keywords` contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
if not isinstance(keywords, str) and isinstance(keywords, Sequence):
if isinstance(format, str):
format = [format] * len(keywords)
elif format and len(format) != len(keywords):
raise TypeError(
"If given, `format` must have the same length as "
"`keywords`"
)
def _add(kw, fmt):
if kw is None:
pass
elif isinstance(kw, Keywords):
self.theme = merge(self.theme, kw.theme)
recursive_update(self.data, kw.data, cls=AttrDict)
self._set_keywords(clear=False)
elif isinstance(kw, dict):
self._load_yaml(kw, strict=strict, redefine=redefine)
elif not isinstance(kw, str) and isinstance(kw, Sequence):
for i, e in enumerate(kw):
_add(e, fmt[i] if fmt else None)
elif isinstance(kw, (str, Path, IOBase)):
if (
isinstance(kw, str)
and ":" in kw
and not (
kw.startswith("/") or kw.startswith("./") or is_uri(kw)
)
):
self.add_theme(
kw,
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
if not fmt:
name = kw.name if hasattr(kw, "name") else kw
fmt = Path(name).suffix
fmt = fmt.lstrip(".").lower()
# pylint:disable=consider-using-get
if fmt in RDFLIB_SUFFIX_FORMAT_MAP:
fmt = RDFLIB_SUFFIX_FORMAT_MAP[fmt]
if fmt in ("yaml", "yml"):
self.load_yaml(
kw,
timeout=timeout,
strict=strict,
redefine=redefine,
)
elif fmt in ("csv", "tsv", "xlsx", "excel"):
self.load_table(kw, format=fmt)
else:
self.load_rdffile(
kw,
format=fmt,
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
raise TypeError(
"`keywords` must be a KeywordsType object (Keywords "
"instance, dict, IO, Path, string or sequence). "
f"Got: {type(kw)}"
)
_add(keywords, format)
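A sketch of how the dispatch works in practice. The YAML and Turtle file names below are placeholders, not files shipped with tripper:

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords(theme=None)
>>> keywords.add("ddoc:datadoc")      # prefixed name without a path -> add_theme()
>>> keywords.add("my_keywords.yaml")  # suffix selects load_yaml()  # doctest: +SKIP
>>> keywords.add("extra.ttl")         # other suffixes go to load_rdffile()  # doctest: +SKIP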
add_prefix(self, prefix, namespace, replace=False)
¶
Bind prefix to namespace.
If namespace is None, the prefix is removed.
If replace is true, an existing namespace will be overridden.
Source code in tripper/datadoc/keywords.py
def add_prefix(self, prefix, namespace, replace=False):
"""Bind `prefix` to `namespace`.
If `namespace` is None, the prefix is removed.
If `replace` is true, an existing namespace will be overridden.
"""
if namespace is None:
del self.data.prefixes[str(prefix)]
elif replace:
self.data.prefixes[str(prefix)] = str(namespace)
else:
self.data.prefixes.setdefault(str(prefix), str(namespace))
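For example (the "ex" prefix and namespaces are made up):

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords(theme=None)
>>> keywords.add_prefix("ex", "http://example.com/ex#")
>>> keywords.get_prefixes()["ex"]
'http://example.com/ex#'
>>> keywords.add_prefix("ex", "http://example.com/other#")  # ignored since replace=False
>>> keywords.get_prefixes()["ex"]
'http://example.com/ex#'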
add_theme(self, theme, timeout=3, strict=False, redefine='raise')
¶
Add keywords for theme, where theme is the IRI of a
theme or scientific domain or a list of such IRIs.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| theme | Union[str, Sequence[str]] | IRI (or list of IRIs) of a theme/scientific domain to load. | required |
| timeout | float | Timeout when accessing remote files. | 3 |
| strict | bool | Whether to raise an `InvalidKeywordError` exception if the theme contains an unknown key. | False |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (redefine the keyword, emitting a `RedefineKeywordWarning`), "skip" (keep the existing keyword, emitting a `RedefineKeywordWarning`) or "raise" (raise a `RedefineError`, default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def add_theme(
self,
theme: "Union[str, Sequence[str]]",
timeout: float = 3,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Add keywords for `theme`, where `theme` is the IRI of a
theme or scientific domain or a list of such IRIs.
Arguments:
theme: IRI (or list of IRIs) of a theme/scientific domain to load.
timeout: Timeout when accessing remote files.
strict: Whether to raise an `InvalidKeywordError` exception if the
theme contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
- "raise": Raise an RedefineError (default).
"""
if isinstance(theme, str):
theme = [theme]
parsedkey = (tuple(theme), strict, redefine)
if parsedkey in self.parsed:
return
for name in theme: # type: ignore
expanded = expand_iri(name, self.get_prefixes())
prefixed = prefix_iri(name, self.get_prefixes())
add(
self.data,
"theme",
prefixed,
)
for ep in get_entry_points("tripper.keywords"):
if expand_iri(ep.value, self.get_prefixes()) == expanded:
package_name, path = ep.name.split("/", 1)
package = import_module(package_name)
fullpath = (
Path(package.__file__).parent / path # type: ignore
)
self.add(
fullpath,
timeout=timeout,
strict=strict,
redefine=redefine,
)
break
else:
# Fallback in case the entry point is not installed
if expanded == DDOC.datadoc:
self.load_yaml(
self.rootdir
/ "tripper"
/ "context"
/ "0.3"
/ "keywords.yaml",
timeout=timeout,
strict=strict,
redefine=redefine,
)
else:
raise TypeError(f"Unknown theme: {name}")
self.parsed.add(parsedkey)
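Repeated calls with the same (theme, strict, redefine) combination are no-ops, since already parsed themes are cached in the parsed attribute. A small sketch:

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords(theme=None)
>>> keywords.add_theme("ddoc:datadoc")
>>> n = len(keywords)
>>> keywords.add_theme("ddoc:datadoc")  # cached - returns immediately
>>> len(keywords) == n
True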
asdicts(self, names=None, keymode='prefixed')
¶
Return the content of this Keywords object as a list of JSON-LD dicts.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| names | Optional[Sequence] | A sequence of keyword or class names. The default is to return all keywords. | None |
| keymode | str | How to represent keys. Should be either "name", "prefixed" (CURIE) or "expanded" (full IRI). | 'prefixed' |

Returns:

| Type | Description |
|---|---|
| List[dict] | List of JSON-LD dicts corresponding to `names`. |
Source code in tripper/datadoc/keywords.py
def asdicts(
self,
names: "Optional[Sequence]" = None,
keymode: str = "prefixed",
) -> "List[dict]":
"""Return the content of this Keywords object as a list of JSON-LD
dicts.
Arguments:
names: A sequence of keyword or class names. The
default is to return all keywords.
keymode: How to represent keys. Should be either "name",
"prefixed" (CURIE) or "expanded" (full IRI).
Returns:
List of JSON-LD dicts corresponding to `names`.
"""
keymodes = {
"name": iriname,
"prefixed": None,
"expanded": self.expanded,
}
maps = {
"subPropertyOf": "rdfs:subPropertyOf",
"unit": "ddoc:unitSymbol",
"description": "dcterms:description",
"usageNote": "vann:usageNote",
"theme": "dcat:theme",
}
def key(k):
"""Return key `k` accordig to `keymode`."""
return keymodes[keymode](k) if keymodes[keymode] else k
conformance_indv = {v: k for k, v in CONFORMANCE_MAPS.items()}
if names is None:
names = self.keywordnames()
classes = []
dicts = []
for name in names:
if name not in self.keywords:
classes.append(name)
continue
d = self.keywords[name]
if "range" in d and self.expanded(d.range) != RDFS.Literal:
proptype = "owl:ObjectProperty"
range = d.range
elif (
"datatype" in d and self.expanded(d.datatype) != RDF.langString
):
proptype = "owl:DatatypeProperty"
range = d.get("datatype")
else:
proptype = "owl:AnnotationProperty"
range = d.get("datatype")
dct = {
"@id": d.iri,
"@type": proptype,
key("rdfs:label"): d.name,
}
if "domain" in d:
dct[key("rdfs:domain")] = d.domain
if range:
dct[key("rdfs:range")] = range
if "conformance" in d:
dct[key("ddoc:conformance")] = conformance_indv.get(
d.conformance, d.conformance
)
for k, v in d.items():
if k in maps:
dct[key(maps[k])] = v
dicts.append(dct)
if classes:
classmaps = {}
for k, v in self.data.resources.items():
classmaps[k] = k
classmaps[self.expanded(k)] = k
classmaps[self.prefixed(k)] = k
for name in classes:
d = self.data.resources[classmaps[name]]
dct = {"@id": d.iri, "@type": OWL.Class}
if "subClassOf" in d:
dct[key("rdfs:subClassOf")] = d.subClassOf
if "description" in d:
dct[key("dcterms:description")] = d.description
if "usageNote" in d:
dct[key("vann:usageNote")] = d.usageNote
dicts.append(dct)
return dicts
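A sketch with the default theme loaded; the "@id" is the prefixed IRI of the keyword, while the remaining entries depend on how the theme defines it:

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords()
>>> d = keywords.asdicts(names=["title"])[0]
>>> d["@id"]
'dcterms:title'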
classnames(self)
¶
Return a list with all class names defined in this instance.
Source code in tripper/datadoc/keywords.py
def classnames(self) -> "list":
"""Return a list with all class names defined in this instance."""
return list(self.data.resources.keys())
copy(self)
¶
Returns a copy of self.
Source code in tripper/datadoc/keywords.py
def copy(self):
"""Returns a copy of self."""
new = Keywords(theme=None)
new.theme = self.theme
new.data = deepcopy(self.data)
new.keywords = deepcopy(self.keywords)
new.parsed = self.parsed.copy()
return new
expanded(self, keyword, strict=True)
¶
Return the keyword expanded to its full IRI.
Source code in tripper/datadoc/keywords.py
def expanded(self, keyword: str, strict: bool = True) -> str:
"""Return the keyword expanded to its full IRI."""
if keyword in self.keywords:
iri = self.keywords[keyword].iri
elif "resources" in self.data and keyword in self.data.resources:
iri = self.data.resources[keyword].iri
elif ":" in keyword or not strict:
iri = keyword
else:
raise InvalidKeywordError(keyword)
return expand_iri(iri, self.get_prefixes(), strict=strict)
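For example, with the default theme (the dcterms and dcat namespaces are standard):

>>> from tripper.datadoc.keywords import Keywords
>>> keywords = Keywords()
>>> keywords.expanded("title")
'http://purl.org/dc/terms/title'
>>> keywords.expanded("Dataset")
'http://www.w3.org/ns/dcat#Dataset'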
fromdicts(self, dicts, prefixes=None, theme=None, basedOn=None, strict=False, redefine='raise')
¶
Populate this Keywords object from a sequence of dicts.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| dicts | Sequence[dict] | A sequence of JSON-LD dicts to populate this keywords object from. Their format should follow what is returned by tripper.datadoc.acquire(). | required |
| prefixes | Optional[dict] | Dict with additional prefixes used by `dicts`. | None |
| theme | Optional[str] | Theme defined by `dicts`. | None |
| basedOn | Optional[Union[str, List[str]]] | Theme(s) that `dicts` are based on. | None |
| strict | bool | Whether to raise an `InvalidKeywordError` exception if `dicts` contains an unknown key. | False |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (allow redefining a keyword), "skip" (don't redefine an existing keyword) or "raise" (raise a `RedefineError`, default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def fromdicts(
self,
dicts: "Sequence[dict]",
prefixes: "Optional[dict]" = None,
theme: "Optional[str]" = None,
basedOn: "Optional[Union[str, List[str]]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Populate this Keywords object from a sequence of dicts.
Arguments:
dicts: A sequence of JSON-LD dicts to populate this keywords object
from. Their format should follow what is returned by
tripper.datadoc.acquire().
prefixes: Dict with additional prefixes used by `dicts`.
theme: Theme defined by `dicts`.
basedOn: Theme(s) that `dicts` are based on.
        strict: Whether to raise an `InvalidKeywordError` exception if
            the input contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords. Should be one of the following strings:
            - "allow": Allow redefining a keyword.
            - "skip": Don't redefine existing keyword.
            - "raise": Raise a RedefineError (default).
"""
data = self._fromdicts(
dicts,
prefixes=prefixes,
theme=theme,
basedOn=basedOn,
)
self._load_yaml(data, strict=strict, redefine=redefine)
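A hedged sketch of calling fromdicts(); the input dict is hypothetical, with keys mirroring what asdicts() (whose source is shown earlier on this page) produces:

```python
from tripper.datadoc.keywords import Keywords

kw = Keywords()
# Hypothetical JSON-LD description of a single datatype property.
dicts = [
    {
        "@id": "http://example.com/ns#batchNumber",
        "@type": "owl:DatatypeProperty",
        "rdfs:label": "batchNumber",
        "rdfs:domain": "dcat:Dataset",
        "rdfs:range": "xsd:string",
    }
]
kw.fromdicts(dicts, prefixes={"ex": "http://example.com/ns#"})
```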
get_context(self)
¶
Return JSON-LD context as a dict.
Note: The returned dict corresponds to the value of the "@context" keyword in a JSON-LD document.
Source code in tripper/datadoc/keywords.py
def get_context(self) -> dict:
"""Return JSON-LD context as a dict.
Note: The returned dict corresponds to the value of the "@context"
keyword in a JSON-LD document.
"""
ctx = {}
ctx["@version"] = 1.1
# Add prefixes to context
prefixes = self.data.get("prefixes", {})
for prefix, ns in prefixes.items():
ctx[prefix] = ns
resources = self.data.get("resources", {})
# Translate datatypes
translate = {"rdf:JSON": "@json"}
# Add keywords (properties) to context
for resource in resources.values():
for k, v in resource.get("keywords", {}).items():
iri = v["iri"]
if "datatype" in v:
dt = v["datatype"]
if isinstance(dt, str):
dt = translate.get(dt, dt)
else:
dt = [translate.get(t, t) for t in dt]
d = {}
if v.get("reverse", "").lower() == "true":
d["@reverse"] = iri
else:
d["@id"] = iri
if dt == "rdf:langString" or "language" in v:
d["@language"] = v.get("language", "en")
else:
d["@type"] = dt
ctx[k] = d # type: ignore
elif v.get("range", "rdfs:Literal") == "rdfs:Literal":
ctx[k] = iri
else:
ctx[k] = { # type: ignore
"@id": iri,
"@type": "@id",
}
# Add resources (classes) to context
for k, v in resources.items():
ctx.setdefault(k, v.iri)
return ctx
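For example, the returned dict can be embedded directly as the "@context" value of a JSON-LD document (the resource IRI below is hypothetical):

```python
import json

from tripper.datadoc.keywords import Keywords

kw = Keywords()
doc = {
    "@context": kw.get_context(),
    "@id": "http://example.com/data#mydata",
}
print(json.dumps(doc, indent=2))
```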
get_prefixes(self)
¶
Return prefixes dict.
Source code in tripper/datadoc/keywords.py
def get_prefixes(self) -> dict:
"""Return prefixes dict."""
return self.data.get("prefixes", {})
isnested(self, keyword)
¶
Returns whether the keyword corresponds to an object property.
Source code in tripper/datadoc/keywords.py
def isnested(self, keyword: str) -> bool:
"""Returns whether the keyword corresponds to an object property."""
d = self.keywords[keyword]
if "datatype" in d or d.range == "rdfs:Literal":
return False
return True
keywordname(self, keyword)
¶
Return the short name of keyword.
Source code in tripper/datadoc/keywords.py
def keywordname(self, keyword: str) -> str:
"""Return the short name of `keyword`."""
warnings.warn(
"Keywords.keywordname() is deprecated. Use Keywords.shortname() "
"instead.",
DeprecationWarning,
stacklevel=2,
)
if keyword not in self.keywords:
raise InvalidKeywordError(keyword)
return self.keywords[keyword].name
keywordnames(self)
¶
Return a list with all keyword names defined in this instance.
Source code in tripper/datadoc/keywords.py
def keywordnames(self) -> "list":
"""Return a list with all keyword names defined in this instance."""
return [k for k in self.keywords.keys() if ":" not in k]
load_rdf(self, ts, iris=None, strict=False, redefine='raise')
¶
Populate this Keyword object from a triplestore.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| ts | Triplestore | Triplestore to load keywords from. | required |
| iris | Optional[Sequence[str]] | IRIs to load. The default is to load IRIs corresponding to all properties and classes. | None |
| strict | bool | Whether to raise an InvalidKeywordError exception if the input contains an unknown key. | False |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (allow redefining a keyword; emits a RedefineKeywordWarning), "skip" (don't redefine existing keyword; emits a RedefineKeywordWarning), "raise" (raise a RedefineError; default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def load_rdf(
self,
ts: "Triplestore",
iris: "Optional[Sequence[str]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Populate this Keyword object from a triplestore.
Arguments:
ts: Triplestore to load keywords from.
        iris: IRIs to load. The default is to load IRIs corresponding to all
            properties and classes.
        strict: Whether to raise an `InvalidKeywordError` exception if
            the input contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords. Should be one of the following strings:
            - "allow": Allow redefining a keyword. Emits a
              `RedefineKeywordWarning`.
            - "skip": Don't redefine existing keyword. Emits a
              `RedefineKeywordWarning`.
            - "raise": Raise a RedefineError (default).
"""
dicts = self._load_rdf(ts, iris)
self.fromdicts(
dicts,
prefixes=ts.namespaces,
strict=strict,
redefine=redefine,
)
load_rdffile(self, rdffile, format=None, timeout=3, iris=None, strict=False, redefine='raise')
¶
Load RDF from file or URL.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| rdffile | FileLoc | File to load. | required |
| format | Optional[str] | Any format supported by rdflib.Graph.parse(). | None |
| timeout | float | Timeout in case rdffile is a URI. | 3 |
| iris | Optional[Sequence[str]] | IRIs to load. The default is to load IRIs corresponding to all properties and classes. | None |
| strict | bool | Whether to raise an InvalidKeywordError exception if the input contains an unknown key. | False |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (allow redefining a keyword; emits a RedefineKeywordWarning), "skip" (don't redefine existing keyword; emits a RedefineKeywordWarning), "raise" (raise a RedefineError; default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def load_rdffile(
self,
rdffile: "FileLoc",
format: "Optional[str]" = None,
timeout: float = 3,
iris: "Optional[Sequence[str]]" = None,
strict: bool = False,
redefine: str = "raise",
) -> None:
"""Load RDF from file or URL.
Arguments:
rdffile: File to load.
format: Any format supported by rdflib.Graph.parse().
        timeout: Timeout in case `rdffile` is a URI.
        iris: IRIs to load. The default is to load IRIs corresponding to
            all properties and classes.
        strict: Whether to raise an `InvalidKeywordError` exception if
            the input contains an unknown key.
        redefine: Determine how to handle redefinition of existing
            keywords. Should be one of the following strings:
            - "allow": Allow redefining a keyword. Emits a
              `RedefineKeywordWarning`.
            - "skip": Don't redefine existing keyword. Emits a
              `RedefineKeywordWarning`.
            - "raise": Raise a RedefineError (default).
"""
if format is None:
format = guess_rdf_format(rdffile)
ts = Triplestore("rdflib")
with openfile(rdffile, timeout=timeout, mode="rt") as f:
ts.parse(f, format=format)
self.load_rdf(ts, iris=iris, strict=strict, redefine=redefine)
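A minimal sketch (the file name is hypothetical; the RDF format is guessed from the file name extension when format is not given):

```python
from tripper.datadoc.keywords import Keywords

kw = Keywords()
# Load keyword definitions from a local Turtle file, skipping keywords
# that are already defined (a RedefineKeywordWarning is emitted for each).
kw.load_rdffile("my_keywords.ttl", redefine="skip")
```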
load_table(self, filename, format=None, prefixes=None, theme=None, basedOn=None, **kwargs)
¶
Load keywords from a csv file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| filename | FileLoc | File to load. | required |
| format | Optional[str] | File format. Unused. Only csv is currently supported. | None |
| prefixes | Optional[dict] | Dict with additional prefixes used in the table. | None |
| theme | Optional[str] | Theme defined by the table. | None |
| basedOn | Optional[Union[str, List[str]]] | Theme(s) that the table is based on. | None |
| kwargs | | Keyword arguments passed on to TableDoc.parse_csv(). | {} |
Source code in tripper/datadoc/keywords.py
def load_table(
self,
filename: "FileLoc",
format: "Optional[str]" = None, # pylint: disable=unused-argument
prefixes: "Optional[dict]" = None,
theme: "Optional[str]" = None,
basedOn: "Optional[Union[str, List[str]]]" = None,
**kwargs,
) -> None:
"""Load keywords from a csv file.
Arguments:
filename: File to load.
format: File format. Unused. Only csv is currently supported.
prefixes: Dict with additional prefixes used in the table.
theme: Theme defined by the table.
basedOn: Theme(s) that the table is based on.
kwargs: Keyword arguments passed on to TableDoc.parse_csv().
"""
# pylint: disable=import-outside-toplevel
from tripper.datadoc.tabledoc import TableDoc
td = TableDoc.parse_csv(
filename, type=None, prefixes=prefixes, **kwargs
)
dicts = td.asdicts()
self.fromdicts(dicts, prefixes=prefixes, theme=theme, basedOn=basedOn)
load_yaml(self, yamlfile, timeout=3, strict=True, redefine='raise')
¶
Load YAML file with keyword definitions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| yamlfile | Union[Path, str] | Path or URL to a YAML file to load. | required |
| timeout | float | Timeout when accessing remote files. | 3 |
| strict | bool | Whether to raise an InvalidKeywordError exception if the file contains an unknown key. | True |
| redefine | str | Determine how to handle redefinition of existing keywords. Should be one of: "allow" (allow redefining a keyword; emits a RedefineKeywordWarning), "skip" (don't redefine existing keyword; emits a RedefineKeywordWarning), "raise" (raise a RedefineError; default). | 'raise' |
Source code in tripper/datadoc/keywords.py
def load_yaml(
self,
yamlfile: "Union[Path, str]",
timeout: float = 3,
strict: bool = True,
redefine: str = "raise",
) -> None:
"""Load YAML file with keyword definitions.
Arguments:
        yamlfile: Path or URL to a YAML file to load.
        timeout: Timeout when accessing remote files.
        strict: Whether to raise an `InvalidKeywordError` exception if
            the file contains an unknown key.
redefine: Determine how to handle redefinition of existing
keywords. Should be one of the following strings:
- "allow": Allow redefining a keyword. Emits a
`RedefineKeywordWarning`.
- "skip": Don't redefine existing keyword. Emits a
`RedefineKeywordWarning`.
            - "raise": Raise a RedefineError (default).
"""
parsedkey = (yamlfile, strict, redefine)
if parsedkey in self.parsed:
return
with openfile(yamlfile, timeout=timeout, mode="rt") as f:
d = yaml.safe_load(f)
try:
self._load_yaml(d, strict=strict, redefine=redefine)
except Exception as exc:
raise ParseError(f"error parsing '{yamlfile}'") from exc
self.parsed.add(parsedkey)
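Usage sketch (both the local file name and the URL are hypothetical):

```python
from tripper.datadoc.keywords import Keywords

kw = Keywords()
kw.load_yaml("my_keywords.yaml")
# Remote files are fetched via HTTP GET with the given timeout.
kw.load_yaml("https://example.com/keywords.yaml", timeout=10, redefine="skip")
```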
missing_keywords(self, ts, include_classes=False, return_existing=False)
¶
List keywords not defined in triplestore ts.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| ts | Triplestore | Triplestore object to check. | required |
| include_classes | bool | Also return missing classes. | False |
| return_existing | bool | If true, two lists are returned: a list of keywords missing in ts and a list of keywords existing in ts. | False |
Returns:
| Type | Description |
|---|---|
| Union[list, Tuple[list, list]] | List with the names of keywords in this instance that are not defined in triplestore ts. |
Source code in tripper/datadoc/keywords.py
def missing_keywords(
self,
ts: "Triplestore",
include_classes: bool = False,
return_existing: bool = False,
) -> "Union[list, Tuple[list, list]]":
"""List keywords not defined in triplestore `ts`.
Arguments:
ts: Triplestore object to check.
include_classes: Also return missing classes.
return_existing: If true, two lists are returned:
- list of keywords missing in `ts`
- list of keywords existing in `ts`
Returns:
List with the names of keywords in this instance that are
not defined in triplestore `ts`.
"""
expanded = {k for k in self.keywords.keys() if "://" in k}
if include_classes:
expanded.update(self.expanded(c) for c in self.classnames())
if not expanded:
return []
query = f"""
SELECT ?s WHERE {{
VALUES ?s {{ { ' '.join(f'<{iri}>' for iri in expanded) } }}
?s a ?o
}}
"""
existing = {r[0] for r in ts.query(query)}
missing = expanded.difference(existing)
missing_names = [self.shortname(k) for k in missing]
if return_existing:
existing_names = [self.keywords[k].name for k in existing]
return missing_names, existing_names
return missing_names
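For instance, to check what would have to be added before saving to a triplestore, a sketch using the in-memory rdflib backend:

```python
from tripper import Triplestore
from tripper.datadoc.keywords import Keywords

kw = Keywords()
ts = Triplestore("rdflib")  # empty in-memory store
missing, existing = kw.missing_keywords(
    ts, include_classes=True, return_existing=True
)
print(f"{len(missing)} missing, {len(existing)} existing")
```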
prefixed(self, name, strict=True)
¶
Return prefixed name or name.
Examples:
>>> keywords = Keywords()
>>> keywords.prefixed("title")
'dcterms:title'
Source code in tripper/datadoc/keywords.py
def prefixed(self, name: str, strict: bool = True) -> str:
"""Return prefixed name or `name`.
Example:
>>> keywords = Keywords()
>>> keywords.prefixed("title")
'dcterms:title'
"""
if name in self.keywords:
return prefix_iri(self.keywords[name].iri, self.get_prefixes())
if name in self.data.resources:
return prefix_iri(
self.data.resources[name].iri,
self.get_prefixes(),
strict=strict,
)
if is_curie(name):
return name
return prefix_iri(name, self.get_prefixes(), strict=strict)
range(self, keyword)
¶
Return the range of the keyword.
Source code in tripper/datadoc/keywords.py
def range(self, keyword: str) -> str:
"""Return the range of the keyword."""
return self.keywords[keyword].range
save_context(self, outfile, indent=2)
¶
Save JSON-LD context file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| outfile | FileLoc | File to save the JSON-LD context to. | required |
| indent | int | Indentation level. Defaults to two. | 2 |
Source code in tripper/datadoc/keywords.py
def save_context(self, outfile: "FileLoc", indent: int = 2) -> None:
"""Save JSON-LD context file.
Arguments:
outfile: File to save the JSON-LD context to.
indent: Indentation level. Defaults to two.
"""
context = {"@context": self.get_context()}
with open(outfile, "wt", encoding="utf-8") as f:
json.dump(context, f, indent=indent)
f.write(os.linesep)
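Usage sketch (the output file name is hypothetical):

```python
from tripper.datadoc.keywords import Keywords

kw = Keywords()
kw.save_context("context.json")  # writes {"@context": {...}} with 2-space indent
```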
save_markdown(self, outfile, keywords=None, classes=None, themes=None, explanation=False, special=False)
¶
Save markdown file with documentation of the keywords.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| outfile | FileLoc | File to save the markdown documentation to. | required |
| keywords | Optional[Sequence[str]] | Sequence of keywords to include. | None |
| classes | Optional[Union[str, Sequence[str]]] | Include keywords that have these classes in their domain. | None |
| themes | Optional[Union[str, Sequence[str]]] | Include keywords for these themes. | None |
| explanation | bool | Whether to include an explanation of the column labels. | False |
| special | bool | Whether to generate documentation of special JSON-LD keywords. | False |
Source code in tripper/datadoc/keywords.py
def save_markdown(
self,
outfile: "FileLoc",
keywords: "Optional[Sequence[str]]" = None,
classes: "Optional[Union[str, Sequence[str]]]" = None,
themes: "Optional[Union[str, Sequence[str]]]" = None,
explanation: bool = False,
special: bool = False,
) -> None:
"""Save markdown file with documentation of the keywords.
Arguments:
outfile: File to save the markdown documentation to.
keywords: Sequence of keywords to include.
classes: Include keywords that have these classes in their domain.
themes: Include keywords for these themes.
        explanation: Whether to include an explanation of the column labels.
special: Whether to generate documentation of special
JSON-LD keywords.
"""
# pylint: disable=too-many-locals,too-many-branches
keywords, classes, themes = self._keywords_list(
keywords, classes, themes
)
ts = Triplestore("rdflib")
for prefix, ns in self.data.get("prefixes", {}).items():
ts.bind(prefix, ns)
out = [
"<!-- Do not edit! This file is generated with Tripper. "
"Edit the keywords.yaml file instead. -->",
"",
f"# Keywords{f' for theme: {themes}' if themes else ''}",
(
            f"The tables below list the keywords for the theme {themes}."
if themes
else ""
),
"",
]
column_explanations = [
"The meaning of the columns are as follows:",
"",
"- **Keyword**: The keyword referring to a property used for "
"the data documentation.",
"- **Range**: Refer to the class for the values of the keyword.",
"- **Conformance**: Whether the keyword is mandatory, recommended "
"or optional when documenting the given type of resources.",
"- **Definition**: The definition of the keyword.",
"- **Usage note**: Notes about how to use the keyword.",
"",
]
special_keywords = [
"## Special keywords (from JSON-LD)",
"See the [JSON-LD specification] for more details.",
"",
# pylint: disable=line-too-long
"| Keyword | Range | Conformance | Definition | Usage note |",
"|------------|---------------|-------------|-------------------------------------------------------------------------|------------|",
"| [@id] | IRI | mandatory | IRI identifying the resource to document. | |",
"| [@type] | IRI | recommended | Ontological class defining the class of a node. | |",
"| [@context] | dict|list | optional | Context defining namespace prefixes and additional keywords. | |",
"| [@base] | namespace | optional | Base IRI against which relative IRIs are resolved. | |",
"| [@vocab] | namespace | optional | Used to expand properties and values in @type with a common prefix IRI. | |",
"| [@graph] | list | optional | Used for documenting multiple resources. | |",
"",
]
if explanation:
out.extend(column_explanations)
if special:
out.extend(special_keywords)
refs = []
for cls in sorted(classes):
name = self.prefixed(cls)
shortname = iriname(name)
if shortname in self.data.resources:
resource = self.data.resources[shortname]
else:
for rname, resource in self.data.resources.items():
if self.prefixed(resource.iri) == name:
shortname = rname
break
else:
raise MissingKeyError(cls)
out.append("")
out.append(f"## Properties on [{shortname}]")
if "description" in resource:
out.append(resource.description)
if "subClassOf" in resource:
out.append("")
subcl = (
[resource.subClassOf]
if isinstance(resource.subClassOf, str)
else resource.subClassOf
)
out.append(
f"- subClassOf: {', '.join(f'[{sc}]' for sc in subcl)}"
)
for sc in subcl:
refs.append(f"[{sc}]: {ts.expand_iri(sc)}")
if "iri" in resource:
refs.append(f"[{shortname}]: {ts.expand_iri(resource.iri)}")
included_keywords = [
k
for k, v in self.keywords.items()
if name in v.domain and is_curie(k)
]
out.extend(
self._keywords_table(keywords=sorted(included_keywords))
)
out.append("")
# References
extra_refs = [
# pylint: disable=line-too-long
"[@id]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@type]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@context]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@base]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@vocab]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
"[@graph]: https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords",
]
refs.extend(extra_refs)
out.append("")
out.append("")
out.append("")
out.extend(refs)
with open(outfile, "wt", encoding="utf-8") as f:
f.write("\n".join(out) + "\n")
save_markdown_prefixes(self, outfile)
¶
Save markdown file with documentation of the prefixes.
Source code in tripper/datadoc/keywords.py
def save_markdown_prefixes(self, outfile: "FileLoc") -> None:
"""Save markdown file with documentation of the prefixes."""
out = [
"# Predefined prefixes",
(
"All namespace prefixes listed on this page are defined in "
"the [default JSON-LD context]."
),
(
"See [User-defined prefixes] for how to extend this list "
"with additional namespace prefixes."
),
]
rows = [
[prefix, ns]
for prefix, ns in self.data.get("prefixes", {}).items()
]
out.extend(self._to_table(["Prefix", "Namespace"], rows))
out.append("")
out.append("")
out.extend(
[
# pylint: disable=line-too-long
"[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.3/context.json",
"[User-defined prefixes]: customisation.md/#user-defined-prefixes",
]
)
with open(outfile, "wt", encoding="utf-8") as f:
f.write("\n".join(out) + "\n")
save_markdown_table(self, outfile, keywords)
¶
Save markdown file with documentation of the keywords.
Source code in tripper/datadoc/keywords.py
def save_markdown_table(
self, outfile: "FileLoc", keywords: "Sequence[str]"
) -> None:
"""Save markdown file with documentation of the keywords."""
table = self._keywords_table(keywords)
with open(outfile, "wt", encoding="utf-8") as f:
f.write(os.linesep.join(table) + os.linesep)
save_rdf(self, ts)
¶
Save to triplestore.
Source code in tripper/datadoc/keywords.py
def save_rdf(self, ts: "Triplestore") -> dict:
"""Save to triplestore."""
# pylint: disable=import-outside-toplevel,cyclic-import
from tripper.datadoc.dataset import store
for prefix, ns in self.get_prefixes().items():
ts.bind(prefix, ns)
# Ensure that the schema for properties is stored
load_datadoc_schema(ts)
# Store all keywords that are not already in the triplestore
missing = self.missing_keywords(ts, include_classes=True)
dicts = self.asdicts(missing)
return store(ts, dicts)
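A sketch of persisting the keyword definitions, using the in-memory rdflib backend:

```python
from tripper import Triplestore
from tripper.datadoc.keywords import Keywords

kw = Keywords()
ts = Triplestore("rdflib")
# Binds the prefixes, loads the datadoc schema and stores all keywords
# and classes that are not already in the triplestore.
kw.save_rdf(ts)
```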
save_table(self, filename, format=None, names=None, strip=True, keymode='name', **kwargs)
¶
Save keywords to a csv file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| filename | FileLoc | File to save to. | required |
| format | Optional[str] | File format. Unused. Only csv is currently supported. | None |
| names | Optional[Sequence] | A sequence of keyword or class names to save. The default is to save all keywords. | None |
| strip | bool | Whether to strip leading and trailing whitespaces from cells. | True |
| keymode | str | How to represent column headers. Should be either "name", "prefixed" (CURIE) or "expanded" (full IRI). | 'name' |
| kwargs | | Additional keyword arguments passed to the writer. For more details, see write_csv(). | {} |
References:
[write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
Source code in tripper/datadoc/keywords.py
def save_table(
self,
filename: "FileLoc",
format: "Optional[str]" = None, # pylint: disable=unused-argument
names: "Optional[Sequence]" = None,
strip: bool = True,
keymode: str = "name",
**kwargs,
) -> None:
# pylint: disable=line-too-long
"""Load keywords from a csv file.
Arguments:
filename: File to load.
format: File format. Unused. Only csv is currently supported.
names: A sequence of keyword or class names to save. The
default is to save all keywords.
strip: Whether to strip leading and trailing whitespaces
from cells.
keymode: How to represent column headers. Should be either
"name", "prefixed" (CURIE) or "expanded" (full IRI).
kwargs: Additional keyword arguments passed to the writer.
For more details, see [write_csv()].
References:
[write_csv()]: https://emmc-asbl.github.io/tripper/latest/api_reference/datadoc/tabledoc/#tripper.datadoc.tabledoc.TableDoc.write_csv
"""
# pylint: disable=import-outside-toplevel
from tripper.datadoc.tabledoc import TableDoc
dicts = self.asdicts(names, keymode=keymode)
td = TableDoc.fromdicts(dicts, type=None, keywords=self, strip=strip)
td.write_csv(filename, **kwargs)
save_yaml(self, yamlfile, keywords=None, classes=None, themes=None)
¶
Save YAML file with keyword definitions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| yamlfile | Union[Path, str] | File to save keyword definitions to. | required |
| keywords | Optional[Sequence[str]] | Sequence of keywords to include. | None |
| classes | Optional[Union[str, Sequence[str]]] | Include keywords that have these classes in their domain. | None |
| themes | Optional[Union[str, Sequence[str]]] | Include keywords for these themes. | None |
Source code in tripper/datadoc/keywords.py
def save_yaml(
self,
yamlfile: "Union[Path, str]",
keywords: "Optional[Sequence[str]]" = None,
classes: "Optional[Union[str, Sequence[str]]]" = None,
themes: "Optional[Union[str, Sequence[str]]]" = None,
) -> None:
"""Save YAML file with keyword definitions.
Arguments:
yamlfile: File to save keyword definitions to.
keywords: Sequence of keywords to include.
classes: Include keywords that have these classes in their domain.
themes: Include keywords for these themes.
"""
keywords, classes, themes = self._keywords_list(
keywords, classes, themes
)
resources = {}
for cls, clsval in self.data.resources.items():
if self.prefixed(cls) in classes:
resources[cls] = dict(clsval.copy())
resources[cls]["keywords"] = {}
for k, v in self.data.resources[cls].keywords.items():
if self.prefixed(k) in keywords:
resources[cls]["keywords"][k] = dict(v)
data = dict(self.data.copy())
del data["resources"]
recursive_update(data, {}, cls=dict)
data["resources"] = resources
with open(yamlfile, "wt", encoding="utf-8") as f:
yaml.safe_dump(data, f, sort_keys=False)
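For example, to write out only the keywords applicable to datasets (assuming the loaded theme defines the class dcat:Dataset):

```python
from tripper.datadoc.keywords import Keywords

kw = Keywords()
kw.save_yaml("dataset_keywords.yaml", classes="dcat:Dataset")
```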
shortname(self, iri)
¶
Return the short name of iri.
Examples:
>>> keywords = Keywords()
>>> keywords.shortname("dcterms:title")
'title'
Source code in tripper/datadoc/keywords.py
def shortname(self, iri: str) -> str:
"""Return the short name of `iri`.
Example:
>>> keywords = Keywords()
>>> keywords.shortname("dcterms:title")
'title'
"""
if iri in self.keywords:
return self.keywords[iri].name
if iri in self.data.resources.keys():
return iri
expanded = self.expanded(iri)
for k, v in self.data.resources.items():
if expanded == self.expanded(v.iri):
return k
raise InvalidKeywordError(iri)
superclasses(self, cls)
¶
Return a list with cls and its superclasses, prefixed.
Examples:
>>> keywords = Keywords()
>>> keywords.superclasses("Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset', 'dcat:Resource', 'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
>>> keywords.superclasses("dcat:Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset', 'dcat:Resource', 'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
Source code in tripper/datadoc/keywords.py
def superclasses(self, cls: str) -> "Union[str, list]":
"""Return a list with `cls` and it superclasses prefixed.
Example:
>>> keywords = Keywords()
>>> keywords.superclasses("Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
'dcat:Resource',
'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
>>> keywords.superclasses("dcat:Dataset")
... # doctest: +NORMALIZE_WHITESPACE
['dcat:Dataset',
'dcat:Resource',
'emmo:EMMO_194e367c_9783_4bf5_96d0_9ad597d48d9a']
"""
if cls in self.data.resources:
r = self.data.resources[cls]
else:
cls = prefix_iri(cls, self.get_prefixes())
rlst = [r for r in self.data.resources.values() if cls == r.iri]
if not rlst:
raise NoSuchTypeError(cls)
if len(rlst) > 1:
raise RuntimeError(
f"{cls} matches more than one resource: "
f"{', '.join(r.iri for r in rlst)}"
)
r = rlst[0]
if "subClassOf" in r:
if isinstance(r.subClassOf, str):
return [r.iri, r.subClassOf]
return [r.iri] + r.subClassOf
return r.iri
typename(self, type)
¶
Return the short name of type.
Examples:
>>> keywords = Keywords()
>>> keywords.typename("dcat:Dataset")
'Dataset'
Source code in tripper/datadoc/keywords.py
def typename(self, type) -> str:
"""Return the short name of `type`.
Example:
>>> keywords = Keywords()
>>> keywords.typename("dcat:Dataset")
'Dataset'
"""
if type in self.data.resources:
return type
prefixed = prefix_iri(type, self.get_prefixes())
for name, r in self.data.resources.items():
if prefixed == r.iri:
return name
raise NoSuchTypeError(type)
get_keywords(keywords=None, format=None, theme='ddoc:datadoc', yamlfile=None, timeout=3)
¶
A convenience function that returns a Keywords instance.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| keywords | Optional[KeywordsType] | Optional existing keywords object. | None |
| format | Optional[str] | Format of input if keywords refers to a file that can be loaded. | None |
| theme | Optional[Union[str, Sequence[str]]] | IRI of one or more themes to load keywords for. | 'ddoc:datadoc' |
| yamlfile | Optional[FileLoc] | YAML file with keyword definitions to parse. May also be a URI, in which case it will be accessed via HTTP GET. Deprecated: use the add_yaml() or add() methods instead. | None |
| timeout | float | Timeout in case yamlfile is a URI. | 3 |
Source code in tripper/datadoc/keywords.py
def get_keywords(
keywords: "Optional[KeywordsType]" = None,
format: "Optional[str]" = None,
theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc",
yamlfile: "Optional[FileLoc]" = None,
timeout: float = 3,
) -> "Keywords":
"""A convenient function that returns a Context instance.
Arguments:
keywords: Optional existing keywords object.
format: Format of input if `keywords` refer to a file that can be
loaded.
theme: IRI of one of more themes to load keywords for.
yamlfile: YAML file with keyword definitions to parse. May also
be an URI in which case it will be accessed via HTTP GET.
Deprecated. Use the `add_yaml()` or `add()` methods instead.
timeout: Timeout in case `yamlfile` is a URI.
"""
if isinstance(keywords, Keywords):
kw = keywords
if theme:
kw.add_theme(theme, timeout=timeout)
else:
kw = Keywords(theme=theme)
if keywords:
kw.add(keywords, format=format, timeout=timeout)
if yamlfile:
warnings.warn(
"The `yamlfile` argument is deprecated. Use the `add_yaml()` or "
"`add()` methods instead.",
DeprecationWarning,
)
kw.load_yaml(yamlfile, timeout=timeout)
return kw
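A sketch of the main call patterns (the YAML file name is hypothetical):

```python
from tripper.datadoc.keywords import get_keywords

kw1 = get_keywords()              # default "ddoc:datadoc" theme
kw2 = get_keywords(keywords=kw1)  # reuse an existing Keywords object
kw3 = get_keywords(keywords="extra_keywords.yaml")  # load and add from file
```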
load_datadoc_schema(ts)
¶
Load schema for data documentation to triplestore ts.
It is safe to call this function more than once.
Source code in tripper/datadoc/keywords.py
def load_datadoc_schema(ts: "Triplestore") -> None:
"""Load schema for data documentation to triplestore `ts`.
It is safe to call this function more than once.
"""
if not ts.query(f"ASK WHERE {{ <{DDOC()}> a <{OWL.Ontology}> }}"):
ts.bind("ddoc", DDOC)
path = Path(tripper.__file__).parent / "context" / "datadoc.ttl"
ts.parse(path)
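Usage sketch:

```python
from tripper import Triplestore
from tripper.datadoc.keywords import load_datadoc_schema

ts = Triplestore("rdflib")
load_datadoc_schema(ts)
load_datadoc_schema(ts)  # safe: the schema is only loaded once
```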
main(argv=None)
¶
Main function providing CLI access to keywords.
Source code in tripper/datadoc/keywords.py
def main(argv=None):
"""Main function providing CLI access to keywords."""
import argparse # pylint: disable=import-outside-toplevel
parser = argparse.ArgumentParser(
description=(
"Tool for generation of JSON-LD context and documentation from "
"keyword definitions."
)
)
parser.add_argument(
"--input",
"-i",
metavar="FILENAME",
default=[],
action="append",
help="Load keywords from this file. May be given multiple times.",
)
parser.add_argument(
"--format",
"-f",
metavar="FORMAT",
nargs="?",
action="append",
help=(
"Formats of --input. Default format is inferred from the file "
"name extension. If given, this option must be provided the "
"same number of times as --input."
),
)
parser.add_argument(
"--theme",
"-t",
metavar="NAME",
nargs="?",
default=[],
action="append",
help="Load keywords from this theme.",
)
parser.add_argument(
"--strict",
action="store_true",
help="Whether to raise an exception of input contains an unknown key.",
)
parser.add_argument(
"--redefine",
default="raise",
choices=["raise", "allow", "skip"],
help="How to handle redifinition of existing keywords.",
)
parser.add_argument(
"--context",
"-c",
metavar="FILENAME",
help="Generate JSON-LD context file.",
)
parser.add_argument(
"--keywords",
"-k",
metavar="FILENAME",
help="Generate keywords Markdown documentation.",
)
parser.add_argument(
"--explanation",
"-e",
action="store_true",
help="Whether to include explanation in generated documentation.",
)
parser.add_argument(
"--special-keywords",
"-s",
action="store_true",
help="Whether to include special keywords in generated documentation.",
)
parser.add_argument(
"--kw",
metavar="KW1,KW2,...",
help=(
"Comma-separated list of keywords to include in generated table. "
"Implies --keywords."
),
)
parser.add_argument(
"--classes",
metavar="C1,C2,...",
help=(
"Generate keywords Markdown documentation for any keywords who's "
"domain is in the comma-separated list CLASSES. "
"Implies --keywords."
),
)
parser.add_argument(
"--themes",
metavar="T1,T2,...",
help=(
"Generate keywords Markdown documentation for any keywords that "
"belong to one of the themes in the comma-separated list THEMES. "
"Implies --keywords."
),
)
parser.add_argument(
"--prefixes",
"-p",
metavar="FILENAME",
help="Generate prefixes Markdown documentation.",
)
parser.add_argument(
"--list-themes",
action="store_true",
help="List installed themes and exit.",
)
args = parser.parse_args(argv)
if args.list_themes:
themes = [ep.value for ep in get_entry_points("tripper.keywords")]
parser.exit(message=os.linesep.join(themes) + os.linesep)
if args.format and len(args.format) != len(args.input):
parser.error(
"The number of --format options must match the number "
"of --input options."
)
if args.theme:
default_theme = None if None in args.theme else args.theme[0]
else:
default_theme = "ddoc:datadoc"
kw = Keywords(theme=default_theme)
for theme in args.theme[1:]:
if theme:
kw.add_theme(theme, strict=args.strict, redefine=args.redefine)
kw.add(args.input, args.format, strict=args.strict, redefine=args.redefine)
if args.context:
kw.save_context(args.context)
if args.keywords or args.kw or args.classes or args.themes:
kw.save_markdown(
args.keywords,
keywords=args.kw.split(",") if args.kw else None,
classes=args.classes.split(",") if args.classes else None,
themes=args.themes.split(",") if args.themes else None,
explanation=args.explanation,
special=args.special_keywords,
)
if args.prefixes:
kw.save_markdown_prefixes(args.prefixes)
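For example, the CLI can also be driven programmatically through main() with the documented flags (all file names are hypothetical):

```python
from tripper.datadoc.keywords import main

main([
    "--context", "context.json",  # generate JSON-LD context file
    "--keywords", "keywords.md",  # generate keyword documentation
    "--explanation",              # include explanation of column labels
    "--prefixes", "prefixes.md",  # generate prefix documentation
])
```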