Skip to content

rdflib

Backend for RDFLib.

For developers: The names s, p, and o represent the different parts of an RDF triple: subject, predicate, and object.

There is an issue with rdflib raising an urllib.error.HTTPError exception if you don't have write permissions to the cache directory. See Known issues for more details.

RdflibStrategy

Triplestore strategy for rdflib.

Parameters:

Name Type Description Default
base_iri Optional[str]

Unused by the rdflib backend. The base_iri argument is still used for encapsulating the Triplestore class.

None
database Optional[str]

Unused - rdflib does not support multiple databases.

None
triplestore_url Optional[str]

If given, initialise the triplestore from this storage. When close() is called, the storage will be overwritten with the current content of the triplestore.

None
format Optional[str]

Format of storage specified with triplestore_url.

None
graph Optional[Graph]

A rdflib.Graph instance to expose with tripper, instead of creating a new empty Graph object.

None
Source code in tripper/backends/rdflib.py
class RdflibStrategy:
    """Triplestore strategy for rdflib.

    Arguments:
        base_iri: Unused by the rdflib backend.  The `base_iri` argument is
            still used for encapsulating the Triplestore class.
        database: Unused - rdflib does not support multiple databases.
            An `UnusedArgumentWarning` is issued if it is given.
        triplestore_url: If given, initialise the triplestore from this
            storage.  When `close()` is called, the storage will be
            overwritten with the current content of the triplestore.
        format: Format of storage specified with `triplestore_url`.  If
            not given, it is guessed from `triplestore_url` with
            `guess_format()`.
        graph: A rdflib.Graph instance to expose with tripper, instead of
            creating a new empty Graph object.
    """

    def __init__(
        self,
        base_iri: "Optional[str]" = None,  # pylint: disable=unused-argument
        database: "Optional[str]" = None,
        triplestore_url: "Optional[str]" = None,
        format: "Optional[str]" = None,  # pylint: disable=redefined-builtin
        graph: "Optional[Graph]" = None,
    ) -> None:
        # Note that although `base_iri` is unused in this backend, it may
        # still be used by calling Triplestore object.
        if database:
            warnings.warn("database", UnusedArgumentWarning, stacklevel=3)

        # Compare against None instead of relying on truthiness: rdflib
        # graphs define `__len__`, so an empty graph is falsy and would
        # otherwise be silently replaced by a new, unrelated Graph.
        self.graph = graph if graph is not None else Graph()
        self.triplestore_url = triplestore_url
        if self.triplestore_url is not None:
            if format is None:
                format = guess_format(self.triplestore_url)
            self.parse(location=self.triplestore_url, format=format)
        # Remembered such that close() writes back in the same format.
        self.base_format = format

    def triples(self, triple: "Triple") -> "Generator[Triple, None, None]":
        """Returns a generator over matching triples."""
        return _convert_triples_to_tripper(
            self.graph.triples(totriple(triple))
        )

    def add_triples(self, triples: "Sequence[Triple]"):
        """Add a sequence of triples."""
        for triple in triples:
            self.graph.add(totriple(triple))

    def remove(self, triple: "Triple"):
        """Remove all matching triples from the backend."""
        self.graph.remove(totriple(triple))

    # Optional methods
    def close(self):
        """Close the internal RDFLib graph.

        If the strategy was created with a `triplestore_url`, the current
        content is first serialised back to that storage.
        """
        if self.triplestore_url:
            self.serialize(
                destination=self.triplestore_url, format=self.base_format
            )
        self.graph.close()

    def parse(
        self,
        source=None,
        location=None,
        data=None,
        format=None,  # pylint: disable=redefined-builtin
        **kwargs,
    ):
        """Parse source and add the resulting triples to triplestore.

        The source is specified using one of `source`, `location` or `data`.

        Parameters:
            source: File-like object or file name.
            location: String with relative or absolute URL to source.
            data: String containing the data to be parsed.
            format: Needed if format can not be inferred from source.
            kwargs: Additional less used keyword arguments.
                See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.parse
        """
        self.graph.parse(
            source=source,
            location=location,
            data=data,
            format=format,
            **kwargs,
        )

    def serialize(
        self,
        destination=None,
        format="turtle",  # pylint: disable=redefined-builtin
        **kwargs,
    ) -> "Union[None, str]":
        """Serialise to destination.

        Parameters:
            destination: File name or object to write to. If None, the serialisation is
                returned.
            format: Format to serialise as. Supported formats depend on the backend.
            kwargs: Passed to the rdflib.Graph.serialize() method.
                See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.serialize

        Returns:
            Serialised string if `destination` is None.
        """
        result = self.graph.serialize(
            destination=destination, format=format, **kwargs
        )
        if destination is None:
            # Depending on the version of rdflib the return value of
            # graph.serialize() may either be a string or a bytes object...
            return result if isinstance(result, str) else result.decode()
        return None

    def query(
        self, query_object, **kwargs
    ) -> "Union[List[Tuple[str, ...]], bool, Generator[Triple, None, None]]":
        """SPARQL query.

        Parameters:
            query_object: String with the SPARQL query.
            kwargs: Keyword arguments passed to rdflib.Graph.query().

        Returns:
            The return type depends on type of query:
              - SELECT: list of tuples of IRIs for each matching row
              - ASK: bool
              - CONSTRUCT, DESCRIBE: generator over triples

            For more info, see
            https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result

        Raises:
            AssertionError: If the result reports a type that is none of
                SELECT, ASK, CONSTRUCT or DESCRIBE.
        """
        result = self.graph.query(query_object=query_object, **kwargs)

        # The type of the result object depends not only on the type of query,
        # but also on the version of rdflib...  We try to be general here.
        if hasattr(result, "type"):
            resulttype = result.type
        elif result.__class__.__name__ == "ResultRow":
            resulttype = "SELECT"
        elif isinstance(result, bool):
            resulttype = "ASK"
        elif isinstance(result, Generator):
            resulttype = "CONSTRUCT"  # also DESCRIBE
        else:
            warnings.warn(
                "Unknown return type from rdflib.query(). Return it unprocessed."
            )
            return result  # type: ignore

        if resulttype == "SELECT":
            return [tuple(str(v) for v in row) for row in result]  # type: ignore
        if resulttype == "ASK":
            return bool(result)
        if resulttype in ("CONSTRUCT", "DESCRIBE"):
            return _convert_triples_to_tripper(result)
        # Raise explicitly instead of `assert False`: assert statements are
        # removed when Python runs with -O, which would make this method
        # silently return None for an unexpected result type.
        raise AssertionError("should never be reached")

    def update(self, update_object, **kwargs) -> None:
        """Update triplestore with SPARQL.

        Parameters:
            update_object: String with the SPARQL query.
            kwargs: Keyword arguments passed to rdflib.Graph.update().

        Note:
            This method is intended for INSERT and DELETE queries. Use
            the query() method for SELECT queries.

        """
        return self.graph.update(update_object=update_object, **kwargs)

    def bind(self, prefix: str, namespace: str):
        """Bind prefix to namespace.

        Should only be defined if the backend supports namespaces.
        Called by triplestore.bind().

        A falsy `namespace` only results in a warning, since removing a
        prefix is not supported here.
        """
        if namespace:
            self.graph.bind(prefix, namespace, replace=True)
        else:
            warnings.warn(
                "rdflib does not support removing namespace prefixes"
            )

    def namespaces(self) -> dict:
        """Returns a dict mapping prefixes to namespaces.

        Should only be defined if the backend supports namespaces.
        Used by triplestore.parse() to get prefixes after reading
        triples from an external source.
        """
        return {
            prefix: str(namespace)
            for prefix, namespace in self.graph.namespaces()
        }

add_triples(self, triples)

Add a sequence of triples.

Source code in tripper/backends/rdflib.py
def add_triples(self, triples: "Sequence[Triple]"):
    """Insert every triple of `triples` into the underlying rdflib graph.

    Each triple is converted with `totriple()` before it is added.
    """
    graph = self.graph
    for spo in triples:
        graph.add(totriple(spo))

bind(self, prefix, namespace)

Bind prefix to namespace.

Should only be defined if the backend supports namespaces. Called by triplestore.bind().

Source code in tripper/backends/rdflib.py
def bind(self, prefix: str, namespace: str):
    """Associate `prefix` with `namespace` in the underlying graph.

    Should only be defined if the backend supports namespaces.
    Called by triplestore.bind().

    An existing binding is replaced.  A falsy `namespace` only issues a
    warning and leaves the graph untouched.
    """
    if not namespace:
        # Unbinding a prefix is not possible here, so just tell the user.
        warnings.warn(
            "rdflib does not support removing namespace prefixes"
        )
        return
    self.graph.bind(prefix, namespace, replace=True)

close(self)

Close the internal RDFLib graph.

Source code in tripper/backends/rdflib.py
def close(self):
    """Write back to the storage (if any) and close the rdflib graph.

    When `triplestore_url` is set, the graph is first serialised to it
    using `base_format`.
    """
    url = self.triplestore_url
    if url:
        self.serialize(destination=url, format=self.base_format)
    self.graph.close()

namespaces(self)

Returns a dict mapping prefixes to namespaces.

Should only be defined if the backend supports namespaces. Used by triplestore.parse() to get prefixes after reading triples from an external source.

Source code in tripper/backends/rdflib.py
def namespaces(self) -> dict:
    """Return the prefix-to-namespace bindings of the graph as a dict.

    Should only be defined if the backend supports namespaces.
    Used by triplestore.parse() to get prefixes after reading
    triples from an external source.

    The namespaces are converted to plain strings.
    """
    bindings = {}
    for prefix, iri in self.graph.namespaces():
        bindings[prefix] = str(iri)
    return bindings

parse(self, source=None, location=None, data=None, format=None, **kwargs)

Parse source and add the resulting triples to triplestore.

The source is specified using one of source, location or data.

Parameters:

Name Type Description Default
source

File-like object or file name.

None
location

String with relative or absolute URL to source.

None
data

String containing the data to be parsed.

None
format

Needed if format can not be inferred from source.

None
kwargs

Additional less used keyword arguments. See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.parse

{}
Source code in tripper/backends/rdflib.py
def parse(
    self,
    source=None,
    location=None,
    data=None,
    format=None,  # pylint: disable=redefined-builtin
    **kwargs,
):
    """Load triples from an external source into the triplestore.

    Exactly how the source is given is up to the caller: use one of
    `source`, `location` or `data`.

    Parameters:
        source: File-like object or file name.
        location: String with relative or absolute URL to source.
        data: String containing the data to be parsed.
        format: Needed if format can not be inferred from source.
        kwargs: Additional less used keyword arguments.
            See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.parse
    """
    # Everything is forwarded by keyword to rdflib.Graph.parse().
    options = {
        "source": source,
        "location": location,
        "data": data,
        "format": format,
        **kwargs,
    }
    self.graph.parse(**options)

query(self, query_object, **kwargs)

SPARQL query.

Parameters:

Name Type Description Default
query_object

String with the SPARQL query.

required
kwargs

Keyword arguments passed to rdflib.Graph.query().

{}

Returns:

Type Description
The return type depends on type of query
  • SELECT: list of tuples of IRIs for each matching row
  • ASK: bool
  • CONSTRUCT, DESCRIBE: generator over triples

For more info, see https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result

Source code in tripper/backends/rdflib.py
def query(
    self, query_object, **kwargs
) -> "Union[List[Tuple[str, ...]], bool, Generator[Triple, None, None]]":
    """SPARQL query.

    Parameters:
        query_object: String with the SPARQL query.
        kwargs: Keyword arguments passed to rdflib.Graph.query().

    Returns:
        The return type depends on type of query:
          - SELECT: list of tuples of IRIs for each matching row
          - ASK: bool
          - CONSTRUCT, DESCRIBE: generator over triples

        For more info, see
        https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result

    Raises:
        AssertionError: If the result reports a type that is none of
            SELECT, ASK, CONSTRUCT or DESCRIBE.
    """
    result = self.graph.query(query_object=query_object, **kwargs)

    # The type of the result object depends not only on the type of query,
    # but also on the version of rdflib...  We try to be general here.
    if hasattr(result, "type"):
        resulttype = result.type
    elif result.__class__.__name__ == "ResultRow":
        resulttype = "SELECT"
    elif isinstance(result, bool):
        resulttype = "ASK"
    elif isinstance(result, Generator):
        resulttype = "CONSTRUCT"  # also DESCRIBE
    else:
        warnings.warn(
            "Unknown return type from rdflib.query(). Return it unprocessed."
        )
        return result  # type: ignore

    if resulttype == "SELECT":
        return [tuple(str(v) for v in row) for row in result]  # type: ignore
    if resulttype == "ASK":
        return bool(result)
    if resulttype in ("CONSTRUCT", "DESCRIBE"):
        return _convert_triples_to_tripper(result)
    # Raise explicitly instead of `assert False`: assert statements are
    # removed when Python runs with -O, which would make this function
    # silently return None for an unexpected result type.
    raise AssertionError("should never be reached")

remove(self, triple)

Remove all matching triples from the backend.

Source code in tripper/backends/rdflib.py
def remove(self, triple: "Triple"):
    """Delete every triple matching `triple` from the rdflib graph.

    The triple is converted with `totriple()` before it is handed to the
    graph.
    """
    pattern = totriple(triple)
    self.graph.remove(pattern)

serialize(self, destination=None, format='turtle', **kwargs)

Serialise to destination.

Parameters:

Name Type Description Default
destination

File name or object to write to. If None, the serialisation is returned.

None
format

Format to serialise as. Supported formats depend on the backend.

'turtle'
kwargs

Passed to the rdflib.Graph.serialize() method. See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.serialize

{}

Returns:

Type Description
Union[None, str]

Serialised string if destination is None.

Source code in tripper/backends/rdflib.py
def serialize(
    self,
    destination=None,
    format="turtle",  # pylint: disable=redefined-builtin
    **kwargs,
) -> "Union[None, str]":
    """Serialise the graph, either to `destination` or to a string.

    Parameters:
        destination: File name or object to write to. If None, the
            serialisation is returned.
        format: Format to serialise as. Supported formats depend on the
            backend.
        kwargs: Passed to the rdflib.Graph.serialize() method.
            See https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.Graph.serialize

    Returns:
        Serialised string if `destination` is None.
    """
    serialised = self.graph.serialize(
        destination=destination, format=format, **kwargs
    )
    if destination is not None:
        # Output went to the destination; nothing to hand back.
        return None
    # The value returned by graph.serialize() is either a string or a
    # bytes object, depending on the rdflib version.
    if isinstance(serialised, str):
        return serialised
    return serialised.decode()

triples(self, triple)

Returns a generator over matching triples.

Source code in tripper/backends/rdflib.py
def triples(self, triple: "Triple") -> "Generator[Triple, None, None]":
    """Return a generator over all triples matching `triple`.

    The pattern is converted with `totriple()` and the matches coming
    from the rdflib graph are passed through
    `_convert_triples_to_tripper()`.
    """
    pattern = totriple(triple)
    matches = self.graph.triples(pattern)
    return _convert_triples_to_tripper(matches)

update(self, update_object, **kwargs)

Update triplestore with SPARQL.

Parameters:

Name Type Description Default
update_object

String with the SPARQL query.

required
kwargs

Keyword arguments passed to rdflib.Graph.update().

{}

Note

This method is intended for INSERT and DELETE queries. Use the query() method for SELECT queries.

Source code in tripper/backends/rdflib.py
def update(self, update_object, **kwargs) -> None:
    """Run a SPARQL update against the triplestore.

    Parameters:
        update_object: String with the SPARQL query.
        kwargs: Keyword arguments passed to rdflib.Graph.update().

    Note:
        This method is intended for INSERT and DELETE queries. Use
        the query() method for SELECT queries.

    """
    # Whatever rdflib.Graph.update() returns is handed straight back.
    outcome = self.graph.update(update_object=update_object, **kwargs)
    return outcome

tordflib(value)

Help function converting a spo-value to proper rdflib type.

Source code in tripper/backends/rdflib.py
def tordflib(value: "Union[None, Literal, str]"):
    """Convert one subject/predicate/object value to its rdflib type.

    - None is passed through unchanged.
    - A `Literal` becomes an `rdflibLiteral` with the same `lang` and
      `datatype`.
    - A string starting with "_:" becomes a `BNode` named by the rest of
      the string.
    - Any other value becomes a `URIRef`.
    """
    if value is None:
        return None
    if isinstance(value, Literal):
        return rdflibLiteral(value, lang=value.lang, datatype=value.datatype)
    # The "_:" prefix marks a blank node; it is stripped from the node id.
    return BNode(value[2:]) if value.startswith("_:") else URIRef(value)

totriple(triple)

Help function converting a triple to rdflib triple.

Source code in tripper/backends/rdflib.py
def totriple(triple: "Triple"):
    """Convert a (subject, predicate, object) triple to an rdflib triple.

    Each of the three components is converted with `tordflib()`.
    """
    subject, predicate, obj = triple
    return tuple(map(tordflib, (subject, predicate, obj)))