tabledoc¶
Basic interface for tabular documentation of datasets.
TableDoc
¶
Representation of tabular documentation of datasets.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
header | Sequence[str] | Sequence of column header labels. Nested data can be represented by dot-separated label strings (e.g. "distribution.downloadURL"). | required |
data | Sequence[Sequence[str]] | Sequence of rows of data. Each row documents an entry. | required |
type | Optional[str] | Type of data to save (applies to all rows). Should either be one of the pre-defined names: "dataset", "distribution", "accessService", "parser" and "generator", or an IRI of a class in an ontology. Defaults to "dataset". | 'dataset' |
prefixes | Optional[dict] | Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. | None |
context | Optional[Union[str, dict, list]] | Additional user-defined context that should be returned on top of the default context. It may be a string with a URL to the user-defined context, a dict with the user-defined context, or a sequence of strings and dicts. | None |
strip | bool | Whether to strip leading and trailing whitespace from cells. | True |
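A minimal usage sketch. The column labels "@id" and "description" and the "ex" prefix are illustrative assumptions; "distribution.downloadURL" shows how nested properties are expressed with dot-separated column labels, as described above.

```python
from tripper.dataset.tabledoc import TableDoc

# Two datasets documented as one row each.
td = TableDoc(
    header=["@id", "description", "distribution.downloadURL"],
    data=[
        ["ex:data1", "First dataset", "https://example.com/data1.csv"],
        ["ex:data2", "Second dataset", "https://example.com/data2.csv"],
    ],
    prefixes={"ex": "https://example.com/ex#"},
)
```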
Source code in tripper/dataset/tabledoc.py
class TableDoc:
"""Representation of tabular documentation of datasets.
Arguments:
header: Sequence of column header labels. Nested data can
be represented by dot-separated label strings (e.g.
"distribution.downloadURL")
data: Sequence of rows of data. Each row documents an entry.
type: Type of data to save (applies to all rows). Should
either be one of the pre-defined names: "dataset",
"distribution", "accessService", "parser" and "generator"
or an IRI to a class in an ontology. Defaults to
"dataset".
prefixes: Dict with prefixes in addition to those included in the
JSON-LD context. Should map namespace prefixes to IRIs.
context: Additional user-defined context that should be
returned on top of the default context. It may be a
string with an URL to the user-defined context, a dict
with the user-defined context or a sequence of strings and
dicts.
strip: Whether to strip leading and trailing whitespaces from cells.
"""
# pylint: disable=redefined-builtin,too-few-public-methods
def __init__(
self,
header: "Sequence[str]",
data: "Sequence[Sequence[str]]",
type: "Optional[str]" = "dataset",
prefixes: "Optional[dict]" = None,
context: "Optional[Union[str, dict, list]]" = None,
strip: bool = True,
):
self.header = list(header)
self.data = [list(row) for row in data]
self.type = type
self.prefixes = prefixes
self.context = context
self.strip = strip
def save(self, ts: Triplestore) -> None:
"""Save tabular datadocumentation to triplestore."""
for d in self.asdicts():
save_dict(ts, d)
def asdicts(self) -> "List[dict]":
"""Return the table as a list of dicts."""
kw = {"_context": self.context} if self.context else {}
results = []
for row in self.data:
d = AttrDict()
for i, colname in enumerate(self.header):
cell = row[i].strip() if row[i] and self.strip else row[i]
if cell:
addnested(
d, colname.strip() if self.strip else colname, cell
)
jsonld = as_jsonld(
d, type=self.type, prefixes=self.prefixes, **kw # type: ignore
)
results.append(jsonld)
return results
@staticmethod
def fromdicts(
dicts: "Sequence[dict]",
type: "Optional[str]" = "dataset",
prefixes: "Optional[dict]" = None,
context: "Optional[Union[str, dict, list]]" = None,
strip: bool = True,
) -> "TableDoc":
"""Create new TableDoc instance from a sequence of dicts.
Arguments:
dicts: Sequence of single-resource dicts.
type: Type of data to save (applies to all rows). Should
either be one of the pre-defined names: "dataset",
"distribution", "accessService", "parser" and
"generator" or an IRI to a class in an ontology.
Defaults to "dataset".
prefixes: Dict with prefixes in addition to those included
in the JSON-LD context. Should map namespace prefixes
to IRIs.
context: Additional user-defined context that should be
returned on top of the default context. It may be a
string with an URL to the user-defined context, a dict
with the user-defined context or a sequence of strings
and dicts.
strip: Whether to strip leading and trailing whitespaces
from cells.
Returns:
New TableDoc instance.
"""
# Store the header as keys in a dict to keep ordering
header = {}
def addheader(d, prefix=""):
"""Add keys in `d` to header.
Nested dicts will result in dot-separated keys.
"""
for k, v in d.items():
if isinstance(v, dict):
addheader(v, k + ".")
else:
header[prefix + k] = True
# Assign the header
for d in dicts:
addheader(d)
# Assign table data. Nested dicts are accounted for
data = []
for dct in dicts:
row = []
for head in header:
d = dct
for key in head.split("."):
d = d.get(key, {})
row.append(d if d != {} else None)
data.append(row)
return TableDoc(
header=header.keys(), # type: ignore
data=data, # type: ignore
type=type,
prefixes=prefixes,
context=context,
strip=strip,
)
@staticmethod
def parse_csv(
csvfile: "Union[Iterable[str], Path, str]",
type: "Optional[str]" = "dataset",
prefixes: "Optional[dict]" = None,
context: "Optional[Union[dict, list]]" = None,
encoding: str = "utf-8",
dialect: "Optional[Union[csv.Dialect, str]]" = None,
**kwargs,
) -> "TableDoc":
# pylint: disable=line-too-long
"""Parse a csv file using the standard library csv module.
Arguments:
csvfile: Name of CSV file to parse or an iterable of strings.
type: Type of data to save (applies to all rows). Should
either be one of the pre-defined names: "dataset",
"distribution", "accessService", "parser" and "generator"
or an IRI to a class in an ontology. Defaults to
"dataset".
prefixes: Dict with prefixes in addition to those included in the
JSON-LD context. Should map namespace prefixes to IRIs.
context: Dict with user-defined JSON-LD context.
encoding: The encoding of the csv file. Note that Excel may
encode as "ISO-8859" (which was commonly used in the 1990th).
dialect: A subclass of csv.Dialect, or the name of the dialect,
specifying how the `csvfile` is formatted. For more details,
see [Dialects and Formatting Parameters].
kwargs: Additional keyword arguments overriding individual
formatting parameters. For more details, see
[Dialects and Formatting Parameters].
Returns:
New TableDoc instance.
References:
[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
"""
def read(f, dialect):
"""Return csv reader from file-like object `f`."""
if dialect is None and not kwargs:
dialect = csv.Sniffer().sniff(f.read(1024), delimiters=",;\t ")
f.seek(0)
reader = csv.reader(f, dialect=dialect, **kwargs)
header = next(reader)
data = list(reader)
return header, data
if isinstance(csvfile, (str, Path)):
with openfile(csvfile, mode="rt", encoding=encoding) as f:
header, data = read(f, dialect)
else:
header, data = read(csvfile, dialect)
return TableDoc(
header=header,
data=data,
type=type,
prefixes=prefixes,
context=context,
)
def write_csv(
self,
csvfile: "Union[Path, str, Writer]",
encoding: str = "utf-8",
dialect: "Union[csv.Dialect, str]" = "excel",
**kwargs,
) -> None:
# pylint: disable=line-too-long
"""Write the table to a csv file using the standard library csv module.
Arguments:
csvfile: File-like object or name of CSV file to write.
encoding: The encoding of the csv file.
dialect: A subclass of csv.Dialect, or the name of the dialect,
specifying how the `csvfile` is formatted. For more details,
see [Dialects and Formatting Parameters].
kwargs: Additional keyword arguments overriding individual
formatting parameters. For more details, see
[Dialects and Formatting Parameters].
References:
[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
"""
def write(f):
writer = csv.writer(f, dialect=dialect, **kwargs)
writer.writerow(self.header)
for row in self.data:
writer.writerow(row)
if isinstance(csvfile, (str, Path)):
with open(csvfile, mode="wt", encoding=encoding) as f:
write(f)
else:
write(csvfile)
asdicts(self)
¶
Return the table as a list of dicts.
Source code in tripper/dataset/tabledoc.py
def asdicts(self) -> "List[dict]":
"""Return the table as a list of dicts."""
kw = {"_context": self.context} if self.context else {}
results = []
for row in self.data:
d = AttrDict()
for i, colname in enumerate(self.header):
cell = row[i].strip() if row[i] and self.strip else row[i]
if cell:
addnested(
d, colname.strip() if self.strip else colname, cell
)
jsonld = as_jsonld(
d, type=self.type, prefixes=self.prefixes, **kw # type: ignore
)
results.append(jsonld)
return results
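A short sketch of calling asdicts(), assuming the td instance from the constructor example above. The exact keys of each returned dict depend on the default JSON-LD context and the as_jsonld() helper.

```python
# One JSON-LD dict per table row, with type and prefixes applied.
for d in td.asdicts():
    print(d)
```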
fromdicts(dicts, type='dataset', prefixes=None, context=None, strip=True)
staticmethod
¶
Create new TableDoc instance from a sequence of dicts.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
dicts | Sequence[dict] | Sequence of single-resource dicts. | required |
type | Optional[str] | Type of data to save (applies to all rows). Should either be one of the pre-defined names: "dataset", "distribution", "accessService", "parser" and "generator", or an IRI of a class in an ontology. Defaults to "dataset". | 'dataset' |
prefixes | Optional[dict] | Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. | None |
context | Optional[Union[str, dict, list]] | Additional user-defined context that should be returned on top of the default context. It may be a string with a URL to the user-defined context, a dict with the user-defined context, or a sequence of strings and dicts. | None |
strip | bool | Whether to strip leading and trailing whitespace from cells. | True |

Returns:

Type | Description |
---|---|
TableDoc | New TableDoc instance. |
Source code in tripper/dataset/tabledoc.py
@staticmethod
def fromdicts(
dicts: "Sequence[dict]",
type: "Optional[str]" = "dataset",
prefixes: "Optional[dict]" = None,
context: "Optional[Union[str, dict, list]]" = None,
strip: bool = True,
) -> "TableDoc":
"""Create new TableDoc instance from a sequence of dicts.
Arguments:
dicts: Sequence of single-resource dicts.
type: Type of data to save (applies to all rows). Should
either be one of the pre-defined names: "dataset",
"distribution", "accessService", "parser" and
"generator" or an IRI to a class in an ontology.
Defaults to "dataset".
prefixes: Dict with prefixes in addition to those included
in the JSON-LD context. Should map namespace prefixes
to IRIs.
context: Additional user-defined context that should be
returned on top of the default context. It may be a
string with an URL to the user-defined context, a dict
with the user-defined context or a sequence of strings
and dicts.
strip: Whether to strip leading and trailing whitespaces
from cells.
Returns:
New TableDoc instance.
"""
# Store the header as keys in a dict to keep ordering
header = {}
def addheader(d, prefix=""):
"""Add keys in `d` to header.
Nested dicts will result in dot-separated keys.
"""
for k, v in d.items():
if isinstance(v, dict):
addheader(v, k + ".")
else:
header[prefix + k] = True
# Assign the header
for d in dicts:
addheader(d)
# Assign table data. Nested dicts are accounted for
data = []
for dct in dicts:
row = []
for head in header:
d = dct
for key in head.split("."):
d = d.get(key, {})
row.append(d if d != {} else None)
data.append(row)
return TableDoc(
header=header.keys(), # type: ignore
data=data, # type: ignore
type=type,
prefixes=prefixes,
context=context,
strip=strip,
)
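A minimal sketch of building a table from dicts. The IRI and the "ex" prefix are illustrative assumptions; note how the nested "distribution" dict is flattened into a dot-separated column label.

```python
td = TableDoc.fromdicts(
    [
        {
            "@id": "ex:data1",
            "description": "First dataset",
            "distribution": {"downloadURL": "https://example.com/data1.csv"},
        },
    ],
    prefixes={"ex": "https://example.com/ex#"},
)
print(td.header)  # e.g. ['@id', 'description', 'distribution.downloadURL']
```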
parse_csv(csvfile, type='dataset', prefixes=None, context=None, encoding='utf-8', dialect=None, **kwargs)
staticmethod
¶
Parse a csv file using the standard library csv module.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
csvfile | Union[Iterable[str], Path, str] | Name of CSV file to parse or an iterable of strings. | required |
type | Optional[str] | Type of data to save (applies to all rows). Should either be one of the pre-defined names: "dataset", "distribution", "accessService", "parser" and "generator", or an IRI of a class in an ontology. Defaults to "dataset". | 'dataset' |
prefixes | Optional[dict] | Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. | None |
context | Optional[Union[dict, list]] | Dict with user-defined JSON-LD context. | None |
encoding | str | The encoding of the csv file. Note that Excel may encode as "ISO-8859" (which was commonly used in the 1990s). | 'utf-8' |
dialect | Optional[Union[csv.Dialect, str]] | A subclass of csv.Dialect, or the name of the dialect, specifying how the csvfile is formatted. For more details, see [Dialects and Formatting Parameters]. | None |
kwargs | | Additional keyword arguments overriding individual formatting parameters. For more details, see [Dialects and Formatting Parameters]. | {} |

Returns:

Type | Description |
---|---|
TableDoc | New TableDoc instance. |
References:

[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
Source code in tripper/dataset/tabledoc.py
@staticmethod
def parse_csv(
csvfile: "Union[Iterable[str], Path, str]",
type: "Optional[str]" = "dataset",
prefixes: "Optional[dict]" = None,
context: "Optional[Union[dict, list]]" = None,
encoding: str = "utf-8",
dialect: "Optional[Union[csv.Dialect, str]]" = None,
**kwargs,
) -> "TableDoc":
# pylint: disable=line-too-long
"""Parse a csv file using the standard library csv module.
Arguments:
csvfile: Name of CSV file to parse or an iterable of strings.
type: Type of data to save (applies to all rows). Should
either be one of the pre-defined names: "dataset",
"distribution", "accessService", "parser" and "generator"
or an IRI to a class in an ontology. Defaults to
"dataset".
prefixes: Dict with prefixes in addition to those included in the
JSON-LD context. Should map namespace prefixes to IRIs.
context: Dict with user-defined JSON-LD context.
encoding: The encoding of the csv file. Note that Excel may
encode as "ISO-8859" (which was commonly used in the 1990th).
dialect: A subclass of csv.Dialect, or the name of the dialect,
specifying how the `csvfile` is formatted. For more details,
see [Dialects and Formatting Parameters].
kwargs: Additional keyword arguments overriding individual
formatting parameters. For more details, see
[Dialects and Formatting Parameters].
Returns:
New TableDoc instance.
References:
[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
"""
def read(f, dialect):
"""Return csv reader from file-like object `f`."""
if dialect is None and not kwargs:
dialect = csv.Sniffer().sniff(f.read(1024), delimiters=",;\t ")
f.seek(0)
reader = csv.reader(f, dialect=dialect, **kwargs)
header = next(reader)
data = list(reader)
return header, data
if isinstance(csvfile, (str, Path)):
with openfile(csvfile, mode="rt", encoding=encoding) as f:
header, data = read(f, dialect)
else:
header, data = read(csvfile, dialect)
return TableDoc(
header=header,
data=data,
type=type,
prefixes=prefixes,
context=context,
)
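A usage sketch, assuming a file datasets.csv whose header row matches the documentation columns (the filename is an assumption).

```python
# The dialect is sniffed from the beginning of the file when not given...
td = TableDoc.parse_csv("datasets.csv")

# ...or it can be stated explicitly by name.
td = TableDoc.parse_csv("datasets.csv", dialect="excel")
```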
save(self, ts)
¶
Save tabular data documentation to the triplestore.
Source code in tripper/dataset/tabledoc.py
def save(self, ts: Triplestore) -> None:
"""Save tabular datadocumentation to triplestore."""
for d in self.asdicts():
save_dict(ts, d)
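A sketch of saving the documentation to a triplestore, assuming the td instance from the examples above and that the rdflib backend is available.

```python
from tripper import Triplestore

ts = Triplestore(backend="rdflib")
td.save(ts)  # adds the JSON-LD documentation of each row to the triplestore
```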
write_csv(self, csvfile, encoding='utf-8', dialect='excel', **kwargs)
¶
Write the table to a csv file using the standard library csv module.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
csvfile | Union[Path, str, Writer] | File-like object or name of CSV file to write. | required |
encoding | str | The encoding of the csv file. | 'utf-8' |
dialect | Union[csv.Dialect, str] | A subclass of csv.Dialect, or the name of the dialect, specifying how the csvfile is formatted. For more details, see [Dialects and Formatting Parameters]. | 'excel' |
kwargs | | Additional keyword arguments overriding individual formatting parameters. For more details, see [Dialects and Formatting Parameters]. | {} |
References:

[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
Source code in tripper/dataset/tabledoc.py
def write_csv(
self,
csvfile: "Union[Path, str, Writer]",
encoding: str = "utf-8",
dialect: "Union[csv.Dialect, str]" = "excel",
**kwargs,
) -> None:
# pylint: disable=line-too-long
"""Write the table to a csv file using the standard library csv module.
Arguments:
csvfile: File-like object or name of CSV file to write.
encoding: The encoding of the csv file.
dialect: A subclass of csv.Dialect, or the name of the dialect,
specifying how the `csvfile` is formatted. For more details,
see [Dialects and Formatting Parameters].
kwargs: Additional keyword arguments overriding individual
formatting parameters. For more details, see
[Dialects and Formatting Parameters].
References:
[Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
"""
def write(f):
writer = csv.writer(f, dialect=dialect, **kwargs)
writer.writerow(self.header)
for row in self.data:
writer.writerow(row)
if isinstance(csvfile, (str, Path)):
with open(csvfile, mode="wt", encoding=encoding) as f:
write(f)
else:
write(csvfile)
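A sketch of writing the table back out; the output filenames are assumptions. The default "excel" dialect gives comma-separated values, while "excel-tab" is the standard library's tab-separated dialect.

```python
td.write_csv("datasets_out.csv")

# Tab-separated output using a named dialect from the csv module.
td.write_csv("datasets_tab.csv", dialect="excel-tab")
```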