Skip to content

generate

Generic generate strategy using DLite storage plugin.

hasInput = 'https://w3id.org/emmo#EMMO_36e69413_8c59_4799_946c_10b05d266e22' module-attribute

hasOutput = 'https://w3id.org/emmo#EMMO_c4bace1d_4db0_4cd3_87e9_18122bae2840' module-attribute

DLiteGenerateConfig

Bases: FunctionConfig

DLite generate strategy config.

Source code in oteapi_dlite/strategies/generate.py
236
237
238
239
240
241
242
class DLiteGenerateConfig(FunctionConfig):
    """DLite generate strategy config.

    Wraps the strategy-specific options (see `DLiteStorageConfig`) in the
    standard OTEAPI `FunctionConfig` envelope.
    """

    # All generate-specific options live in this nested model.
    configuration: Annotated[
        DLiteStorageConfig,
        Field(description="DLite generate strategy-specific configuration."),
    ]

configuration: Annotated[DLiteStorageConfig, Field(description='DLite generate strategy-specific configuration.')] instance-attribute

DLiteGenerateStrategy

Generic DLite generate strategy utilising DLite storage plugins.

Registers strategies:

  • ("mediaType", "application/vnd.dlite-generate")
Source code in oteapi_dlite/strategies/generate.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
@dataclass
class DLiteGenerateStrategy:
    """Generic DLite generate strategy utilising DLite storage plugins.

    **Registers strategies**:

    - `("mediaType", "application/vnd.dlite-generate")`

    """

    # Validated strategy configuration (see DLiteGenerateConfig).
    function_config: DLiteGenerateConfig

    def initialize(self) -> DLiteResult:
        """Initialize.

        Returns:
            A `DLiteResult` referencing the shared DLite collection.
        """
        return DLiteResult(
            collection_id=get_collection(
                self.function_config.configuration.collection_id
            ).uuid
        )

    def get(self) -> DLiteResult:
        """Execute the strategy.

        This method will be called through the strategy-specific endpoint
        of the OTE-API Services.

        Returns:
            SessionUpdate instance.
        """
        config = self.function_config.configuration
        cacheconfig = config.datacache_config

        # Resolve the DLite storage driver: an explicit `driver` takes
        # precedence over a lookup from `mediaType`.
        driver = (
            config.driver
            if config.driver
            else get_driver(mediaType=config.mediaType)
        )

        coll = get_collection(config.collection_id)

        # Select what to serialise: a labelled instance, the first
        # instance matching `datamodel`, or the collection itself.
        if config.label:
            inst = coll[config.label]
        elif config.datamodel:
            instances = coll.get_instances(
                metaid=config.datamodel,
                property_mappings=True,
                allow_incomplete=config.allow_incomplete,
            )
            # NOTE(review): only the first matching instance is used;
            # raises StopIteration if there is none — TODO confirm intended.
            inst = next(instances)
        elif config.store_collection:
            if config.store_collection_id:
                inst = coll.copy(newid=config.store_collection_id)
            else:
                inst = coll
        else:  # no instance selector was provided
            # NOTE(review): the message omits the `store_collection` option,
            # which is also accepted above.
            raise ValueError(
                "One of `label` or `datamodel` configurations should be given."
            )

        # Save instance — to `location` if given, otherwise to the data
        # cache via a temporary file.
        if config.location:
            inst.save(driver, config.location, config.options)
        else:  # missing test
            if cacheconfig and cacheconfig.accessKey:
                key = cacheconfig.accessKey
            else:  # missing test
                key = "generate_data"
            cache = DataCache()
            with tempfile.TemporaryDirectory() as tmpdir:
                inst.save(driver, f"{tmpdir}/data", config.options)
                with Path(f"{tmpdir}/data").open("rb") as f:
                    cache.add(f.read(), key=key)

        # Store documentation of this instance in the knowledge base
        if config.kb_document_class:

            # Import here to avoid hard dependencies on tripper.
            from tripper import RDF
            from tripper.convert import save_container

            # KB connection settings come from the dlite-settings strategy;
            # they may arrive JSON-encoded as a string.
            kb_settings = config.dlite_settings.get("tripper.triplestore")
            if isinstance(kb_settings, str):
                kb_settings = json.loads(kb_settings)
            if kb_settings and not isinstance(kb_settings, dict):
                raise ValueError(
                    "The `kb_document_class` configuration expects a dict "
                    "with settings for the tripper.triplestore."
                )

            if TYPE_CHECKING:  # pragma: no cover
                # This block will only be run by mypy when checking typing
                assert (
                    isinstance(kb_settings, dict) or kb_settings is None
                )  # nosec

            # IRI of new individual
            iri = individual_iri(
                class_iri=config.kb_document_class,
                base_iri=config.kb_document_base_iri,
            )

            triples = [(iri, RDF.type, config.kb_document_class)]
            if config.kb_document_context:
                for prop, val in config.kb_document_context.items():
                    triples.append((iri, prop, val))

            ts = get_triplestore(
                kb_settings=kb_settings,
                collection_id=config.collection_id,
            )
            try:
                if config.kb_document_computation:
                    comput = individual_iri(
                        class_iri=config.kb_document_computation,
                        base_iri=config.kb_document_base_iri,
                    )
                    triples.extend(
                        [
                            (comput, RDF.type, config.kb_document_computation),
                            (comput, hasOutput, iri),
                        ]
                    )

                    # Relate computation individual `comput` to its
                    # input individuals.
                    #
                    # This simple implementation works against KB.  It
                    # assumes that the input of
                    # `kb_document_computation` is documented in the
                    # KB and that there only exists one individual of each
                    # input class.
                    #
                    # In the case of multiple individuals of the input
                    # classes, the workflow executer must be involved
                    # in the documentation.  It can either do the
                    # documentation itself or provide a callback
                    # providing the needed info, which can be called
                    # from this strategy.

                    # Relate to input dataset individuals
                    restrictions = ts.restrictions(
                        config.kb_document_computation, hasInput
                    )
                    for r in restrictions:
                        input_class = r["value"]
                        # NOTE(review): no default here — behaviour when the
                        # input individual is missing from the KB depends on
                        # tripper's `value()`; TODO confirm it fails loudly.
                        indv = ts.value(predicate=RDF.type, object=input_class)
                        triples.append((comput, r["property"], indv))

                    # Add output dataset individuals
                    restrictions = ts.restrictions(
                        config.kb_document_computation, hasOutput
                    )
                    for r in restrictions:
                        output_class = r["value"]
                        indv = ts.value(
                            predicate=RDF.type,
                            object=output_class,
                            default=None,
                        )
                        if indv and indv != iri:
                            triples.append((comput, r["property"], indv))

                # Document data source
                resource = {
                    "dataresource": {
                        "type": config.kb_document_class,
                        "downloadUrl": config.location,
                        "mediaType": (
                            config.mediaType
                            if config.mediaType
                            else "application/vnd.dlite-parse"
                        ),
                        "configuration": {
                            "datamodel": (
                                config.datamodel
                                if config.datamodel
                                else inst.meta.uri
                            ),
                            "driver": config.driver,
                            "options": (  # Trying to be clever here...
                                config.options.replace("mode=w", "mode=r")
                                if config.options
                                else config.options
                            ),
                        },
                    },
                    # "parse": {},  # Not supported by OTEAPI yet...
                    "mapping": {
                        "mappingType": "mappings",
                        # __TODO__
                        # Populate prefixes and triples from mapping
                        # strategy in current partial pipeline
                        # "prefixes": {},
                        # "triples": [],
                    },
                }
                update_dict(resource, config.kb_document_update)

                save_container(
                    ts,
                    resource,
                    iri,
                    recognised_keys="basic",
                )
                ts.add_triples(triples)

            finally:
                ts.close()

        # __TODO__
        # Can we safely assume that all strategies in a pipeline will be
        # executed in the same Python interpreter?  If not, we should write
        # the collection to a storage, such that it can be shared with the
        # other strategies.

        update_collection(coll)
        return DLiteResult(collection_id=coll.uuid)

function_config: DLiteGenerateConfig instance-attribute

get()

Execute the strategy.

This method will be called through the strategy-specific endpoint of the OTE-API Services.

Returns:

Type Description
DLiteResult

SessionUpdate instance.

Source code in oteapi_dlite/strategies/generate.py
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
def get(self) -> DLiteResult:
    """Execute the strategy.

    This method will be called through the strategy-specific endpoint
    of the OTE-API Services.

    Returns:
        SessionUpdate instance.
    """
    config = self.function_config.configuration
    cacheconfig = config.datacache_config

    # Resolve the DLite storage driver: an explicit `driver` takes
    # precedence over a lookup from `mediaType`.
    driver = (
        config.driver
        if config.driver
        else get_driver(mediaType=config.mediaType)
    )

    coll = get_collection(config.collection_id)

    # Select what to serialise: a labelled instance, the first
    # instance matching `datamodel`, or the collection itself.
    if config.label:
        inst = coll[config.label]
    elif config.datamodel:
        instances = coll.get_instances(
            metaid=config.datamodel,
            property_mappings=True,
            allow_incomplete=config.allow_incomplete,
        )
        # NOTE(review): only the first matching instance is used; raises
        # StopIteration if there is none — TODO confirm intended.
        inst = next(instances)
    elif config.store_collection:
        if config.store_collection_id:
            inst = coll.copy(newid=config.store_collection_id)
        else:
            inst = coll
    else:  # no instance selector was provided
        # NOTE(review): the message omits the `store_collection` option,
        # which is also accepted above.
        raise ValueError(
            "One of `label` or `datamodel` configurations should be given."
        )

    # Save instance — to `location` if given, otherwise to the data cache
    # via a temporary file.
    if config.location:
        inst.save(driver, config.location, config.options)
    else:  # missing test
        if cacheconfig and cacheconfig.accessKey:
            key = cacheconfig.accessKey
        else:  # missing test
            key = "generate_data"
        cache = DataCache()
        with tempfile.TemporaryDirectory() as tmpdir:
            inst.save(driver, f"{tmpdir}/data", config.options)
            with Path(f"{tmpdir}/data").open("rb") as f:
                cache.add(f.read(), key=key)

    # Store documentation of this instance in the knowledge base
    if config.kb_document_class:

        # Import here to avoid hard dependencies on tripper.
        from tripper import RDF
        from tripper.convert import save_container

        # KB connection settings come from the dlite-settings strategy;
        # they may arrive JSON-encoded as a string.
        kb_settings = config.dlite_settings.get("tripper.triplestore")
        if isinstance(kb_settings, str):
            kb_settings = json.loads(kb_settings)
        if kb_settings and not isinstance(kb_settings, dict):
            raise ValueError(
                "The `kb_document_class` configuration expects a dict "
                "with settings for the tripper.triplestore."
            )

        if TYPE_CHECKING:  # pragma: no cover
            # This block will only be run by mypy when checking typing
            assert (
                isinstance(kb_settings, dict) or kb_settings is None
            )  # nosec

        # IRI of new individual
        iri = individual_iri(
            class_iri=config.kb_document_class,
            base_iri=config.kb_document_base_iri,
        )

        triples = [(iri, RDF.type, config.kb_document_class)]
        if config.kb_document_context:
            for prop, val in config.kb_document_context.items():
                triples.append((iri, prop, val))

        ts = get_triplestore(
            kb_settings=kb_settings,
            collection_id=config.collection_id,
        )
        try:
            if config.kb_document_computation:
                comput = individual_iri(
                    class_iri=config.kb_document_computation,
                    base_iri=config.kb_document_base_iri,
                )
                triples.extend(
                    [
                        (comput, RDF.type, config.kb_document_computation),
                        (comput, hasOutput, iri),
                    ]
                )

                # Relate computation individual `comput` to its
                # input individuals.
                #
                # This simple implementation works against KB.  It
                # assumes that the input of
                # `kb_document_computation` is documented in the
                # KB and that there only exists one individual of each
                # input class.
                #
                # In the case of multiple individuals of the input
                # classes, the workflow executer must be involved
                # in the documentation.  It can either do the
                # documentation itself or provide a callback
                # providing the needed info, which can be called
                # from this strategy.

                # Relate to input dataset individuals
                restrictions = ts.restrictions(
                    config.kb_document_computation, hasInput
                )
                for r in restrictions:
                    input_class = r["value"]
                    # NOTE(review): no default here — behaviour when the
                    # input individual is missing from the KB depends on
                    # tripper's `value()`; TODO confirm it fails loudly.
                    indv = ts.value(predicate=RDF.type, object=input_class)
                    triples.append((comput, r["property"], indv))

                # Add output dataset individuals
                restrictions = ts.restrictions(
                    config.kb_document_computation, hasOutput
                )
                for r in restrictions:
                    output_class = r["value"]
                    indv = ts.value(
                        predicate=RDF.type,
                        object=output_class,
                        default=None,
                    )
                    if indv and indv != iri:
                        triples.append((comput, r["property"], indv))

            # Document data source
            resource = {
                "dataresource": {
                    "type": config.kb_document_class,
                    "downloadUrl": config.location,
                    "mediaType": (
                        config.mediaType
                        if config.mediaType
                        else "application/vnd.dlite-parse"
                    ),
                    "configuration": {
                        "datamodel": (
                            config.datamodel
                            if config.datamodel
                            else inst.meta.uri
                        ),
                        "driver": config.driver,
                        "options": (  # Trying to be clever here...
                            config.options.replace("mode=w", "mode=r")
                            if config.options
                            else config.options
                        ),
                    },
                },
                # "parse": {},  # Not supported by OTEAPI yet...
                "mapping": {
                    "mappingType": "mappings",
                    # __TODO__
                    # Populate prefixes and triples from mapping
                    # strategy in current partial pipeline
                    # "prefixes": {},
                    # "triples": [],
                },
            }
            update_dict(resource, config.kb_document_update)

            save_container(
                ts,
                resource,
                iri,
                recognised_keys="basic",
            )
            ts.add_triples(triples)

        finally:
            ts.close()

    # __TODO__
    # Can we safely assume that all strategies in a pipeline will be
    # executed in the same Python interpreter?  If not, we should write
    # the collection to a storage, such that it can be shared with the
    # other strategies.

    update_collection(coll)
    return DLiteResult(collection_id=coll.uuid)

initialize()

Initialize.

Source code in oteapi_dlite/strategies/generate.py
257
258
259
260
261
262
263
def initialize(self) -> DLiteResult:
    """Initialize the strategy.

    Returns:
        A `DLiteResult` referencing the shared DLite collection.
    """
    config = self.function_config.configuration
    coll = get_collection(config.collection_id)
    return DLiteResult(collection_id=coll.uuid)

DLiteStorageConfig

Bases: DLiteConfiguration

Configuration for a generic DLite storage filter.

The DLite storage driver can be specified using either the driver or mediaType field.

Where the output should be written is specified using either the location or datacache_config.accessKey field.

Either label or datamodel should be provided.

Source code in oteapi_dlite/strategies/generate.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
class DLiteStorageConfig(DLiteConfiguration):
    """Configuration for a generic DLite storage filter.

    The DLite storage driver can be specified using either the `driver`
    or `mediaType` field.

    Where the output should be written is specified using either the
    `location` or `datacache_config.accessKey` field.

    Either `label` or `datamodel` should be provided.
    """

    driver: Annotated[
        Optional[str],
        Field(
            description='Name of DLite driver (ex: "json").',
        ),
    ] = None
    mediaType: Annotated[
        Optional[str],
        Field(
            description='Media type for DLite driver (ex: "application/json").',
        ),
    ] = None
    options: Annotated[
        Optional[str],
        Field(
            description=(
                "Comma-separated list of options passed to the DLite "
                "storage plugin."
            ),
        ),
    ] = None
    location: Annotated[
        Optional[str],
        Field(
            description=(
                "Location of storage to write to.  If unset, store in data "
                "cache using the key provided with "
                "`datacache_config.accessKey` (defaults to 'generate_data')."
            ),
        ),
    ] = None
    label: Annotated[
        Optional[str],
        Field(
            description=(
                "Label of DLite instance in the collection to serialise."
            ),
        ),
    ] = None
    datamodel: Annotated[
        Optional[str],
        Field(
            description=(
                "URI to the datamodel of the new instance.  Needed when "
                "generating the instance from mappings.  Cannot be combined "
                "with `label`"
            ),
        ),
    ] = None
    store_collection: Annotated[
        bool,
        Field(
            description="Whether to store the entire collection in the session "
            "instead of a single instance.  Cannot be combined with `label` or "
            "`datamodel`.",
        ),
    ] = False
    store_collection_id: Annotated[
        Optional[str],
        Field(
            # Fixed missing sentence break after `store_collection`.
            description="Used together with `store_collection`. If given, "
            "store a copy of the collection with this id.",
        ),
    ] = None
    allow_incomplete: Annotated[
        Optional[bool],
        Field(
            description="Whether to allow incomplete property mappings.",
        ),
    ] = False
    datacache_config: Annotated[
        Optional[DataCacheConfig],
        Field(
            description="Configuration options for the local data cache.",
        ),
    ] = None
    kb_document_class: Annotated[
        Optional[str],
        Field(
            description=(
                "IRI of a class in the ontology."
                "\n\n"
                "If given, the generated DLite instance is documented in the "
                "knowledge base as an instance of this class."
                "\n\n"
                "Expects that the 'tripper.triplestore' setting has been "
                "set using the SettingsStrategy (vnd.dlite-settings). "
                "These settings should be a dict that can be passed "
                "as keyword arguments to `tripper.Triplestore()`."
                "\n\n"
                "Example of adding expected settings using OTELib:\n"
                "\n\n"
                ">>> kb_settings = client.create_filter(\n"
                "...     filterType='application/vnd.dlite-settings',\n"
                "...     configuration={\n"
                "...         'label': 'tripper.triplestore',\n"
                "...         'settings': {\n"
                "...             'backend': 'rdflib',\n"
                "...             'triplestore_url': '/path/to/local/kb.ttl',\n"
                "...         },\n"
                "...     },\n"
                "... )\n"
                ">>> generate = client.create_function(\n"
                "...     functionType='application/vnd.dlite-generate'\n"
                "...     configuration={\n"
                "...         kb_document_class='http://example.com#MyClass'\n"
                "...         ...\n"
                "...     },\n"
                "... )\n"
                ">>> pipeline = ... >> generate >> kb_settings\n"
                ">>> pipeline.get()\n"
            ),
        ),
    ] = None
    kb_document_update: Annotated[
        Optional[dict],
        Field(
            # BUG FIX: the implicitly concatenated string previously ended
            # with a trailing comma inside the parentheses, which made the
            # description a 1-tuple instead of a str.
            description=(
                "Dict updating the documentation (partial pipeline) created "
                "with `kb_document_class`."
                "\n\n"
                "This dict should be structured as follows: "
                "\n\n"
                "    {\n"
                '      "dataresource": {...},\n'
                '      "parse": {...}\n'
                '      "mapping": {...}\n'
                "    }\n"
                "\n"
                "where the provided items will override the default "
                "configurations in respective partial pipeline created by "
                '`kb_document_class`.  Any of the items "dataresource", '
                '"parse" and "mapping" are optional.'
            ),
        ),
    ] = None
    kb_document_base_iri: Annotated[
        str, Field(description="Base IRI or prefix for created individuals.")
    ] = ":"
    kb_document_context: Annotated[
        Optional[dict],
        Field(
            description=(
                "If `kb_document_class` is given, this configuration will add "
                "additional context to the documentation of the generated "
                "individual."
                "\n\n"
                "This might be useful to make it easy to later access the "
                "generated individual."
                "\n\n"
                "This configuration should be a dict mapping providing the "
                "additional documentation of the driver. It should map OWL "
                "properties to either tripper literals or IRIs."
                "\n\n"
                "Example: `{RDF.type: ONTO.MyDataSet, "
                "EMMO.isDescriptionFor: ONTO.MyMaterial}`"
            ),
        ),
    ] = None
    kb_document_computation: Annotated[
        Optional[str],
        Field(
            description=(
                "IRI of a computation subclass."
                "\n\n"
                "Requires `kb_document_class`, and is used to "
                "document the computation (model) that the "
                "individual (of `kb_document_class`) to be documented "
                "is output of. "
                "When `kb_document_computation` is given a new individual of "
                "the computation subclass is created. Input and "
                "output datasets are documented using the relation "
                "`emmo:hasInput` and `emmo:hasOutput`, "
                "respectively.  The individual of `kb_document_class` is "
                "one of the output individuals."
                "\n\n"
                "Note: This configuration relies on several assumptions:\n"
                "  - The `kb_document_computation` class exists in the "
                "knowledge base and is related to its input and output "
                "dataset classes via `emmo:hasInput` and `emmo:hasOutput` "
                "restrictions, respectively.\n"
                "  - There exists only one individual of each input dataset "
                "class.\n"
                "  - There exists at most one individual of each output "
                "dataset class.\n"
            ),
        ),
    ] = None

allow_incomplete: Annotated[Optional[bool], Field(description='Whether to allow incomplete property mappings.')] = False class-attribute instance-attribute

datacache_config: Annotated[Optional[DataCacheConfig], Field(description='Configuration options for the local data cache.')] = None class-attribute instance-attribute

datamodel: Annotated[Optional[str], Field(description='URI to the datamodel of the new instance. Needed when generating the instance from mappings. Cannot be combined with `label`')] = None class-attribute instance-attribute

driver: Annotated[Optional[str], Field(description='Name of DLite driver (ex: "json").')] = None class-attribute instance-attribute

kb_document_base_iri: Annotated[str, Field(description='Base IRI or prefix for created individuals.')] = ':' class-attribute instance-attribute

kb_document_class: Annotated[Optional[str], Field(description="IRI of a class in the ontology.\n\nIf given, the generated DLite instance is documented in the knowledge base as an instance of this class.\n\nExpects that the 'tripper.triplestore' setting has been set using the SettingsStrategy (vnd.dlite-settings). This settings should be a dict that can be passed as keyword arguments to `tripper.Triplestore()`.\n\nExample of adding expected settings using OTELib:\n\n\n>>> kb_settings = client.create_filter(\n... filterType='application/vnd.dlite-settings',\n... configuration={\n... 'label': 'tripper.triplestore',\n... 'settings': {\n... 'backend': 'rdflib',\n... 'triplestore_url': '/path/to/local/kb.ttl',\n... },\n... },\n... )\n>>> generate = client.create_function(\n... functionType='application/vnd.dlite-generate'\n... configuration={\n... kb_document_class='http://example.com#MyClass'\n... ...\n... },\n... )\n>>> pipeline = ... >> generate >> kb_settings\n>>> pipeline.get()\n")] = None class-attribute instance-attribute

kb_document_computation: Annotated[Optional[str], Field(description='IRI of a computation subclass.\n\nRequires `kb_document_class`, and is used to document the computation (model) that the individual (of `kb_document_class`) to be documented is output of.When `kb_document_computation` is given a new individual of the computation subclass is created. Input and output datasets are documented using the relation `emmo:hasInput` and `emmo:hasOutput`, respectively. The individual of `kb_document_class` is one of the output individuals.\n\nNote: This configuration relies on several assumptions:\n - The `kb_document_computation` class exists in the knowledge base and is related to its input and output dataset classes via `emmo:hasInput` and `emmo:hasOutput` restrictions, respectively.\n - There exists only one individual of each input dataset class.\n - There exists at most one individual of each output dataset class.\n')] = None class-attribute instance-attribute

kb_document_context: Annotated[Optional[dict], Field(description='If `kb_document_class` is given, this configuration will add additional context to the documentation of the generated individual.\n\nThis might be useful to make it easy to later access the generated individual.\n\nThis configuration should be a dict mapping providing the additional documentation of the driver. It should map OWL properties to either tripper literals or IRIs.\n\nExample: `{RDF.type: ONTO.MyDataSet, EMMO.isDescriptionFor: ONTO.MyMaterial}`')] = None class-attribute instance-attribute

kb_document_update: Annotated[Optional[dict], Field(description=('Dict updating the documentation (partial pipeline) created with `kb_document_class`.\n\nThis dict should be structured as follows: \n\n {\n "dataresource": {...},\n "parse": {...}\n "mapping": {...}\n }\n\nwhere the provided items will override the the default configurations in respective partial pipeline created by `kb_document_class`. Any of the items "dataresource", "parse" and "mapping" are optional.'))] = None class-attribute instance-attribute

label: Annotated[Optional[str], Field(description='Label of DLite instance in the collection to serialise.')] = None class-attribute instance-attribute

location: Annotated[Optional[str], Field(description="Location of storage to write to. If unset, store in data cache using the key provided with `datacache_config.accessKey` (defaults to 'generate_data').")] = None class-attribute instance-attribute

mediaType: Annotated[Optional[str], Field(description='Media type for DLite driver (ex: "application/json").')] = None class-attribute instance-attribute

options: Annotated[Optional[str], Field(description='Comma-separated list of options passed to the DLite storage plugin.')] = None class-attribute instance-attribute

store_collection: Annotated[bool, Field(description='Whether to store the entire collection in the session instead of a single instance. Cannot be combined with `label` or `datamodel`.')] = False class-attribute instance-attribute

store_collection_id: Annotated[Optional[str], Field(description='Used together with `store_collection` If given, store a copy of the collection with this id.')] = None class-attribute instance-attribute

KBError

Bases: ValueError

Invalid data in knowledge base.

Source code in oteapi_dlite/strategies/generate.py
30
31
class KBError(ValueError):
    """Invalid data in knowledge base."""
    # NOTE(review): not raised in the code visible here — presumably used
    # elsewhere in the module; verify before removing.

individual_iri(class_iri, base_iri=':', randbytes=6)

Return an IRI for an individual of a class.

Parameters:

Name Type Description Default
class_iri str

IRI of the class to create an individual of.

required
base_iri str

Base IRI of the created individual.

':'
randbytes int

Number of random bytes to include in the returned IRI.

6

Returns:

Type Description
str

IRI of a new individual.

Source code in oteapi_dlite/strategies/generate.py
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
def individual_iri(
    class_iri: str, base_iri: str = ":", randbytes: int = 6
) -> str:
    """Return an IRI for a new individual of a class.

    Arguments:
        class_iri: IRI of the class to create an individual of.
        base_iri: Base IRI of the created individual.
        randbytes: Number of random bytes to include in the returned IRI.

    Returns:
        IRI of a new individual.

    """
    # Strip prefix (after the first ":"), any path (after the last "/")
    # and any fragment separator (after the last "#") to get the bare
    # class name.
    name = class_iri.split(":", 1)[-1]
    name = name.rsplit("/", 1)[-1]
    name = name.rsplit("#", 1)[-1]
    # Append a random hex suffix so each call yields a unique IRI.
    suffix = os.urandom(randbytes).hex()
    return f"{base_iri}{name.lower()}-{suffix}"