Skip to content

parse

Generic parse strategy using DLite storage plugin.

DLiteParseConfig

Bases: DLiteResult

Configuration for generic DLite parser.

Source code in oteapi_dlite/strategies/parse.py
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
class DLiteParseConfig(DLiteResult):
    """Configuration for generic DLite parser."""

    # NOTE(review): the parse strategy also reads `collection_id` from this
    # configuration; it is not declared here, so it is presumably inherited
    # from `DLiteResult` -- confirm against that model.

    # "Required" resource strategy fields
    # Both descriptions are reused verbatim from the corresponding
    # `ResourceConfig` fields.  The fields themselves are optional here
    # (default `None`).
    downloadUrl: Annotated[
        Optional[HostlessAnyUrl],
        Field(
            description=ResourceConfig.model_fields["downloadUrl"].description
        ),
    ] = None

    mediaType: Annotated[
        Optional[str],
        Field(description=ResourceConfig.model_fields["mediaType"].description),
    ] = None

    # Parser-specific configuration
    # `driver` is the only field declared without a default value.  The
    # strategy's `get()` still guards against a falsy value (e.g. an empty
    # string) by falling back to `get_driver(...)`.
    driver: Annotated[
        str,
        Field(
            description='Name of DLite driver (ex: "json").',
        ),
    ]
    # When `location` is set, `get()` loads the instance directly from it and
    # skips the download / data cache path entirely.
    location: Annotated[
        Optional[str],
        Field(
            description=(
                "Explicit location of storage.  Normally data is read from the "
                "data cache using `datacache_config.accessKey` (default: "
                "'key')."
            ),
        ),
    ] = None
    # `options` and `id` are forwarded unchanged to
    # `dlite.Instance.from_location(...)` by `get()`.
    options: Annotated[
        Optional[str],
        Field(
            description=(
                "Comma-separated list of options passed to the DLite storage "
                "plugin."
            ),
        ),
    ] = None
    id: Annotated[
        Optional[str],
        Field(
            description="If given, the id of the instance in the storage.",
        ),
    ] = None
    # If no label is given, `get()` uses the UUID of the loaded instance as
    # its label in the collection.
    label: Annotated[
        Optional[str],
        Field(
            description=(
                "Optional label of the new DLite instance in the collection."
            ),
        ),
    ] = None
    # Not read by the strategy methods (`initialize()` / `get()`) of this
    # module; kept for documentation purposes as the description states.
    datamodel: Annotated[
        Optional[str],
        Field(
            description=(
                "DLite datamodel documenting the structure of the data set. "
                "Often unused, since the datamodel is implicitly defined in "
                "the DLite driver (DLite plugin), but for a documentation "
                "point of view this is a very important field."
            ),
        ),
    ] = None
    # Dumped with `model_dump()` and placed under the "configuration" key of
    # the config handed to the "download" strategy in `get()`.
    download_config: Annotated[
        AttrDict,
        Field(description="Configurations provided to a download strategy."),
    ] = AttrDict()
    # Passed to `DataCache(...)` in `get()`; a non-empty `accessKey` on it
    # takes precedence over the key returned by the download strategy.
    datacache_config: Annotated[
        Optional[DataCacheConfig],
        Field(
            description="Configuration options for the local data cache.",
        ),
    ] = None
datacache_config: Annotated[Optional[DataCacheConfig], Field(description='Configuration options for the local data cache.')] = None class-attribute instance-attribute

datamodel: Annotated[Optional[str], Field(description='DLite datamodel documenting the structure of the data set. Often unused, since the datamodel is implicitly defined in the DLite driver (DLite plugin), but for a documentation point of view this is a very important field.')] = None class-attribute instance-attribute

downloadUrl: Annotated[Optional[HostlessAnyUrl], Field(description=ResourceConfig.model_fields['downloadUrl'].description)] = None class-attribute instance-attribute

download_config: Annotated[AttrDict, Field(description='Configurations provided to a download strategy.')] = AttrDict() class-attribute instance-attribute

driver: Annotated[str, Field(description='Name of DLite driver (ex: "json").')] instance-attribute

id: Annotated[Optional[str], Field(description='If given, the id of the instance in the storage.')] = None class-attribute instance-attribute

label: Annotated[Optional[str], Field(description='Optional label of the new DLite instance in the collection.')] = None class-attribute instance-attribute

location: Annotated[Optional[str], Field(description="Explicit location of storage. Normally data is read from the data cache using `datacache_config.accessKey` (default: 'key').")] = None class-attribute instance-attribute

mediaType: Annotated[Optional[str], Field(description=ResourceConfig.model_fields['mediaType'].description)] = None class-attribute instance-attribute

options: Annotated[Optional[str], Field(description='Comma-separated list of options passed to the DLite storage plugin.')] = None class-attribute instance-attribute

DLiteParseParserConfig

Bases: ParserConfig

DLite parse strategy resource config.

Source code in oteapi_dlite/strategies/parse.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
class DLiteParseParserConfig(ParserConfig):
    """DLite parse strategy resource config."""

    # Restricted to the single literal value identifying this strategy; the
    # description is reused from the base `ParserConfig` field.
    parserType: Annotated[
        Literal["application/vnd.dlite-parse"],
        Field(description=ParserConfig.model_fields["parserType"].description),
    ]
    # Strategy-specific settings (driver, location, cache options, ...) live
    # in the nested `DLiteParseConfig`, not on this model.
    configuration: Annotated[
        DLiteParseConfig,
        Field(description="DLite parse strategy-specific configuration."),
    ]
    # Optional here (default `None`); description reused from `ParserConfig`.
    entity: Annotated[
        Optional[AnyHttpUrl],
        Field(description=ParserConfig.model_fields["entity"].description),
    ] = None

configuration: Annotated[DLiteParseConfig, Field(description='DLite parse strategy-specific configuration.')] instance-attribute

entity: Annotated[Optional[AnyHttpUrl], Field(description=ParserConfig.model_fields['entity'].description)] = None class-attribute instance-attribute

parserType: Annotated[Literal['application/vnd.dlite-parse'], Field(description=ParserConfig.model_fields['parserType'].description)] instance-attribute

DLiteParseStrategy

Generic DLite parse strategy utilising DLite storage plugins.

Registers strategies:

  • ("mediaType", "application/vnd.dlite-parse")
Source code in oteapi_dlite/strategies/parse.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
@dataclass
class DLiteParseStrategy:
    """Generic DLite parse strategy utilising DLite storage plugins.

    **Registers strategies**:

    - `("mediaType", "application/vnd.dlite-parse")`

    """

    parse_config: DLiteParseParserConfig

    def initialize(self) -> DLiteResult:
        """Initialize.

        Returns:
            Reference to the UUID of the collection obtained from
            `get_collection()` for the configured `collection_id`.

        """
        return DLiteResult(
            collection_id=get_collection(
                self.parse_config.configuration.collection_id
            ).uuid
        )

    def get(self) -> DLiteResult:
        """Execute the strategy.

        This method will be called through the strategy-specific endpoint
        of the OTE-API Services.

        A DLite instance is loaded either directly from
        `configuration.location` or, when no location is given, from the
        data cache after running the "download" strategy.  The instance is
        then added to the collection, which is finally passed to
        `update_collection()`.

        Returns:
            Reference to a DLite collection ID.

        Raises:
            RuntimeError: If content had to be downloaded, but neither
                `datacache_config.accessKey` nor the download strategy
                output provides a data cache key.

        """
        config = self.parse_config.configuration
        cacheconfig = config.datacache_config

        # `mediaType` is declared on the strategy-specific configuration
        # (`DLiteParseConfig`), not on `DLiteParseParserConfig`, so the
        # fallback driver lookup has to read it from `config`.
        driver = config.driver or get_driver(mediaType=config.mediaType)

        # Create instance
        if config.location:
            inst = dlite.Instance.from_location(
                driver=driver,
                location=config.location,
                options=config.options,
                id=config.id,
            )
        else:
            # Download the file.  The download strategy receives the full
            # parser configuration, with the dedicated download settings
            # placed under its "configuration" key.
            download_config = config.model_dump()
            download_config["configuration"] = (
                config.download_config.model_dump()
            )
            output = create_strategy("download", download_config).get()

            # An explicitly configured access key takes precedence over
            # the key reported by the download strategy.
            if cacheconfig and cacheconfig.accessKey:
                key = cacheconfig.accessKey
            elif "key" in output:
                key = output["key"]
            else:
                raise RuntimeError(
                    "No data cache key provided for the downloaded content."
                )

            # See if we can extract file suffix from downloadUrl
            suffix = (
                Path(str(config.downloadUrl)).suffix
                if config.downloadUrl
                else None
            )

            cache = DataCache(cacheconfig)
            with cache.getfile(key, suffix=suffix) as location:
                inst = dlite.Instance.from_location(
                    driver=driver,
                    location=str(location),
                    options=config.options,
                    id=config.id,
                )

        # Insert inst into collection, labelled by its UUID unless an
        # explicit label was configured.
        coll = get_collection(config.collection_id)
        label = config.label or inst.uuid
        coll.add(label, inst)

        # __TODO__
        # See
        # https://github.com/EMMC-ASBL/oteapi-dlite/pull/84#discussion_r1050437185
        # and following comments.
        #
        # Since we cannot safely assume that all strategies in a
        # pipeline will be executed in the same Python interpreter,
        # the collection should be written to a storage, such that it
        # can be shared with the other strategies.

        update_collection(coll)
        return DLiteResult(collection_id=coll.uuid)

parse_config: DLiteParseParserConfig instance-attribute

get()

Execute the strategy.

This method will be called through the strategy-specific endpoint of the OTE-API Services.

Returns:

Type Description
DLiteResult

Reference to a DLite collection ID.

Source code in oteapi_dlite/strategies/parse.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def get(self) -> DLiteResult:
    """Execute the strategy.

    This method will be called through the strategy-specific endpoint
    of the OTE-API Services.

    A DLite instance is loaded either directly from
    `configuration.location` or, when no location is given, from the
    data cache after running the "download" strategy.  The instance is
    then added to the collection, which is finally passed to
    `update_collection()`.

    Returns:
        Reference to a DLite collection ID.

    Raises:
        RuntimeError: If content had to be downloaded, but neither
            `datacache_config.accessKey` nor the download strategy
            output provides a data cache key.

    """
    config = self.parse_config.configuration
    cacheconfig = config.datacache_config

    # `mediaType` is declared on the strategy-specific configuration
    # (`DLiteParseConfig`), not on `DLiteParseParserConfig`, so the
    # fallback driver lookup has to read it from `config`.
    driver = config.driver or get_driver(mediaType=config.mediaType)

    # Create instance
    if config.location:
        inst = dlite.Instance.from_location(
            driver=driver,
            location=config.location,
            options=config.options,
            id=config.id,
        )
    else:
        # Download the file.  The download strategy receives the full
        # parser configuration, with the dedicated download settings
        # placed under its "configuration" key.
        download_config = config.model_dump()
        download_config["configuration"] = (
            config.download_config.model_dump()
        )
        output = create_strategy("download", download_config).get()

        # An explicitly configured access key takes precedence over the
        # key reported by the download strategy.
        if cacheconfig and cacheconfig.accessKey:
            key = cacheconfig.accessKey
        elif "key" in output:
            key = output["key"]
        else:
            raise RuntimeError(
                "No data cache key provided for the downloaded content."
            )

        # See if we can extract file suffix from downloadUrl
        suffix = (
            Path(str(config.downloadUrl)).suffix
            if config.downloadUrl
            else None
        )

        cache = DataCache(cacheconfig)
        with cache.getfile(key, suffix=suffix) as location:
            inst = dlite.Instance.from_location(
                driver=driver,
                location=str(location),
                options=config.options,
                id=config.id,
            )

    # Insert inst into collection, labelled by its UUID unless an
    # explicit label was configured.
    coll = get_collection(config.collection_id)
    label = config.label or inst.uuid
    coll.add(label, inst)

    # __TODO__
    # See
    # https://github.com/EMMC-ASBL/oteapi-dlite/pull/84#discussion_r1050437185
    # and following comments.
    #
    # Since we cannot safely assume that all strategies in a
    # pipeline will be executed in the same Python interpreter,
    # the collection should be written to a storage, such that it
    # can be shared with the other strategies.

    update_collection(coll)
    return DLiteResult(collection_id=coll.uuid)

initialize()

Initialize.

Source code in oteapi_dlite/strategies/parse.py
139
140
141
142
143
144
145
def initialize(self) -> DLiteResult:
    """Look up the configured collection and return a reference to it.

    Returns:
        A `DLiteResult` whose `collection_id` is the UUID of the collection
        returned by `get_collection()` for the configured `collection_id`.

    """
    configured_id = self.parse_config.configuration.collection_id
    collection = get_collection(configured_id)
    return DLiteResult(collection_id=collection.uuid)