Skip to content

image

Strategy class for parsing images (image/jpg and the other registered image media types).

ImageDataParseStrategy

Parse strategy for images.

This strategy uses Pillow to read a raw image from the data cache, converts it into a NumPy array and stores the new array in the data cache.

It also supports simple cropping and image conversions.

The key to the new array and other metadata is stored in the session. See SessionUpdateImageParse for more info.

Registers strategies:

  • ("mediaType", "image/jpg")
  • ("mediaType", "image/jpeg")
  • ("mediaType", "image/jp2")
  • ("mediaType", "image/png")
  • ("mediaType", "image/gif")
  • ("mediaType", "image/tiff")
  • ("mediaType", "image/eps")
Source code in oteapi/strategies/parse/image.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
@dataclass
class ImageDataParseStrategy:
    """Parse strategy for images.

    This strategy uses Pillow to read a raw image from the data cache,
    converts it into a NumPy array and stores the new array in the
    data cache.

    It also supports simple cropping and image conversions.

    The key to the new array and other metadata is stored in the session. See
    [`SessionUpdateImageParse`][oteapi.strategies.parse.image.SessionUpdateImageParse]
    for more info.

    **Registers strategies**:

    - `("mediaType", "image/jpg")`
    - `("mediaType", "image/jpeg")`
    - `("mediaType", "image/jp2")`
    - `("mediaType", "image/png")`
    - `("mediaType", "image/gif")`
    - `("mediaType", "image/tiff")`
    - `("mediaType", "image/eps")`

    """

    parse_config: ImageParserResourceConfig

    def initialize(self, session: "Optional[Dict[str, Any]]" = None) -> SessionUpdate:
        """Initialize strategy.

        Parameters:
            session: The current session. It is not used by this method.

        Returns:
            An empty session update.

        """
        return SessionUpdate()

    def get(
        self, session: "Optional[Dict[str, Any]]" = None
    ) -> SessionUpdateImageParse:
        """Execute the strategy.

        Runs the download strategy for the resource, opens the downloaded
        content with Pillow, optionally crops and converts it, and stores the
        pixel data as a NumPy array in the data cache.

        Parameters:
            session: The current session. A falsy value (`None` or an empty
                mapping) is replaced by a new, empty dictionary. The session is
                updated in place with the output of the download strategy. If
                the configuration has no `crop`, the session's `"imagecrop"`
                entry is used as crop box instead.

        Returns:
            A session update holding the data cache key of the stored array,
            the image size and mode, the data cache key of the palette (only
            for mode `"P"`) and the filtered content of `image.info`.

        Raises:
            RuntimeError: If neither the data cache configuration nor the
                output of the download strategy provides a key to the
                downloaded content.

        """
        if not session:
            session = {}

        config = self.parse_config.configuration
        crop = config.crop if config.crop else session.get("imagecrop")

        # The media subtype ("png" in "image/png") selects the Pillow format
        # and is also used as suffix of the temporary file.
        mime_format = self.parse_config.mediaType.split("/")[1]
        image_format = SupportedFormat[mime_format].value

        # Proper download configurations
        conf = self.parse_config.dict()
        conf["configuration"] = config.download_config or {}
        download_config = ResourceConfig(**conf)

        downloader = create_strategy("download", download_config)
        session.update(downloader.initialize(session))

        downloader = create_strategy("download", download_config)
        output = downloader.get(session)
        session.update(output)

        # An explicitly configured access key takes precedence over the key
        # reported by the download strategy.
        if config.datacache_config and config.datacache_config.accessKey:
            cache_key = config.datacache_config.accessKey
        elif "key" in output:
            cache_key = output["key"]
        else:
            raise RuntimeError(
                "No data cache key provided to the downloaded content"
            )

        cache = DataCache(config.datacache_config)

        # Treat image according to filter values
        with cache.getfile(cache_key, suffix=mime_format) as filename:
            source_image = Image.open(filename, formats=[image_format])
            image = source_image
            try:
                if crop:
                    image = image.crop(crop)
                if config.image_mode:
                    image = image.convert(mode=config.image_mode)

                if image_format == "GIF":
                    # Drop the leading b"GIF" from the version entry.
                    version = image.info.get("version", b"")
                    if version.startswith(b"GIF"):
                        image.info["version"] = version[len(b"GIF") :]

                # Use the buffer protocol to store the image in the datacache
                data = np.asarray(image)
                image_key = cache.add(
                    data,
                    key=config.image_key,
                    tag=str(id(session)),
                )

                if image.mode == "P":
                    image_palette_key = cache.add(
                        np.asarray(image.getpalette()), tag=str(id(session))
                    )
                else:
                    image_palette_key = None

                # The session must be json serialisable - filter out all
                # non-json serialisable fields in image.info
                image_info = {
                    key: val
                    for key, val in (image.info or {}).items()
                    if isinstance(
                        val, (str, int, float, type(None), bool, tuple, list)
                    )
                }

                session_update = SessionUpdateImageParse(
                    image_key=image_key,
                    image_size=image.size,
                    image_mode=image.mode,
                    image_palette_key=image_palette_key,
                    image_info=image_info,
                )
            finally:
                # Explicitly close the image to avoid crashes on Windows.
                # Cropping/converting returns a new image, so the image that
                # was opened from the file has to be closed separately.
                image.close()
                if image is not source_image:
                    source_image.close()

        return session_update

get(session=None)

Execute the strategy.

Source code in oteapi/strategies/parse/image.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
def get(
    self, session: "Optional[Dict[str, Any]]" = None
) -> SessionUpdateImageParse:
    """Execute the strategy.

    Runs the download strategy for the resource, opens the downloaded
    content with Pillow, optionally crops and converts it, and stores the
    pixel data as a NumPy array in the data cache.

    Parameters:
        session: The current session. A falsy value (`None` or an empty
            mapping) is replaced by a new, empty dictionary. The session is
            updated in place with the output of the download strategy. If
            the configuration has no `crop`, the session's `"imagecrop"`
            entry is used as crop box instead.

    Returns:
        A session update holding the data cache key of the stored array,
        the image size and mode, the data cache key of the palette (only
        for mode `"P"`) and the filtered content of `image.info`.

    Raises:
        RuntimeError: If neither the data cache configuration nor the
            output of the download strategy provides a key to the
            downloaded content.

    """
    if not session:
        session = {}

    config = self.parse_config.configuration
    crop = config.crop if config.crop else session.get("imagecrop")

    # The media subtype ("png" in "image/png") selects the Pillow format
    # and is also used as suffix of the temporary file.
    mime_format = self.parse_config.mediaType.split("/")[1]
    image_format = SupportedFormat[mime_format].value

    # Proper download configurations
    conf = self.parse_config.dict()
    conf["configuration"] = config.download_config or {}
    download_config = ResourceConfig(**conf)

    downloader = create_strategy("download", download_config)
    session.update(downloader.initialize(session))

    downloader = create_strategy("download", download_config)
    output = downloader.get(session)
    session.update(output)

    # An explicitly configured access key takes precedence over the key
    # reported by the download strategy.
    if config.datacache_config and config.datacache_config.accessKey:
        cache_key = config.datacache_config.accessKey
    elif "key" in output:
        cache_key = output["key"]
    else:
        raise RuntimeError("No data cache key provided to the downloaded content")

    cache = DataCache(config.datacache_config)

    # Treat image according to filter values
    with cache.getfile(cache_key, suffix=mime_format) as filename:
        source_image = Image.open(filename, formats=[image_format])
        image = source_image
        try:
            if crop:
                image = image.crop(crop)
            if config.image_mode:
                image = image.convert(mode=config.image_mode)

            if image_format == "GIF":
                # Drop the leading b"GIF" from the version entry.
                version = image.info.get("version", b"")
                if version.startswith(b"GIF"):
                    image.info["version"] = version[len(b"GIF") :]

            # Use the buffer protocol to store the image in the datacache
            data = np.asarray(image)
            image_key = cache.add(
                data,
                key=config.image_key,
                tag=str(id(session)),
            )

            if image.mode == "P":
                image_palette_key = cache.add(
                    np.asarray(image.getpalette()), tag=str(id(session))
                )
            else:
                image_palette_key = None

            # The session must be json serialisable - filter out all
            # non-json serialisable fields in image.info
            image_info = {
                key: val
                for key, val in (image.info or {}).items()
                if isinstance(val, (str, int, float, type(None), bool, tuple, list))
            }

            session_update = SessionUpdateImageParse(
                image_key=image_key,
                image_size=image.size,
                image_mode=image.mode,
                image_palette_key=image_palette_key,
                image_info=image_info,
            )
        finally:
            # Explicitly close the image to avoid crashes on Windows.
            # Cropping/converting returns a new image, so the image that
            # was opened from the file has to be closed separately.
            image.close()
            if image is not source_image:
                source_image.close()

    return session_update

initialize(session=None)

Initialize strategy.

Source code in oteapi/strategies/parse/image.py
139
140
141
def initialize(self, session: "Optional[Dict[str, Any]]" = None) -> SessionUpdate:
    """Initialize strategy.

    The passed session is not inspected; an empty session update is returned.
    """
    empty_update = SessionUpdate()
    return empty_update

ImageParserConfig

Bases: AttrDict

Configuration data model for ImageDataParseStrategy.

Source code in oteapi/strategies/parse/image.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class ImageParserConfig(AttrDict):
    """Configuration data model for
    [`ImageDataParseStrategy`][oteapi.strategies.parse.image.ImageDataParseStrategy]."""

    # Crop box applied with `Image.crop()`. When unset, the strategy falls back
    # to the session's "imagecrop" entry.
    crop: Optional[Tuple[int, int, int, int]] = Field(
        None,
        description="Box cropping parameters (left, top, right, bottom).",
    )
    # Passed to `DataCache(...)`. If its `accessKey` is set, that key is used to
    # look up the downloaded content instead of the key reported by the
    # download strategy.
    datacache_config: Optional[DataCacheConfig] = Field(
        None,
        description="Configuration options for the local data cache.",
    )
    # Used as the `configuration` of the resource config that is handed to the
    # download strategy.
    download_config: AttrDict = Field(
        AttrDict(),
        description="Configurations passed to the downloader.",
    )
    # Forwarded as `key=` when the image array is added to the data cache.
    image_key: Optional[str] = Field(
        None,
        description="Key to use when storing the image data in datacache.",
    )
    # When set, the image is converted with `Image.convert(mode=image_mode)`
    # before it is stored.
    image_mode: Optional[str] = Field(
        None,
        description=(
            "Pillow mode to convert image into. See "
            "https://pillow.readthedocs.io/en/stable/handbook/concepts.html "
            "for details."
        ),
    )

ImageParserResourceConfig

Bases: ResourceConfig

Image parse strategy resource config.

Source code in oteapi/strategies/parse/image.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
class ImageParserResourceConfig(ResourceConfig):
    """Image parse strategy resource config.

    Narrows `mediaType` to the image media types handled by the image parse
    strategy and types `configuration` as `ImageParserConfig`.
    """

    # The part after "/" is used by the strategy as the name to look up in
    # `SupportedFormat`, so each literal here needs a matching enum member.
    mediaType: Union[
        Literal["image/jpg"],
        Literal["image/jpeg"],
        Literal["image/jp2"],
        Literal["image/png"],
        Literal["image/gif"],
        Literal["image/tiff"],
        Literal["image/eps"],
    ] = Field(
        ...,
        # Reuse the description of the parent model's `mediaType` field.
        description=ResourceConfig.__fields__["mediaType"].field_info.description,
    )
    # Strategy-specific options; defaults to an `ImageParserConfig` with all
    # default values.
    configuration: ImageParserConfig = Field(
        ImageParserConfig(),
        description="Image parse strategy-specific configuration.",
    )

SessionUpdateImageParse

Bases: SessionUpdate

Session update model returned by ImageDataParseStrategy.

See Pillow handbook for more details on image_mode, image_palette, and image_info.

Source code in oteapi/strategies/parse/image.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class SessionUpdateImageParse(SessionUpdate):
    """Session update model returned by `ImageDataParseStrategy.get()`.

    See [Pillow handbook](https://pillow.readthedocs.io/en/stable/handbook/concepts.html) for more details
    on image modes, palettes, and the image `info` dictionary, which are
    reflected in `image_mode`, `image_palette_key`, and `image_info`.
    """

    # Data cache key of the NumPy array created from the image.
    image_key: str = Field(
        ...,
        description="Key with which the image content is stored in the data cache.",
    )
    # Taken from `image.size` after optional cropping/conversion.
    image_size: Tuple[int, int] = Field(
        ...,
        description="Image size (width, height).",
    )
    # Taken from `image.mode` after optional cropping/conversion.
    image_mode: str = Field(
        ...,
        description="Image mode. Examples: 'L', 'P', 'RGB', 'RGBA'...",
    )
    # Only set by the strategy when the final image mode is "P"; None otherwise.
    image_palette_key: Optional[str] = Field(
        None,
        description="Datacache key for colour palette if mode is 'P'.",
    )
    # The strategy only keeps `image.info` entries whose values are str, int,
    # float, None, bool, tuple or list.
    image_info: dict = Field(
        {},
        description="Additional information about the image.",
    )

SupportedFormat

Bases: Enum

Supported formats for ImageDataParseStrategy.

Source code in oteapi/strategies/parse/image.py
70
71
72
73
74
75
76
77
78
79
class SupportedFormat(Enum):
    """Supported formats for `ImageDataParseStrategy`.

    Member names are the media subtypes (the part after "/" in `mediaType`);
    the strategy looks members up by name and passes the value to
    `Image.open(..., formats=[...])`.
    """

    # `jpeg` and `jpg` share a value, so `jpg` is an alias of `jpeg` (the first
    # member defined with that value); keep this order.
    jpeg = "JPEG"
    jpg = "JPEG"
    jp2 = "JPEG2000"
    png = "PNG"
    gif = "GIF"
    tiff = "TIFF"
    eps = "EPS"