Source code for cheesechaser.pipe.image

"""
This module provides image processing pipes for retrieving and handling image resources.

It includes two main classes:

1. ``SimpleImagePipe``: For retrieving single image files.
2. ``DataAttachedImagePipe``: For retrieving image files along with associated JSON data.

These pipes work with a resource pool to mock and access resources, and handle various
scenarios such as missing files, multiple files, and attached data.

The module also defines a DataAttachedImage dataclass to represent images with optional
associated data.

Usage:
    >>> from cheesechaser.pipe import SimpleImagePipe, DataAttachedImagePipe
    >>>
    >>> # Create and use a SimpleImagePipe
    >>> simple_pipe = SimpleImagePipe(pool)
    >>> image = simple_pipe.retrieve(resource_id, resource_metainfo)
    >>>
    >>> # Create and use a DataAttachedImagePipe
    >>> data_pipe = DataAttachedImagePipe(pool)
    >>> image_with_data = data_pipe.retrieve(resource_id, resource_metainfo)
"""

import json
import mimetypes
import os
import warnings
from dataclasses import dataclass
from typing import Optional

from PIL import Image

from .base import Pipe
from ..datapool import ResourceNotFoundError, InvalidResourceDataError


[docs]class SimpleImagePipe(Pipe): """ A pipe for retrieving single image files from a resource pool. This pipe searches for image files in the given resource, opens the first found image file, and returns it as a PIL Image object. :raises ResourceNotFoundError: If no image file is found in the resource. :raises InvalidResourceDataError: If multiple image files are found in the resource. """
[docs] def retrieve(self, resource_id, resource_metainfo): """ Retrieve an image from the resource pool. :param resource_id: The identifier of the resource to retrieve. :param resource_metainfo: Metadata information about the resource. :return: A PIL Image object of the retrieved image. :rtype: PIL.Image.Image :raises ResourceNotFoundError: If no image file is found. :raises InvalidResourceDataError: If multiple image files are found. """ with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo): files = os.listdir(td) image_files = [] for file in files: mimetype, _ = mimetypes.guess_type(file) if not mimetype or mimetype.startswith('image/'): image_files.append(file) if len(image_files) == 0: raise ResourceNotFoundError(f'Image not found for resource {resource_id!r}.') elif len(image_files) != 1: raise InvalidResourceDataError(f'Image file not unique for resource {resource_id!r} ' f'- {image_files!r}.') src_file = os.path.join(td, image_files[0]) image = Image.open(src_file) image.load() return image
[docs]@dataclass class DataAttachedImage: """ A dataclass representing an image with optional attached data. :param image: The PIL Image object. :param data: Optional dictionary containing additional data associated with the image. """ image: Image.Image data: Optional[dict]
[docs]class DataAttachedImagePipe(Pipe): """ A pipe for retrieving image files along with associated JSON data from a resource pool. This pipe searches for an image file and an optional JSON file in the given resource. It returns a DataAttachedImage object containing the image and any associated data. :raises ResourceNotFoundError: If no image file is found in the resource. :raises InvalidResourceDataError: If multiple image files or JSON files are found in the resource. """
[docs] def retrieve(self, resource_id, resource_metainfo): """ Retrieve an image and its associated data from the resource pool. :param resource_id: The identifier of the resource to retrieve. :param resource_metainfo: Metadata information about the resource. :return: A DataAttachedImage object containing the image and any associated data. :rtype: DataAttachedImage :raises ResourceNotFoundError: If no image file is found. :raises InvalidResourceDataError: If multiple image files or JSON files are found. """ with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo): files = os.listdir(td) if len(files) == 0: raise ResourceNotFoundError(f'Image not found for resource {resource_id!r}.') else: json_files = [file for file in files if file.lower().endswith('.json')] non_json_files = [file for file in files if not file.lower().endswith('.json')] if not non_json_files: raise ResourceNotFoundError( f'Image file not found, but json data found for resource {resource_id!r}.') elif len(non_json_files) > 1: raise InvalidResourceDataError(f'Image files not unique for resource {resource_id!r}.') else: image_file = os.path.join(td, non_json_files[0] if non_json_files else None) if not json_files: warnings.warn(f'Json data file not found for resource {resource_id!r}.') json_file = None elif len(json_files) > 1: raise InvalidResourceDataError(f'Json data files not unique for resource {resource_id!r}.') else: json_file = os.path.join(td, json_files[0] if json_files else None) image = Image.open(image_file) image.load() if json_file: with open(json_file, 'r') as f: json_data = json.load(f) else: json_data = None return DataAttachedImage(image, json_data)