# Source code for cheesechaser.datapool.nozomi

"""
This module provides a data pool implementation for Nozomi datasets.

It extends the functionality of the IncrementIDDataPool class to specifically handle
Nozomi datasets stored in a Hugging Face repository. The module defines a constant
for the repository name and a class that initializes the data pool with the
appropriate repository and revision information.

.. note::
    The dataset `deepghs/nozomi_standalone_full <https://huggingface.co/datasets/deepghs/nozomi_standalone_full>`_
    is gated; you must be granted access to it before using this module.
"""

from typing import Optional

from .base import IncrementIDDataPool

# Hugging Face dataset repository ID for the Nozomi dataset. NozomiDataPool
# passes this single ID as both the data repository and the index repository.
_NOZOMI_REPO = 'deepghs/nozomi_standalone_full'


class NozomiDataPool(IncrementIDDataPool):
    """
    A data pool class specifically designed for Nozomi datasets.

    This class inherits from IncrementIDDataPool and initializes it with the
    Nozomi-specific repository information. The same repository and the same
    revision are used for both the data and the index. It provides a simple
    way to create a data pool for Nozomi datasets with optional revision
    specification.

    :param revision: The revision of the Nozomi dataset to use, defaults to 'main'
    :type revision: str
    :param hf_token: Optional Hugging Face authentication token
    :type hf_token: Optional[str]

    Usage:
        >>> nozomi_pool = NozomiDataPool()  # Uses the 'main' revision
        >>> nozomi_pool_dev = NozomiDataPool(revision='dev')  # Uses the 'dev' revision
        >>> nozomi_pool_auth = NozomiDataPool(hf_token='your_token_here')  # Uses authentication
    """

    def __init__(self, revision: str = 'main', hf_token: Optional[str] = None):
        """
        Initialize the NozomiDataPool with the specified revision and optional authentication token.

        This method sets up the data pool using the Nozomi-specific repository and
        the provided revision. It also allows for optional authentication using a
        Hugging Face token.

        :param revision: The revision of the Nozomi dataset to use, defaults to 'main'
        :type revision: str
        :param hf_token: Optional Hugging Face authentication token for accessing
            gated or private repositories
        :type hf_token: Optional[str]
        """
        # The data files and their index live in the same repository at the
        # same revision, so both pairs of arguments receive identical values.
        super().__init__(
            data_repo_id=_NOZOMI_REPO,
            data_revision=revision,
            idx_repo_id=_NOZOMI_REPO,
            idx_revision=revision,
            hf_token=hf_token,
        )