HOME

sh-3ll 1.0
DIR:/opt/cloudlinux/venv/lib64/python3.11/site-packages/pandas/io/__pycache__/
Current File : //opt/cloudlinux/venv/lib64/python3.11/site-packages/pandas/io/__pycache__/parquet.cpython-311.pyc
�

���hY\�� �dZddlmZddlZddlZddlZddlmZmZm	Z	ddl
Z
ddl
mZmZddl
mZddlmZddlmZdd	lmZdd
lmZddlmZddlmZdd
lmZmZddlmZddl m!Z!ddl"m#Z#m$Z$m%Z%m&Z&m'Z'erddl(m)Z)m*Z*m+Z+m,Z,m-Z-dAd�Z.			dBdCd#�Z/Gd$�d��Z0Gd%�d&e0��Z1Gd'�d(e0��Z2eed�)��							dDdEd7���Z3eed�)��d*ddej4ej4ddfdFd@���Z5dS)Gz parquet compat �)�annotationsN)�
TYPE_CHECKING�Any�Literal)�catch_warnings�filterwarnings)�_get_option)�lib)�import_optional_dependency��AbstractMethodError)�doc)�find_stack_level)�check_dtype_backend)�	DataFrame�
get_option)�_shared_docs)�arrow_table_to_pandas)�	IOHandles�
get_handle�
is_fsspec_url�is_url�stringify_path)�DtypeBackend�FilePath�
ReadBuffer�StorageOptions�WriteBuffer�engine�str�return�BaseImplc�d�|dkrtd��}|dkr_ttg}d}|D]:}	|��cS#t$r}|dt	|��zz
}Yd}~�3d}~wwxYwtd|�����|dkrt��S|dkrt��Std	���)
zreturn our implementation�autozio.parquet.engine�z
 - Nz�Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:�pyarrow�fastparquetz.engine must be one of 'pyarrow', 'fastparquet')r�PyArrowImpl�FastParquetImpl�ImportErrorr �
ValueError)r�engine_classes�
error_msgs�engine_class�errs     �H/opt/cloudlinux/venv/lib64/python3.11/site-packages/pandas/io/parquet.py�
get_enginer14s���
�����/�0�0��
����%��7���
�*�	1�	1�L�
1�#�|�~�~�%�%�%���
1�
1�
1��g��C���0�0�
�
�
�
�
�
�����
1�����
��

�
�
�
�	
������}�}��	�=�	 �	 �� � � �
�E�
F�
F�Fs�	=�
A&�A!�!A&�rbF�path�1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]�fsr�storage_options�StorageOptions | None�mode�is_dir�bool�Vtuple[FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any]c�`�t|��}|��tdd���}tdd���}|�'t||j��r|rt	d���nA|�t||jj��rn$tdt|��j	�����t|��r�|��|�Ttd��}td��}	|j�|��\}}n#t|j
f$rYnwxYw|�'td��}|jj|fi|pi��\}}n&|r$t!|��r|d	krtd
���d}	|sR|sPt|t"��r;t$j�|��st+||d|���}	d}|	j}||	|fS)
zFile handling for PyArrow.Nz
pyarrow.fs�ignore)�errors�fsspecz8storage_options not supported with a pyarrow FileSystem.z9filesystem must be a pyarrow or fsspec FileSystem, not a r&r2z8storage_options passed with buffer, or non-supported URLF��is_textr6)rr�
isinstance�
FileSystem�NotImplementedError�spec�AbstractFileSystemr+�type�__name__r�from_uri�	TypeError�ArrowInvalid�core�	url_to_fsrr �osr3�isdirr�handle)
r3r5r6r8r9�path_or_handle�pa_fsr?�pa�handless
          r0�_get_path_or_handlerUVs2��$�D�)�)�N�	�~�*�<��I�I�I��+�H�X�F�F�F�����B��0@�!A�!A���
�)�N����
��
�J�r�6�;�3Q�$R�$R�
���-��b���*�-�-���
��^�$�$�U����"�+�I�6�6�B�.�|�<�<�E�
�%*�%5�%>�%>�t�%D�%D�"��N�N���r��/�
�
�
���
����
�:�/��9�9�F�!6���!6��"�"�#2�#8�b�"�"��B���
�U�&��"8�"8�U�D�D�L�L��S�T�T�T��G��
(��
(�
�~�s�+�+�
(���
�
�n�-�-�	
(���D�%��
�
�
���� ����7�B�&�&s�C.�.D�Dc�8�eZdZed	d���Zd
d�Zddd�ZdS)
r"�dfrr!�Nonec�N�t|t��std���dS)Nz+to_parquet only supports IO with DataFrames)rBrr+)rWs r0�validate_dataframezBaseImpl.validate_dataframe�s0���"�i�(�(�	L��J�K�K�K�	L�	L�c� �t|����Nr)�selfrWr3�compression�kwargss     r0�writezBaseImpl.write����!�$�'�'�'r[Nc� �t|���r]r)r^r3�columnsr`s    r0�readz
BaseImpl.read�rbr[)rWrr!rX)rWrr])r!r)rH�
__module__�__qualname__�staticmethodrZrare�r[r0r"r"�sc�������L�L�L��\�L�(�(�(�(�(�(�(�(�(�(�(r[c�J�eZdZdd�Z					ddd�Zdddejddfdd�ZdS)r(r!rXc�F�tdd���ddl}ddl}||_dS)Nr&z(pyarrow is required for parquet support.��extrar)r�pyarrow.parquet�(pandas.core.arrays.arrow.extension_types�api)r^r&�pandass   r0�__init__zPyArrowImpl.__init__�sF��"��G�	
�	
�	
�	
�	����	8�7�7�7�����r[�snappyNrWrr3�FilePath | WriteBuffer[bytes]r_�
str | None�index�bool | Noner6r7�partition_cols�list[str] | Nonec�R�|�|��d|�dd��i}	|�||	d<|jjj|fi|	��}
|jrBdt
j|j��i}|
jj	}i|�|�}
|
�
|
��}
t|||d|du���\}}}t|tj��rlt|d��r\t|jt"t$f��r;t|jt$��r|j���}n|j}	|�|jjj|
|f|||d�|��n|jjj|
|f||d�|��|�|���dSdS#|�|���wwxYw)	N�schema�preserve_index�PANDAS_ATTRS�wb)r6r8r9�name)r_rx�
filesystem)r_r�)rZ�poprp�Table�from_pandas�attrs�json�dumpsr{�metadata�replace_schema_metadatarUrB�io�BufferedWriter�hasattrrr �bytes�decode�parquet�write_to_dataset�write_table�close)r^rWr3r_rvr6rxr�r`�from_pandas_kwargs�table�df_metadata�existing_metadata�merged_metadatarQrTs                r0razPyArrowImpl.write�s(��	
����#�#�#�.6��
�
�8�T�8R�8R�-S����38��/�0�*����*�2�D�D�1C�D�D��
�8�	C�)�4�:�b�h�+?�+?�@�K� %�� 5��B�!2�B�k�B�O��1�1�/�B�B�E�.A���+��!��-�/
�/
�/
�+����
�~�r�'8�9�9�	5����/�/�	5��>�.��e��=�=�	5�
�.�-�u�5�5�
5�!/�!4�!;�!;�!=�!=���!/�!4��	 ��)�1��� �1��"��!,�#1�)����
����-��� �,��"��!,�)�	��
�����"��
�
������#�"��w�"��
�
�����#���s�7<F
�
F&F�use_nullable_dtypesr:�
dtype_backend�DtypeBackend | lib.NoDefaultc�z�d|d<i}	tdd���}
|
dkrd|	d<t|||d���\}}}	|jjj|f|||d	�|��}
t��5t
d
dt��t|
||	���}ddd��n#1swxYwY|
dkr|�	dd
���}|
j
jr9d|
j
jvr+|
j
jd}tj
|��|_||�|���SS#|�|���wwxYw)NT�use_pandas_metadatazmode.data_manager)�silent�array�split_blocksr2)r6r8)rdr��filtersr=zmake_block is deprecated)r��to_pandas_kwargsF)�copysPANDAS_ATTRS)r	rUrpr��
read_tablerr�DeprecationWarningr�_as_managerr{r�r��loadsr�r�)r^r3rdr�r�r�r6r�r`r��managerrQrT�pa_table�resultr�s                r0rezPyArrowImpl.read�s���)-��$�%����1�$�?�?�?���g���/3��^�,�.A���+��	/
�/
�/
�+����	 �2�t�x�'�2����%��	��
���H� �!�!�

�

���.�&����
/��"/�%5�����

�

�

�

�

�

�

�

�

�

�

����

�

�

�

��'�!�!��+�+�G�%�+�@�@����'�
;�"�h�o�&>�>�>�"*�/�":�?�"K�K�#'�:�k�#:�#:�F�L���"��
�
�����#��w�"��
�
�����#���s0�*D!�&)B�D!�B�D!�"B�#A&D!�!D:�r!rX�rsNNNN)rWrr3rtr_rurvrwr6r7rxryr!rX)r�r:r�r�r6r7r!r)rHrfrgrrrar
�
no_defaultrerir[r0r(r(�s�������	�	�	�	�#+�!�15�+/��@ �@ �@ �@ �@ �J��$)�69�n�15��7 �7 �7 �7 �7 �7 �7 r[r(c�<�eZdZdd�Z					ddd�Z				ddd
�ZdS)r)r!rXc�6�tdd���}||_dS)Nr'z,fastparquet is required for parquet support.rl)rrp)r^r's  r0rrzFastParquetImpl.__init__+s+��1��!O�
�
�
������r[rsNrWrr_�*Literal['snappy', 'gzip', 'brotli'] | Noner6r7c����	�|�|��d|vr|�td���d|vr|�d��}|�d|d<|�td���t	|��}t|��rt
d���	�	�fd�|d<n�rtd	���td
���5|jj	||f|||d�|��ddd��dS#1swxYwYdS)
N�partition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning data�hive�file_scheme�9filesystem is not implemented for the fastparquet engine.r?c�J���j|dfi�pi�����S)Nr~)�open)r3�_r?r6s  ��r0�<lambda>z'FastParquetImpl.write.<locals>.<lambda>Vs8���+�&�+��d�3�3�.�4�"�3�3��d�f�f�r[�	open_withz?storage_options passed with file object or non-fsspec file pathT)�record)r_�write_indexr�)
rZr+r�rDrrrrrpra)
r^rWr3r_rvrxr6r�r`r?s
      `  @r0razFastParquetImpl.write3s�����	
����#�#�#��V�#�#��(B��K���
��V�#�#�#�Z�Z��7�7�N��%�$*�F�=�!��!�%�K���
�
�d�#�#������
	�/��9�9�F�#�#�#�#�#�F�;����	��Q���
��4�
(�
(�
(�	�	��D�H�N���
�(�!�+�
�
��

�
�
�	�	�	�	�	�	�	�	�	�	�	�	����	�	�	�	�	�	s�6C�C �#C c���i}|�dd��}|�dtj��}	d|d<|rtd���|	tjurtd���|�t	d���t|��}d}
t
|��r)td��}|j|d	fi|pi��j	|d
<nNt|t��r9tj
�|��st|d	d|���}
|
j}	|jj|fi|��}|jd
||d�|��|
�|
���SS#|
�|
���wwxYw)Nr�Fr��pandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginezHThe 'dtype_backend' argument is not supported for the fastparquet enginer�r?r2r5r@)rdr�ri)r�r
r�r+rDrrrr�r5rBr rNr3rOrrPrp�ParquetFile�	to_pandasr�)
r^r3rdr�r6r�r`�parquet_kwargsr�r�rTr?�parquet_files
             r0rezFastParquetImpl.readhs���*,��$�j�j�)>��F�F���
�
�?�C�N�C�C�
�).��~�&��	��%���
����.�.��%���
��!�%�K���
��d�#�#��������	"�/��9�9�F�#.�6�;�t�T�#U�#U�o�>S�QS�#U�#U�#X�N�4� � �
��c�
"�
"�	"�2�7�=�=��+>�+>�	"�!��d�E�?����G��>�D�	 �/�4�8�/��G�G��G�G�L�)�<�)�U�'�7�U�U�f�U�U��"��
�
�����#��w�"��
�
�����#���s�"E�E(r�r�)rWrr_r�r6r7r!rX)NNNN)r6r7r!r)rHrfrgrrrarerir[r0r)r)*s|����������CK���15��3�3�3�3�3�p��15��
0 �0 �0 �0 �0 �0 �0 r[r))r6r$rsrWr�$FilePath | WriteBuffer[bytes] | Noner_rurvrwrxryr��bytes | Nonec	��t|t��r|g}t|��}	|�tj��n|}
|	j||
f|||||d�|��|�0t|
tj��sJ�|
���SdS)a�	
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result is
        returned as bytes. If a string, it will be used as Root Directory path
        when writing a partitioned dataset. The engine fastparquet does not
        accept file-like objects.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    N)r_rvrxr6r�)rBr r1r��BytesIOra�getvalue)rWr3rr_rvr6rxr�r`�impl�path_or_bufs           r0�
to_parquetr��s���B�.�#�&�&�*�(�)���f���D�AE�������SW�K��D�J�
��	� ��%�'��	�	��	�	�	��|��+�r�z�2�2�2�2�2��#�#�%�%�%��tr[�FilePath | ReadBuffer[bytes]rdr��bool | lib.NoDefaultr�r�r��&list[tuple] | list[list[tuple]] | Nonec
���t|��}	|tjur4d}
|dur|
dz
}
tj|
t
t
�����nd}t|��|	j|f||||||d�|��S)a�
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. deprecated:: 2.0

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    filters : List[Tuple] or List[List[Tuple]], default None
        To filter out data.
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR`
        operation.
        A single list of tuples can also be used, meaning that no `OR`
        operation between set of filters is to be conducted.

        Using this argument will NOT result in row-wise filtering of the final
        partitions unless ``engine="pyarrow"`` is also specified.  For
        other engines, filtering is only performed at the partition level, that is,
        to prevent the loading of some row-groups and/or files.

        .. versionadded:: 2.1.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> df_parquet_bytes = original_df.to_parquet()
    >>> from io import BytesIO
    >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
    >>> restored_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> restored_df.equals(original_df)
    True
    >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
    >>> restored_bar
        bar
    0    5
    1    6
    2    7
    3    8
    4    9
    >>> restored_bar.equals(original_df[['bar']])
    True

    The function uses `kwargs` that are passed directly to the engine.
    In the following example, we use the `filters` argument of the pyarrow
    engine to filter the rows of the DataFrame.

    Since `pyarrow` is the default engine, we can omit the `engine` argument.
    Note that the `filters` argument is implemented by the `pyarrow` engine,
    which can benefit from multithreading and also potentially be more
    economical in terms of memory.

    >>> sel = [("foo", ">", 2)]
    >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
    >>> restored_part
        foo  bar
    0    3    8
    1    4    9
    zYThe argument 'use_nullable_dtypes' is deprecated and will be removed in a future version.TzFUse dtype_backend='numpy_nullable' instead of use_nullable_dtype=True.)�
stacklevelF)rdr�r6r�r�r�)	r1r
r��warnings�warn�
FutureWarningrrre)r3rrdr6r�r�r�r�r`r��msgs           r0�read_parquetr��s���r�f���D��#�.�0�0�
#�	��$�&�&��X�
�C�	�
�c�=�5E�5G�5G�H�H�H�H�H�#���
�&�&�&��4�9��	���'�/�#��	�	��	�	�	r[)rr r!r")Nr2F)r3r4r5rr6r7r8r r9r:r!r;)Nr$rsNNNN)rWrr3r�rr r_rurvrwr6r7rxryr�rr!r�)r3r�rr rdryr6r7r�r�r�r�r�rr�r�r!r)6�__doc__�
__future__rr�r�rN�typingrrrr�rr�pandas._config.configr	�pandas._libsr
�pandas.compat._optionalr�
pandas.errorsr
�pandas.util._decoratorsr�pandas.util._exceptionsr�pandas.util._validatorsrrqrr�pandas.core.shared_docsr�pandas.io._utilr�pandas.io.commonrrrrr�pandas._typingrrrrrr1rUr"r(r)r�r�r�rir[r0�<module>r�s�����"�"�"�"�"�"�	�	�	�	�����	�	�	�	�����������
������������
.�-�-�-�-�-�������>�>�>�>�>�>�-�-�-�-�-�-�'�'�'�'�'�'�4�4�4�4�4�4�7�7�7�7�7�7���������1�0�0�0�0�0�1�1�1�1�1�1�������������������������������G�G�G�G�J.2���<'�<'�<'�<'�<'�~
(�
(�
(�
(�
(�
(�
(�
(�E �E �E �E �E �(�E �E �E �Pn �n �n �n �n �h�n �n �n �b��\�"3�4�5�5�5�26��&��-1�'+��U�U�U�U�6�5�U�p��\�"3�4�5�5�5�� $�-1�03��25�.��6:�q�q�q�q�6�5�q�q�qr[