HOME


sh-3ll 1.0
DIR:/usr/local/lib64/python3.6/site-packages/pandas/tests/io/__pycache__/
Upload File :
Current File : //usr/local/lib64/python3.6/site-packages/pandas/tests/io/__pycache__/test_parquet.cpython-36.pyc
3

���h=n�@s�dZddlZddlmZddlmZddlZddlmZddl	Z
ddlZddlj
jZddlZddljZddlmZmZmZmZmZyddlZdZWnek
r�dZYnXyddlZdZWnek
r�dZYnXej j!d	�Z"ej#ej$d
ej j%edd�d
�ej$dej j%edd�d
�gd�dd��Z&ej#dd��Z'ej#dd��Z(ej#dd��Z)ej#dd��Z*ej#dd��Z+d8dd�Z,d d!�Z-d"d#�Z.d$d%�Z/d&d'�Z0d(d)�Z1d*d+�Z2d,d-�Z3d.d/�Z4Gd0d1�d1�Z5Gd2d3�d3e5�Z6Gd4d5�d5e5�Z7Gd6d7�d7e5�Z8dS)9z test parquet compat �N)�LooseVersion)�BytesIO)�catch_warnings)�FastParquetImpl�PyArrowImpl�
get_engine�read_parquet�
to_parquetTFz4ignore:RangeIndex.* is deprecated:DeprecationWarning�fastparquetzfastparquet is not installed)�reason)Zmarks�pyarrowzpyarrow is not installed)�paramscCs|jS)N)�param)�request�r�>/tmp/pip-build-5_djhm0z/pandas/pandas/tests/io/test_parquet.py�engine-srcCststjd�dS)Nzpyarrow is not installedr)�
_HAVE_PYARROW�pytest�skiprrrr�paAs
rcCststjd�dS)Nzfastparquet is not installedr
)�_HAVE_FASTPARQUETrrrrrr�fpHs
rcCstjdddgdd��S)N����foo)�A�B)�pd�	DataFramerrrr�	df_compatOsr!c	CsDtjtd�ttdd��tjdddd�dd	dgtjd
dd�d
��}|S)N�abcr�g@g@�float64)�dtypeTF�20130101r)�periods)�a�b�d�e�f)rr �list�range�np�arange�
date_range)�dfrrr�df_cross_compatTsr3cCs�tjtd�dtjdgdddgdddgddd	gttd
d��tjdd
�jd�tjdddd�dtjdgdddgtjddd�tj	d�tj
tj	d�gd��S)Nr"r(�csfoosbarsbazr�bar�bazrr#r��u1g@g@r$)r%g@g@TFr&)r'Z20130103)�stringZstring_with_nanZstring_with_none�bytes�unicode�intZuint�floatZfloat_with_nan�bool�datetimeZdatetime_with_nat)rr r-r/�nanr.r0�astyper1�	TimestampZNaTrrrr�df_fullfs

rCrc	
	s~�p
ddi��pi��dkr ��|r4|�d<|�d<�������fdd�}	�dkrrtj���|	|�WdQRXn|	|�dS)aVerify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    �compressionNrcsXxRt|�D]F}�j�f��tdd��t�f��}WdQRXtj�|��d�q
WdS)NT)�record)�check_names�
check_like)r.r	rr�tm�assert_frame_equal)�repeat�_�actual)rGrFr2�expected�path�read_kwargs�write_kwargsrr�compare�sz!check_round_trip.<locals>.compare)rH�ensure_clean)
r2rrNrPrOrMrFrGrJrQr)rGrFr2rMrNrOrPr�check_round_trip~s!

rScCs&tjt��t|dd�WdQRXdS)Nrr5)r�raises�
ValueErrorrS)r!rrr�test_invalid_engine�srVc	Cs$tjdd��t|�WdQRXdS)Nzio.parquet.enginer)r�option_contextrS)r!rrrr�test_options_py�srXc	Cs$tjdd��t|�WdQRXdS)Nzio.parquet.enginer
)rrWrS)r!rrrr�test_options_fp�srYc	Cs$tjdd��t|�WdQRXdS)Nzio.parquet.engine�auto)rrWrS)r!rrrrr�test_options_auto�sr[cCsttd�t�st�ttd�t�s$t�tjdd��<ttd�t�sDt�ttd�t�sVt�ttd�t�sht�WdQRXtjdd��<ttd�t�s�t�ttd�t�s�t�ttd�t�s�t�WdQRXtjdd��>ttd�t�s�t�ttd�t�s�t�ttd�t��st�WdQRXdS)Nrr
zio.parquet.enginerZ)�
isinstancerr�AssertionErrorrrrW)rrrrr�test_options_get_engine�sr^cCs0ddlm}|jd�}|jd�}ts(dnttj�t|�k}tsBdnttj�t|�k}to\|}tof|}|or|�r,|r�d|�d�}t	j
t|d��td	�WdQRXn&d
}t	j
t|d��td	�WdQRX|�rd|�d�}t	j
t|d��td	�WdQRXn&d}t	j
t|d��td	�WdQRXdS)
Nr)�VERSIONSrr
FzPandas requires version .z. or newer of .pyarrow.)�matchrZz%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)
Zpandas.compat._optionalr_�getrrr�__version__rr
rrT�ImportErrorr)r_Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr`rrr�"test_get_engine_auto_error_message�s0



rdcCsj|}tj��T}|j||dd�t||d�}tj||�t||ddgd�}tj||ddg�WdQRXdS)N)rrD)rr(r*)r�columns)rHrRr	rrI)r3rrr2rN�resultrrr�test_cross_engine_pa_fps
rgcCs�ttj�dkr&ttj�dkr&tjd�|}tj��j}|j||dd�tdd��Dt	||d�}tj
||�t	||dd	gd
�}tj
||dd	g�WdQRXWdQRXdS)Nz0.15z0.13z`Reading fastparquet with pyarrow in 0.14 fails: https://issues.apache.org/jira/browse/ARROW-6492)rrDT)rE)rr(r*)rre)rrrbrZxfailrHrRr	rrrI)r3rrr2rNrfrrr�test_cross_engine_fp_pa!s
rhc@seZdZdd�ZdS)�BasecCs>tj��,}tj|��t|||dd�WdQRXWdQRXdS)N)rD)rHrRrrTr	)�selfr2r�excrNrrr�check_error_on_write:s
zBase.check_error_on_writeN)�__name__�
__module__�__qualname__rlrrrrri9sric@sreZdZdd�Zdd�Zdd�Zejjddd	d
dg�dd
��Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zdd�ZdS)�	TestBasiccCsHxBtjdddg�ddtjd�tjdddg�gD]}|j||t�q.WdS)Nrrrrr&)r�SeriesrBr/�arrayrlrU)rjr�objrrr�
test_errorBszTestBasic.test_errorcCs6tjtd�ttdd��d��}ddg|_t||�dS)Nr"rr#)r9r<rr5)rr r-r.rerS)rjrr2rrr�test_columns_dtypesLs
zTestBasic.test_columns_dtypescCs�tjtd�ttdd��d��}ddg|_|j||t�ddg|_|j||t�tjddddd�tjddddd�g|_|j||t�dS)	Nr"rr#)r9r<rsfoosbari�)rr r-r.rerlrUr?)rjrr2rrr�test_columns_dtypes_invalidSs

z%TestBasic.test_columns_dtypes_invalidrDN�gzip�snappy�brotlicCsP|dkrtjd�n|dkr&tjd�tjddddgi�}t||d|id�dS)	NrxryrrrrrD)rP)r�importorskiprr rS)rjrrDr2rrr�test_compressiones
zTestBasic.test_compressioncCsJtjtd�ttdd��d��}tjdtd�i�}t|||ddgid�dS)Nr"rr#)r9r<r9re)rMrO)rr r-r.rS)rjrr2rMrrr�test_read_columnsqszTestBasic.test_read_columnscCs�|dk}tjddddgi�}t||�dddgtjddd�td	�dddgg}x:|D]2}||_t|tj�rv|jjd�|_t|||d
�qRWdddg|_d|j_	t||�dS)
Nr
rrrrr#r&)r'r")rFrr)
rr rSr1r-�indexr\Z
DatetimeIndex�
_with_freq�name)rjrrFr2�indexesr}rrr�test_write_indexzs

zTestBasic.test_write_indexcCs>|}tjddddgi�}tjjddd	g�}||_t||�dS)
Nrrrrr(r))r(r)r(r)r)r)rr �
MultiIndex�from_tuplesr}rS)rjrrr2r}rrr�test_write_multiindex�s
zTestBasic.test_write_multiindexcCs<tjjdd	d
g�}tjtjjdd�|d�}|j||t�dS)Nr(rrr)r#r)re)r(r)r(r)r)r)	rr�r�r r/�random�randnrlrU)rjrZ
mi_columnsr2rrr�test_write_column_multiindex�sz&TestBasic.test_write_column_multiindexcCs�|}tjdddd�}tjtjjdt|�d�td�d�}tjj	d	d
g|gddgd
�}|j
dd
�}x@||gD]4}||_t||�t||dddgi|ddgd�qjWdS)Nz01-Jan-2018z01-Dec-2018ZMS)�freqrr�ABC)reZLevel1ZLevel2�level�date)�namesrerr)rOrM)
rr1r r/r�r��lenr-r�Zfrom_product�copyr}rS)rjrr�datesr2Zindex1Zindex2r}rrr�test_multiindex_with_columns�s$
z&TestBasic.test_multiindex_with_columnsc	Cs�tjdddgdddgd��}ddd	�}|jd
d�}t||||d�tjdddgdddgd�d
ddgd�}t||||d�ddddddddgddddddddgg}tjttd��dd�td�D�d�|d�}|jd
d�}t||||d�dS)Nrrr�q�r�s)r(r)F)rDr}T)Zdrop)rPrMZzyxZwvuZtsr)r}r5r6rZqux�one�two�cSsg|]
}|�qSrr)�.0�irrr�
<listcomp>�sz7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r�r�)rr Zreset_indexrSr-r.)rjrr2rPrMZarraysrrr�test_write_ignoring_index�s
"&z#TestBasic.test_write_ignoring_index)rmrnrortrurvr�mark�parametrizer{r|r�r�r�r�r�rrrrrpAs
		rpc@seZdZdd�Zdd�Zdd�Zdd�Zd	d
�Zdd�Zd
d�Z	e
jd�ej
jddggg�dd���Zeje
jd�dd���Ze
jd�dd��Ze
jd�dd��Zdd�Zdd�Zdd �Zd!d"�Ze
jdd#d$�d%d&��Ze
jdd'd$�d(d)��Ze
jdd*d$�d+d,��Ze
jdd-d$�d.d/��Zd0S)1�TestParquetPyArrowcCsB|}tjdddd�}|jd�}||d<dddg|d<t||�dS)Nr&rzEurope/Brussels)r'�tz�datetime_tzTZbool_with_none)rr1r~rS)rjrrCr2�dtirrr�
test_basic�s
zTestParquetPyArrow.test_basiccCs<|}tjdddd�|d<t|||ddgdddgid	�dS)
Nr&rzEurope/Brussels)r'r�r�r9r<re)rMrO)rr1rS)rjrrCr2rrr�test_basic_subset_columns�s
z,TestParquetPyArrow.test_basic_subset_columnscCs6tjtjd�jdd�td�d�j�}|j||t�dS)N�r#r�aaa)re)	rr r/r0�reshaper-r�rlrU)rjrr2rrr�test_duplicate_columns�s$z)TestParquetPyArrow.test_duplicate_columnscCs�ttj�td�kr:tjdtjdddd�i�}|j||t�tjdtjddd�i�}|j||t	�tjddd	d
gi�}|j||t�dS)Nz
0.15.1.devr(�2013�Mr)r�r'z1 day)r'rg@)
rrrbrr �period_rangerl�	Exception�timedelta_range�NotImplementedError)rjrr2rrr�test_unsupported�sz#TestParquetPyArrow.test_unsupportedcCs�tj�}tjtd��|d<tjddddddgtjdddg�d�|d<tjddddddgddd	gd
d�|d<ttj�td�kr�t||�n|j	t
�}t|||d
�dS)NZabcdefr(r5rr6)r%r)r4r*T)�
categories�orderedz0.15.0)rM)rr �Categoricalr-ZCategoricalDtyperrrbrSrA�object)rjrr2rMrrr�test_categoricals"
z#TestParquetPyArrow.test_categoricalcCs2tjd�}|j�}t|d�}t||d||d�dS)N�s3fs)�
filesystemzpandas-test/pyarrow.parquet)rNrOrP)rrzZS3FileSystem�dictrS)rjr!�s3_resourcerr�Zs3�kwrrr�test_s3_roundtrip_explicit_fss

z0TestParquetPyArrow.test_s3_roundtrip_explicit_fscCst||dd�dS)Nz s3://pandas-test/pyarrow.parquet)rN)rS)rjr!r�rrrr�test_s3_roundtrip(sz$TestParquetPyArrow.test_s3_roundtripr��
partition_colrc	Csp|j�}ttj�td�ko*ttj�td�k}|rP|r:d}nd}||j|�||<t|||d|dd�ddd	�dS)
Nz1.0.0z2.0.0Zint32�categoryzs3://pandas-test/parquet_dir)�partition_colsrDTr)rMrNrPrGrJ)r�rrrbrArS)rjr!r�rr�Zexpected_dfZpa10Zpartition_col_typerrr�test_s3_roundtrip_for_dir,s"
z,TestParquetPyArrow.test_s3_roundtrip_for_dirrcCsd}tj|�}tj||�dS)Nzfhttps://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/io/data/parquet/simple.parquet)rrrHrI)rjr!�urlr2rrr�test_parquet_read_from_urlOs
z-TestParquetPyArrow.test_parquet_read_from_urlcCs*t�}|j|�tj|�}tj||�dS)N)rr	rrrHrI)rjr!�bufferZdf_from_bufrrr�test_read_file_like_obj_supportYs

z2TestParquetPyArrow.test_read_file_like_obj_supportcCsd|jdd�|jdd�tjtdd��tjd�WdQRXtjtdd��|jd�WdQRXdS)N�HOMEZTestingUserZUSERPROFILEz.*TestingUser.*)r`z~/file.parquet)ZsetenvrrT�OSErrorrrr	)rjr!Zmonkeypatchrrr�test_expand_user`sz#TestParquetPyArrow.test_expand_usercCstddg}|}tj��V}|j||dd�ddlj}|j|dd�}t|jj�dksRt	�|jjt
|�ksft	�WdQRXdS)Nr>r<)r�rDrF)�validate_schemar)rH�ensure_clean_dirr	�pyarrow.parquet�parquet�ParquetDatasetr��
partitions�partition_namesr]�set)rjrrCr�r2rN�pq�datasetrrr�test_partition_cols_supportedis

z0TestParquetPyArrow.test_partition_cols_supportedc	Csvd}|g}|}tj��V}|j||dd�ddlj}|j|dd�}t|jj�dksTt	�|jjt
|�ksht	�WdQRXdS)Nr>)r�rDrF)r�r)rHr�r	r�r�r�r�r�r�r]r�)	rjrrCr�Zpartition_cols_listr2rNr�r�rrr�test_partition_cols_stringus

z-TestParquetPyArrow.test_partition_cols_stringcCstj�}t||�dS)N)rr rS)rjrr2rrr�test_empty_dataframe�sz'TestParquetPyArrow.test_empty_dataframecCsVddl}tjdddgi�}|j|jd|j�d�g�}|jt�}t||d|i|d�dS)Nr�xr)�type�schema)rPrM)	rrr r��fieldZbool_rAr>rS)rjrrr2r�Zout_dfrrr�test_write_with_schema�s

z)TestParquetPyArrow.test_write_with_schemaz0.15.0)�min_versioncCs�tjtjdddgdd�tjdddgdd�tjdddgd	d�d
��}ttj�td�krZ|}n(|j|jjd�|j	jd�|j
jd
�d
�}t|||d�tjdtjddddgdd�i�}ttj�td�kr�|}n|j|jjd�d�}t|||d�dS)NrrrZInt64)r%ZUInt32r(r4r9)r(r)r4z0.16.0Zint64r�)rMr$)r()rr rqrrrbZassignr(rAr)r4rS)rjrr2rMrrr� test_additional_extension_arrays�s$ z3TestParquetPyArrow.test_additional_extension_arraysz0.16.0cCs(tjdtjdddd�i�}t||�dS)Nr*z
2012-01-01r�D)r'r�)rr r�rS)rjrr2rrr�test_additional_extension_types�sz2TestParquetPyArrow.test_additional_extension_typesz0.14cCs0tjdtjdddd�i�}t||ddid�dS)	Nr(z
2017-01-01Z1n�
)r�r'�versionz2.0)rP)rr r1rS)rjrr2rrr�test_timestamp_nanoseconds�sz-TestParquetPyArrow.test_timestamp_nanosecondsz0.17c
Cs^tjdttdd��i�}tj��$}|j||�t||dgdd�}WdQRXt|�dksZt	�dS)	Nr(rr�==F)�filtersZuse_legacy_datasetr)r(r�r)
rr r-r.rHrRr	rr�r])rjrr2rNrfrrr�test_filter_row_groups�s
z)TestParquetPyArrow.test_filter_row_groupsN)rmrnror�r�r�r�r�r�r��td�
skip_if_norr�r�r�rH�networkr�r�r�r�r�r�r�r�r�r�r�rrrrr��s*"		

r�c@s�eZdZejddd�dd��Zejjdd�dd	��Z	d
d�Z
dd
�Zdd�Zdd�Z
dd�Zdd�Zdd�Zdd�Zdd�Zdd�ZdS)�TestParquetFastParquetr
z0.3.2)r�cCsF|}tjdddd�}|jd�}||d<tjddd�|d<t||�dS)	Nr&rz
US/Eastern)r'r�r�z1 day)r'�	timedelta)rr1r~r�rS)rjrrCr2r�rrrr��s
z!TestParquetFastParquet.test_basicz
not supported)rcCs6tjtjd�jdd�td�d�j�}|j||t�dS)Nr�r#rr�)re)	rr r/r0r�r-r�rlrU)rjrr2rrrr��s$z-TestParquetFastParquet.test_duplicate_columnscCs@tjddddgi�}tjddtjdgidd�}t|||d�dS)	Nr(TFg�?gZfloat16)r%)rM)rr r/r@rS)rjrr2rMrrr�test_bool_with_none�sz*TestParquetFastParquet.test_bool_with_nonecCsNtjdtjdddd�i�}|j||t�tjddddgi�}|j||t�dS)Nr(r�r�r)r�r'rg@)rr r�rlrU)rjrr2rrrr��sz'TestParquetFastParquet.test_unsupportedcCs&tjdtjtd��i�}t||�dS)Nr(r")rr r�r-rS)rjrr2rrrr��sz'TestParquetFastParquet.test_categoricalc
Csfdttdd��i}tj|�}tj��(}|j||ddd�t||dgd�}WdQRXt|�dksbt	�dS)	Nr(rrr)rDZrow_group_offsets�==)r�)r(r�r)
r-r.rr rHrRr	rr�r])rjrr*r2rNrfrrrr��s

z-TestParquetFastParquet.test_filter_row_groupscCst||dd�dS)Nz$s3://pandas-test/fastparquet.parquet)rN)rS)rjr!r�rrrrr��sz(TestParquetFastParquet.test_s3_roundtripc
Cslddg}|}tj��N}|j|d|dd�tjj|�s8t�ddl}|j|d�j	}t
|�dks^t�WdQRXdS)Nr>r<r
)rr�rDrFr)rHr�r	�osrN�existsr]r
�ParquetFile�catsr�)rjrrCr�r2rNr
�actual_partition_colsrrrr��s
z4TestParquetFastParquet.test_partition_cols_supportedc
Cshd}|}tj��N}|j|d|dd�tjj|�s4t�ddl}|j|d�j	}t
|�dksZt�WdQRXdS)Nr>r
)rr�rDrFr)rHr�r	r�rNr�r]r
r�r�r�)rjrrCr�r2rNr
r�rrrr�
s
z1TestParquetFastParquet.test_partition_cols_stringc
Cslddg}|}tj��N}|j|dd|d�tjj|�s8t�ddl}|j|d�j	}t
|�dks^t�WdQRXdS)Nr>r<r
)rrD�partition_onrFr)rHr�r	r�rNr�r]r
r�r�r�)rjrrCr�r2rNr
r�rrr�test_partition_on_supporteds
z2TestParquetFastParquet.test_partition_on_supportedcCsNddg}|}tjt��.tj��}|j|dd||d�WdQRXWdQRXdS)Nr>r<r
)rrDr�r�)rrTrUrHr�r	)rjrrCr�r2rNrrr�3test_error_on_using_partition_cols_and_partition_on/s
zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_oncCs*tj�}|j�}d|j_t|||d�dS)Nr})rM)rr r�r}rrS)rjrr2rMrrrr�=sz+TestParquetFastParquet.test_empty_dataframeN)rmrnror�r�r�rr�rr�r�r�r�r�r�r�r�r�r�r�rrrrr��s

r�)NNNNNTFr)9�__doc__r?Zdistutils.versionr�iorr��warningsrZnumpyr/rZpandas.util._test_decorators�utilZ_test_decoratorsr�ZpandasrZpandas._testingZ_testingrHZpandas.io.parquetrrrrr	rrrcr
rr��filterwarningsZ
pytestmarkZfixturerZskipifrrrr!r3rCrSrVrXrYr[r^rdrgrhrirpr�r�rrrr�<module>sv


	
3+z