HOME


sh-3ll 1.0
DIR:/usr/local/lib64/python3.6/site-packages/pandas/io/sas/__pycache__/
Upload File :
Current File : //usr/local/lib64/python3.6/site-packages/pandas/io/sas/__pycache__/sas7bdat.cpython-36.pyc
3

���h{o�@s�dZddlmZddlmZmZddlZddlZddlm	Z	m
Z
ddlZddl
mZddlmZddljjjZddlmZejeejd	�d
d�ZGdd
�d
�ZGdd�d�ZGdd�deej�ZdS)a�
Read SAS7BDAT files

Based on code written by Jared Hobbs:
  https://bitbucket.org/jaredhobbs/sas7bdat

See also:
  https://github.com/BioStatMatt/sas7bdat

Partial documentation of the file format:
  https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf

Reference for binary data compression:
  http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
�)�abc)�datetime�	timedeltaN)�EmptyDataError�OutOfBoundsDatetime)�get_filepath_or_buffer)�Parser)�
ReaderBase)�
sas_datetimes�unit�returncCs^ytj||dd�Stk
rX|dkr6|jdd��S|dkrL|jdd��Std��YnXd	S)
a�
    Convert to Timestamp if possible, otherwise to datetime.datetime.
    SAS float64 lacks precision for more than ms resolution so the fit
    to datetime.datetime is ok.

    Parameters
    ----------
    sas_datetimes : {Series, Sequence[float]}
       Dates or datetimes in SAS
    unit : {str}
       "d" if the floats represent dates, "s" for datetimes

    Returns
    -------
    Series
       Series of datetime64 dtype or datetime.datetime.
    z
1960-01-01)r�origin�scSstddd�t|d�S)Ni��)�seconds)rr)�	sas_float�r�8/tmp/pip-build-5_djhm0z/pandas/pandas/io/sas/sas7bdat.py�<lambda>7sz$_convert_datetimes.<locals>.<lambda>�dcSstddd�t|d�S)Ni�r)�days)rr)rrrrr;szunit must be 'd' or 's'N)�pdZto_datetimer�apply�
ValueError)r
rrrr�_convert_datetimes s

rc@seZdZdS)�_subheader_pointerN)�__name__�
__module__�__qualname__rrrrrAsrc@seZdZdS)�_columnN)rrrrrrrrEsrc@s�eZdZdZd>dd�Zdd�Zdd	�Zd
d�Zdd
�Zdd�Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zdd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd?d8d9�Zd:d;�Zd<d=�Z dS)@�SAS7BDATReadera!
    Read SAS files in SAS7BDAT format.

    Parameters
    ----------
    path_or_buf : path name or buffer
        Name of SAS file or file-like object pointing to SAS file
        contents.
    index : column identifier, defaults to None
        Column to use as index.
    convert_dates : boolean, defaults to True
        Attempt to convert dates to Pandas datetime values.  Note that
        some rarely used SAS date formats may be unsupported.
    blank_missing : boolean, defaults to True
        Convert empty strings to missing values (SAS uses blanks to
        indicate missing character variables).
    chunksize : int, defaults to None
        Return SAS7BDATReader object for iterations, returns chunks
        with given number of lines.
    encoding : string, defaults to None
        String encoding.
    convert_text : bool, defaults to True
        If False, text variables are left as raw bytes.
    convert_header_text : bool, defaults to True
        If False, header text, including column names, are left as raw
        bytes.
    NTc	
Cs�||_||_||_||_||_||_||_d|_d|_g|_	g|_
g|_g|_g|_
d|_g|_g|_g|_d|_d|_d|_t|�\|_}	}	}	t|jt�r�t|jd�|_|j|_|j�|j�dS)Nzlatin-1�r�rb)�index�
convert_dates�
blank_missing�	chunksize�encoding�convert_text�convert_header_text�default_encoding�compression�column_names_strings�column_names�column_formats�columns�%_current_page_data_subheader_pointers�_cached_page�_column_data_lengths�_column_data_offsets�
_column_types�_current_row_in_file_indexZ_current_row_on_page_indexr�_path_or_buf�
isinstance�str�open�handle�_get_properties�_parse_metadata)
�selfZpath_or_bufr#r$r%r&r'r(r)�_rrr�__init__gs6zSAS7BDATReader.__init__cCstj|jtjd�S)z5Return a numpy int64 array of the column data lengths)�dtype)�np�asarrayr2�int64)r=rrr�column_data_lengths�sz"SAS7BDATReader.column_data_lengthscCstj|jtjd�S)z0Return a numpy int64 array of the column offsets)r@)rArBr3rC)r=rrr�column_data_offsets�sz"SAS7BDATReader.column_data_offsetscCstj|jtjd�d�S)zj
        Returns a numpy character array of the column types:
           s (string) or d (double)
        ZS1)r@)rArBr4r@)r=rrr�column_types�szSAS7BDATReader.column_typescCs(y|jj�Wntk
r"YnXdS)N)r:�close�AttributeError)r=rrrrG�szSAS7BDATReader.closecCs
|jjd�|jjd�|_|jdttj��tjkrD|j�td��d\}}|j	tj
tj�}|tjkr�tj
}d|_d|_tj|_tj|_nd|_tj|_tj|_d|_|j	tjtj�}|tjkr�tj
}||}|j	tjtj�}|dkr�d	|_nd
|_|j	tjtj�d}|tjk�r"tj||_nd|�d�|_|j	tj tj!�}|d
k�rRd|_"n|dk�rdd|_"nd|_"|j	tj#tj$�}|j%d�|_&|j'�r�|j&j(|j)�p�|j*�|_&|j	tj+tj,�}|j%d�|_-|j'�r�|j-j(|j)�p�|j*�|_-t.ddd�}|j/tj0|tj1�}|t2j3|dd�|_4|j/tj5|tj6�}|t2j3|dd�|_7|j8tj9|tj:�|_;|jj|j;d�}|j|7_t|j�|j;k�r�|j�td��|j8tj<|tj=�|_>|j8tj?|tj@�|_A|j	tjB|tjC�}|j%d�|_D|j'�r|jDj(|j)�p�|j*�|_D|j	tjE|tjF�}|j%d�|_G|j'�rB|jGj(|j)�p<|j*�|_G|j	tjH|tjI�}|j%d�|_J|j'�r�|jJj(|j)�p||j*�|_J|j	tjK|tjL�}|j%d�}t|�dk�r�|j(|j)�p�|j*�|_Mn@|j	tjN|tjO�}|j%d�|_M|j'�r|jMj(|j)�p|j*�|_MdS)Nri z'magic number mismatch (not a SAS file?)T�F���<�>zunknown (code=�)�1�unix�2Zwindows�unknowns i�rr)rz*The SAS7BDAT file appears to be truncated.)rr)Pr6�seek�readr1�len�const�magicrGr�_read_bytesZalign_1_offsetZalign_1_lengthZu64_byte_checker_valueZ
align_2_value�U64�_int_lengthZpage_bit_offset_x64�_page_bit_offsetZsubheader_pointer_length_x64�_subheader_pointer_lengthZpage_bit_offset_x86Zsubheader_pointer_length_x86Zalign_2_offsetZalign_2_lengthZalign_1_checker_valueZendianness_offsetZendianness_length�
byte_orderZencoding_offsetZencoding_lengthZencoding_names�
file_encodingZplatform_offsetZplatform_length�platformZdataset_offsetZdataset_length�rstrip�namer)�decoder'r*Zfile_type_offsetZfile_type_length�	file_typer�_read_floatZdate_created_offsetZdate_created_lengthrZto_timedeltaZdate_createdZdate_modified_offsetZdate_modified_lengthZ
date_modified�	_read_intZheader_size_offsetZheader_size_length�
header_lengthZpage_size_offsetZpage_size_length�_page_lengthZpage_count_offsetZpage_count_lengthZ_page_countZsas_release_offsetZsas_release_lengthZsas_releaseZsas_server_type_offsetZsas_server_type_lengthZserver_typeZos_version_number_offsetZos_version_number_length�
os_versionZos_name_offsetZos_name_length�os_nameZos_maker_offsetZos_maker_length)r=Zalign1Zalign2�bufZtotal_align�epoch�xrrrr;�s�





zSAS7BDATReader._get_propertiescCs"|j|jpdd�}|dkrt�|S)Nr)�nrows)rTr&�
StopIteration)r=�darrr�__next__.szSAS7BDATReader.__next__cCsJ|dkr|j�td��|j||�}|dkr0dnd}tj|j||�dS)NrJrIzinvalid float width�frr)rJrI)rGrrX�struct�unpackr])r=�offset�widthrj�fdrrrrd5szSAS7BDATReader._read_floatcCsP|dkr|j�td��|j||�}dddd	d
�|}tj|j||�d}|S)
Nr�rJrIzinvalid int width�b�h�l�q)rrwrJrIr)rrwrJrI)rGrrXrrrsr])r=rtrurj�itZivrrrre>szSAS7BDATReader._read_intcCs�|jdkrX|jj|�|jj|�}t|�|krT|j�d|d�d|d�d�}t|��|S||t|j�krz|j�td��|j|||�SdS)NzUnable to read rz bytes from file position �.zThe cached page is too small.)r1r6rSrTrUrGr)r=rt�lengthrj�msgrrrrXGs
zSAS7BDATReader._read_bytescCsZd}xP|sT|jj|j�|_t|j�dkr*Pt|j�|jkrJ|j�td��|j�}qWdS)NFrz2Failed to read a meta data page from the SAS file.)r6rTrgr1rUrGr�_process_page_meta)r=�donerrrr<VszSAS7BDATReader._parse_metadatacCsV|j�tjtjgtj}|j|kr,|j�|jtj@}|jtjk}|pT|pT|jgkS)N)	�_read_page_headerrV�page_meta_typeZ
page_amd_type�page_mix_types�_current_page_type�_process_page_metadata�page_data_typer0)r=�pt�is_data_pageZis_mix_pagerrrr�as
z!SAS7BDATReader._process_page_metacCsX|j}tj|}|j|tj�|_tj|}|j|tj�|_tj	|}|j|tj
�|_dS)N)r[rVZpage_type_offsetreZpage_type_lengthr�Zblock_count_offsetZblock_count_lengthZ_current_page_block_countZsubheader_count_offsetZsubheader_count_length�_current_page_subheaders_count)r=�
bit_offsetZtxrrrr�ns


z SAS7BDATReader._read_page_headercCst|j}xht|j�D]Z}|jtj||�}|jdkr4q|jtjkrBq|j	|j
�}|j||j|j�}|j
||�qWdS)Nr)r[�ranger��_process_subheader_pointersrVZsubheader_pointers_offsetr~r+Ztruncated_subheader_id�_read_subheader_signaturert�_get_subheader_index�ptype�_process_subheader)r=r��i�pointer�subheader_signature�subheader_indexrrrr�ys
z%SAS7BDATReader._process_page_metadatacCs`tjj|�}|dkr\|tjkp$|dk}|tjk}|jdkrL|rL|rLtjj}n|j�t	d��|S)Nrr!zUnknown subheader signature)
rVZsubheader_signature_to_index�getZcompressed_subheader_idZcompressed_subheader_typer+�SASIndex�data_subheader_indexrGr)r=�	signaturer+r�r#�f1�f2rrrr��s

z#SAS7BDATReader._get_subheader_indexc
Cs�|j}|||}|j||j�}||j7}|j||j�}||j7}|j|d�}|d7}|j|d�}t�}	||	_||	_||	_||	_|	S)Nr)r\rerZrrtr~r+r�)
r=rtZsubheader_pointer_indexZsubheader_pointer_lengthZtotal_offsetZsubheader_offsetZsubheader_lengthZsubheader_compressionZsubheader_typerlrrrr��s

z*SAS7BDATReader._process_subheader_pointerscCs|j||j�}|S)N)rXrZ)r=rtr�rrrr��sz(SAS7BDATReader._read_subheader_signaturecCs�|j}|j}|tjjkr |j}n�|tjjkr4|j}n�|tjjkrH|j	}n�|tjj
kr\|j}nt|tjjkrp|j
}n`|tjjkr�|j}nL|tjjkr�|j}n8|tjjkr�|j}n$|tjjkr�|jj|�dStd��|||�dS)Nzunknown subheader index)rtr~rVr�Zrow_size_index�_process_rowsize_subheaderZcolumn_size_index�_process_columnsize_subheaderZcolumn_text_index�_process_columntext_subheaderZcolumn_name_index�_process_columnname_subheaderZcolumn_attributes_index�#_process_columnattributes_subheaderZformat_and_label_index�_process_format_subheaderZcolumn_list_index�_process_columnlist_subheaderZsubheader_counts_index�_process_subheader_countsr�r0�appendr)r=r�r�rtr~�	processorrrrr��s.z!SAS7BDATReader._process_subheadercCs�|j}|}|}|jr&|d7}|d7}n|d7}|d7}|j|tj||�|_|j|tj||�|_|j|tj||�|_	|j|tj
||�|_tj|}|j|||�|_
|j|d�|_|j|d�|_dS)Ni�i�ibizrw)rZrYrerVZrow_length_offset_multiplierZ
row_lengthZrow_count_offset_multiplier�	row_countZcol_count_p1_multiplier�col_count_p1Zcol_count_p2_multiplier�col_count_p2Z'row_count_on_mix_page_offset_multiplierZ_mix_page_row_count�_lcs�_lcp)r=rtr~�int_lenZ
lcs_offsetZ
lcp_offsetZmxrrrr��s(

z)SAS7BDATReader._process_rowsize_subheadercCsT|j}||7}|j||�|_|j|j|jkrPtd|j�d|j�d|j�d��dS)Nz Warning: column count mismatch (z + z != z)
)rZre�column_countr�r��print)r=rtr~r�rrrr��sz,SAS7BDATReader._process_columnsize_subheadercCsdS)Nr)r=rtr~rrrr��sz(SAS7BDATReader._process_subheader_countsc
Cs�||j7}|j|tj�}|j||�}|d|�jd�}|}|jrR|j|jpN|j	�}|j
j|�t|j
�dk�r�d}xtj
D]}||krz|}qzW||_||j8}|d}	|jr�|	d7}	|j|	|j�}|jd�}|dk�rd|_|d}	|jr�|	d7}	|j|	|j�}|d|j�|_n�|tjk�rV|d	}	|j�r6|	d7}	|j|	|j�}|d|j�|_nH|jdk�r�d|_|d}	|j�r�|	d7}	|j|	|j�}|d|j�|_|j�r�t|d
��r�|jj|j�p�|j	�|_dS)Nrs rr!�rJ�� �(�creator_proc)rZrerVZtext_block_size_lengthrXr`r)rbr'r*r,r�rUZcompression_literalsr+rYr�r�r�Zrle_compression�hasattr)
r=rtr~Ztext_block_sizerjZ	cname_raw�cnameZcompression_literalZclZoffset1rrrr��sX



z,SAS7BDATReader._process_columntext_subheaderc
Cs�|j}||7}|d|dd}x�t|�D]�}|tj|dtj}|tj|dtj}|tj|dtj}|j|tj�}	|j|tj	�}
|j|tj
�}|j|	}|jj
||
|
|��q,WdS)Nrw�rIr)rZr�rVZcolumn_name_pointer_lengthZ!column_name_text_subheader_offsetZcolumn_name_offset_offsetZcolumn_name_length_offsetreZ!column_name_text_subheader_lengthZcolumn_name_offset_lengthZcolumn_name_length_lengthr,r-r�)
r=rtr~r�Zcolumn_name_pointers_countr�Ztext_subheaderZcol_name_offsetZcol_name_length�idx�
col_offsetZcol_lenZname_strrrrr�-s


z,SAS7BDATReader._process_columnname_subheaderc
Cs�|j}|d|d|d}x�t|�D]�}||tj||d}|d|tj||d}|d|tj||d}|j||�}	|jj|	�|j|tj	�}	|j
j|	�|j|tj�}	|jj|	dkr�dnd�q(WdS)Nrwr�rIr�d�s)
rZr�rVZcolumn_data_offset_offsetZcolumn_data_length_offsetZcolumn_type_offsetrer3r�Zcolumn_data_length_lengthr2Zcolumn_type_lengthr4)
r=rtr~r�Zcolumn_attributes_vectors_countr�Zcol_data_offsetZcol_data_lenZ	col_typesrlrrrr�Msz2SAS7BDATReader._process_columnattributes_subheadercCsdS)Nr)r=rtr~rrrr�gsz,SAS7BDATReader._process_columnlist_subheadercCs�|j}|tjd|}|tjd|}|tjd|}|tjd|}|tjd|}|tjd|}	|j|tj	�}
t
|
t|j�d�}|j|tj
�}|j|tj�}
|j|tj�}t
|t|j�d�}|j|tj�}|j|	tj�}|j|}||||�}|j|}||||
�}t|j�}t�}||_|j||_||_||_|j||_|j||_|jj|�|jj|�dS)N�r)rZrVZ)column_format_text_subheader_index_offsetZcolumn_format_offset_offsetZcolumn_format_length_offsetZ(column_label_text_subheader_index_offsetZcolumn_label_offset_offsetZcolumn_label_length_offsetreZ)column_format_text_subheader_index_length�minrUr,Zcolumn_format_offset_lengthZcolumn_format_length_lengthZ(column_label_text_subheader_index_lengthZcolumn_label_offset_lengthZcolumn_label_length_lengthr/rZcol_idr-ra�label�formatr4�ctyper2r~r.r�)r=rtr~r�Ztext_subheader_formatZcol_format_offsetZcol_format_lenZtext_subheader_labelZcol_label_offsetZ
col_label_lenrlZ
format_idxZformat_startZ
format_lenZ	label_idxZlabel_startZ	label_lenZlabel_namesZcolumn_labelZformat_namesZ
column_formatZcurrent_column_number�colrrrr�ks@





z(SAS7BDATReader._process_format_subheadercCs�|dkr|jdk	r|j}n|dkr(|j}t|j�dkrF|j�td��|j|jkrVdS|j|j}||krn|}|jjd�}|jjd�}tj	||ft
d�|_tj|d|ftj
d�|_d|_t|�}|j|�|j�}|jdk	r�|j|j�}|S)NrzNo columns to parse from filer�r�)r@rI)r&r�rUr4rGrr5�countrA�empty�object�
_string_chunk�zerosZuint8�_byte_chunk�_current_row_in_chunk_indexrrT�_chunk_to_dataframer#Z	set_index)r=rm�mZnd�ns�p�rsltrrrrT�s.

zSAS7BDATReader.readcCs�g|_|jj|j�|_t|j�dkr(dSt|j�|jkrf|j�dt|j�d�d|jd�d�}t|��|j�|j	}|t
jkr�|j�|t
j
@}t
jgt
j}|r�|j	|kr�|j�SdS)NrTz-failed to read complete page from file (read rz of z bytes)F)r0r6rTrgr1rUrGrr�r�rVr�r�r�r��_read_next_page)r=rZ	page_typer�r�rrrr��s"

zSAS7BDATReader._read_next_pagec
Cs�|j}|j}t|||�}tj|d�}d
\}}�xft|j�D�]V}|j|}|j|dkr�|j|dd�fj	|j
dd�||<tj||tj
d�||<|jr�|j|tjkr�t||d�||<n"|j|tjkr�t||d�||<|d7}q<|j|dk�rx|j|dd�f||<|j�rD|jdk	�rD||jj|j�p<|j�||<|j�rn||jj�dk}	tj|j|	|f<|d7}q<|j�td	|j|����q<W|S)N)r#rr�r)r@rrr�zunknown column type )rr) r�r5r�rZ	DataFramer�r-r4r��viewr]rArBZfloat64r$r.rVZsas_date_formatsrZsas_datetime_formatsr�r(r'r8rbr*r%rU�nan�locrGr)
r=�nr��ixr�ZjsZjb�jra�iirrrr��s8
$


z"SAS7BDATReader._chunk_to_dataframe)NTTNNTT)N)!rrr�__doc__r?rDrErFrGr;rprdrerXr<r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�rTr�r�rrrrr JsJ
$		
4 0
"r )r��collectionsrrrrrZnumpyrAZ
pandas.errorsrrZpandasrZpandas.io.commonrZpandas.io.sas._sasrZpandas.io.sas.sas_constants�ioZsasZ
sas_constantsrVZpandas.io.sas.sasreaderr	ZSeriesr8rrr�Iteratorr rrrr�<module>s!