HOME


sh-3ll 1.0
DIR:/usr/local/lib64/python3.6/site-packages/pandas/io/sas/__pycache__/
Upload File :
Current File : //usr/local/lib64/python3.6/site-packages/pandas/io/sas/__pycache__/sas_xport.cpython-36.pyc
3

���ho8�@sdZddlmZddlmZddlZddlZddlZddlm	Z	ddl
Zddlm
Z
ddlmZdZd	Zd
ZdZdd
ddddddddddddddgZdZdZdZdZd e�d!e�d!e�d!e�d"�	Zd#e�d!e�d$�Zd%Zeed&�d'd(�Zed)�d*d+�Zd,d-�Zd.d/�Z Gd0d1�d1eej!�Z"dS)2z�
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/techsup/technote/ts140.pdf
�)�abc)�datetimeN)�Appender)�get_filepath_or_buffer)�
ReaderBasezPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  �ntypeZnhfun�field_lengthZnvar0�name�labelZnformZnflZnum_decimalsZnfjZnfillZniformZniflZnifdZnpos�_z�Parameters
----------
filepath_or_buffer : string or file-like object
    Path to SAS file or object implementing binary read method.z�index : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : string
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zEformat : string
    File format, only `xport` is currently supported.z_iterator : boolean, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.

�
a

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z�

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
z�Read observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
)�datestr�returncCs(ytj|d�Stk
r"tjSXdS)z3 Given a date in xport format, return Python date. z%d%b%y:%H:%M:%SN)r�strptime�
ValueError�pdZNaT)r
�r�9/tmp/pip-build-5_djhm0z/pandas/pandas/io/sas/sas_xport.py�_parse_date�sr)�scCsDi}d}x0|D](\}}||||�j�||<||7}qW|d=|S)a
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    rr)�strip)r�parts�out�startr	�lengthrrr�_split_line�s
rcCsT|dkrPtjt|�tjd��}tjd|�dd|���}|j|d�}||d<|S|S)N�ZS8�Sz,S)�dtype�f0)�np�zeros�lenr�view)�vec�nbytes�vec1rZvec2rrr�_handle_truncated_float_vec�s	r'c	Cstjd�}|j|d�}|d}|d}|d@}tjt|�tjd�}d|tj|d@�<d|tj|d	@�<d
|tj|d@�<||L}||?|d@d
d
|>B}|dM}||d?d@dd>|dd>|d@BO}tjt|�fdd�}||d<||d<|jdd�}|jd�}|S)zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4)rr�f1i����i �i@�i���l�����Ai��lz>f8Zf8)	r rr#r!r"Zuint8�where�emptyZastype)	r$rr&Zxport1Zxport2Zieee1�shiftZieee2Zieeerrr�_parse_float_vec�s(
		 
r5c@sleZdZeZddd�Zdd�Zdd�Zd	d
�Zdd�Z	e
d
�dd�Zddd�Zdd�Z
ee�ddd��ZdS)�XportReaderN�
ISO-8859-1cCsd||_d|_||_||_t|t�r6t||d�\}}}}t|ttf�rRt|d�|_	n||_	|j
�dS)Nr)�encoding�rb)�	_encoding�_lines_read�_index�
_chunksize�
isinstance�strr�bytes�open�filepath_or_buffer�_read_header)�selfrB�indexr8�	chunksize�compressionZshould_closerrr�__init__�s
zXportReader.__init__cCs|jj�dS)N)rB�close)rDrrrrIszXportReader.closecCs|jjd�j�S)N�P)rB�read�decode)rDrrr�_get_rowszXportReader._get_rowcCsd|jjd�|j�}|tkr,|j�td��|j�}ddgddgddgddgd	d
gg}t||�}|ddkr||j�td��t|d	�|d	<||_|j�}t|dd
��|d
<|j�}|j�}|j	t
�}|tk}	|o�|	s�|j�td��t|d&d'��}
ddgddgddgddgddgddgd	d
gg}t|j�|�}d
d
gdd
gddgddgg}|j
t|j�|��t|d
�|d
<t|d	�|d	<||_ddd�}
t|j�dd��}|
|}|d�r�|d|d7}|jj|�}g}d}�xt|�|
k�r�|d|
�||
d�}}|jd�}tjd|�}ttt|��}|d=|
|d|d<|d}|ddk�r�|dk�sn|dk�r�|j�d |�d!�}t|��x>|j�D]2\}}y|j�||<Wntk
�r�YnX�q�W||d7}||g7}�q�W|j�}|tk�s|j�td"��||_||_|jj�|_|j �|_!d#d$�|jD�|_"d%d$�t#|j�D�}t$j%|�}||_&dS)(Nrz#Header record is not an XPORT file.�prefixr.�versionrZOSr�created�zSAS     SAS     SASLIBz!Header record has invalid prefix.�modifiedzMember header not found�r*�set_nameZsasdatar
�(�type�numeric�char)r)r*�6�:rJ�z>hhhh8s40s8shhh2s8shhl52srrzFloating field width z is not between 2 and 8.zObservation header not found.cSsg|]}|dj��qS)r	)rL)�.0�xrrr�
<listcomp>ysz,XportReader._read_header.<locals>.<listcomp>cSs,g|]$\}}dt|�dt|d�f�qS)rrr)r?)r\�i�fieldrrrr^}s������)'rB�seekrM�_correct_line1rIrrr�	file_info�
startswith�_correct_header1�_correct_header2�int�update�member_inforKr"�ljust�struct�unpack�dict�zip�
_fieldkeys�	TypeError�itemsr�AttributeError�_correct_obs_header�fields�
record_length�tell�record_start�
_record_count�nobs�columns�	enumerater r�_dtype)rDZline1Zline2ZfifreZline3Zheader1Zheader2Z	headflag1Z	headflag2ZfieldnamelengthZmemrk�typesZ
fieldcountZ
datalengthZ	fielddatarvZ
obs_lengthr`Zfieldstruct�fl�msg�k�v�headerZdtypelrrrrrCs�"






"


zXportReader._read_headercCs|j|jpdd�S)Nr))�nrows)rKr=)rDrrr�__next__�szXportReader.__next__)rcCs�|jjdd�|jj�|j}|ddkr4tjd�|jdkrV|jj|j�||jS|jjdd�|jjd�}tj	|tj
d�}tj|dk�}t|�dkr�d}ndt|�}|jj|j�|||jS)	z�
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        rr*rJzxport file may be corrupted)rl  @@�ri����)
rBrcrxry�warnings�warnrwrKr �
frombufferZuint64Zflatnonzeror")rDZtotal_records_lengthZ	last_card�ixZtail_padrrrrz�s 	


zXportReader._record_countcCs|dkr|j}|j|d�S)a
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        N)r�)r=rK)rD�sizerrr�	get_chunk�s
zXportReader.get_chunkcCsl|jdd�}|ddk|ddk@|ddk@}|ddk|dd	k@|dd
kB|ddkB}||M}|S)Nzu1,u1,u2,u4)rr(r�f2Zf3rr0�Z�_�.)r#)rDr$r��missZmiss1rrr�_missing_double�s
$0zXportReader._missing_doublec
sd|dkr�j}t|�j�j�}|�j}|dkr>�j�t��jj|�}tj	|�j
|d�}tjt
|�d�}x�t�j�D]�\}}|dt|�}	�j|d}
|
dkr�t|	�j|d�}	�j|	�}t|	�}tj||<n@�j|ddk�rd	d
�|	D�}�jdk	�r�fdd
�|D�}|||<qxW�jdk�rFt
�j�j|�|_n|j�j�}�j|7_|S)Nr)r�count)rErrrWrrXcSsg|]}|j��qSr)�rstrip)r\�yrrrr^�sz$XportReader.read.<locals>.<listcomp>csg|]}|j�j��qSr)rLr:)r\r�)rDrrr^�s)r{�minr;rwrI�
StopIterationrBrKr r�r~rZ	DataFrame�ranger}r|r?rvr'r�r5�nanr:r<rEZ	set_index)
rDr��
read_lines�read_len�raw�dataZdf�jr]r$rr�r�r)rDrrK�s8

zXportReader.read)Nr7N)N)N)�__name__�
__module__�__qualname__�_xport_reader_doc�__doc__rHrIrMrCr�rirzr�r�r�_read_method_docrKrrrrr6�s
m#
r6)#r��collectionsrrrmr�Znumpyr Zpandas.util._decoratorsrZpandasrZpandas.io.commonrZpandas.io.sas.sasreaderrrdrgrhrurqZ_base_params_docZ_params2_docZ_format_params_docZ
_iterator_docZ
_read_sas_docr�r�r?rrr'r5�Iteratorr6rrrr�<module>	sR	9