from datetime import timedelta
import numpy as np
import pandas as pd
from . import utils
from .pycompat import iteritems, range, dask_array_type, suppress
from .utils import is_full_slice
def expanded_indexer(key, ndim):
    """Expand *key* into an equivalent tuple indexer of length *ndim*.

    Every ``Ellipsis`` entry is replaced by the appropriate number of full
    slices, and the result is right-padded with full slices so it has one
    entry per array dimension.
    """
    if not isinstance(key, tuple):
        # a non-tuple key indexes the leading dimension only, as in numpy
        key = (key,)
    expanded = []
    # handling Ellipsis right is a little tricky, see:
    # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
    seen_ellipsis = False
    for item in key:
        if item is not Ellipsis:
            expanded.append(item)
        elif seen_ellipsis:
            # numpy treats any Ellipsis after the first as a single full slice
            expanded.append(slice(None))
        else:
            expanded.extend([slice(None)] * (ndim + 1 - len(key)))
            seen_ellipsis = True
    if len(expanded) > ndim:
        raise IndexError('too many indices')
    expanded += [slice(None)] * (ndim - len(expanded))
    return tuple(expanded)
def canonicalize_indexer(key, ndim):
    """Given an indexer for orthogonal array indexing, return an indexer that
    is a tuple composed entirely of slices, integer ndarrays and native python
    ints.

    Raises ValueError for multi-dimensional array subkeys or array subkeys
    whose dtype is neither boolean nor signed integer.
    """
    def canonicalize(indexer):
        if not isinstance(indexer, slice):
            indexer = np.asarray(indexer)
            if indexer.ndim == 0:
                # np.asscalar was deprecated in numpy 1.16 and removed in
                # 1.23; calling int() on a 0-d array is the supported
                # equivalent
                indexer = int(indexer)
            else:
                if indexer.ndim != 1:
                    raise ValueError('orthogonal array indexing only supports '
                                     '1d arrays')
                if indexer.dtype.kind == 'b':
                    # convert a boolean mask into positional indices
                    indexer, = np.nonzero(indexer)
                elif indexer.dtype.kind != 'i':
                    raise ValueError('invalid subkey %r for integer based '
                                     'array indexing; all subkeys must be '
                                     'slices, integers or sequences of '
                                     'integers or Booleans' % indexer)
        return indexer
    return tuple(canonicalize(k) for k in expanded_indexer(key, ndim))
def _expand_slice(slice_, size):
return np.arange(*slice_.indices(size))
def orthogonal_indexer(key, shape):
    """Given a key for orthogonal array indexing, returns an equivalent key
    suitable for indexing a numpy.ndarray with fancy indexing.

    Non-integer, non-full-slice entries are replaced by broadcast-compatible
    index arrays built with np.ix_, so that indexing a numpy array with the
    result selects along each dimension independently.
    """
    # replace Ellipsis objects with slices
    key = list(canonicalize_indexer(key, len(shape)))
    # replace 1d arrays and slices with broadcast compatible arrays
    # note: we treat integers separately (instead of turning them into 1d
    # arrays) because integers (and only integers) collapse axes when used with
    # __getitem__
    non_int_keys = [n for n, k in enumerate(key) if not isinstance(k, (int, np.integer))]
    def full_slices_unselected(n_list):
        # Trim positions off either end of n_list while every key entry from
        # that end of the key (through the trimmed position) is a full slice;
        # such entries can safely remain slices rather than index arrays.
        def all_full_slices(key_index):
            return all(is_full_slice(key[n]) for n in key_index)
        if not n_list:
            return n_list
        elif all_full_slices(range(n_list[0] + 1)):
            # leading run of full slices: drop the first position and recurse
            return full_slices_unselected(n_list[1:])
        elif all_full_slices(range(n_list[-1], len(key))):
            # trailing run of full slices: drop the last position and recurse
            return full_slices_unselected(n_list[:-1])
        else:
            return n_list
    # However, testing suggests it is OK to keep contiguous sequences of full
    # slices at the start or the end of the key. Keeping slices around (when
    # possible) instead of converting slices to arrays significantly speeds up
    # indexing.
    # (Honestly, I don't understand when it's not OK to keep slices even in
    # between integer indices if as array is somewhere in the key, but such are
    # the admittedly mind-boggling ways of numpy's advanced indexing.)
    array_keys = full_slices_unselected(non_int_keys)
    def maybe_expand_slice(k, length):
        # slices destined for np.ix_ must first become explicit index arrays
        return _expand_slice(k, length) if isinstance(k, slice) else k
    array_indexers = np.ix_(*(maybe_expand_slice(key[n], shape[n])
                              for n in array_keys))
    for i, n in enumerate(array_keys):
        key[n] = array_indexers[i]
    return tuple(key)
def _try_get_item(x):
try:
return x.item()
except AttributeError:
return x
def _asarray_tuplesafe(values):
"""
Convert values into a numpy array of at most 1-dimension, while preserving
tuples.
Adapted from pandas.core.common._asarray_tuplesafe
"""
if isinstance(values, tuple):
result = utils.tuple_to_0darray(values)
else:
result = np.asarray(values)
if result.ndim == 2:
result = np.empty(len(values), dtype=object)
result[:] = values
return result
def convert_label_indexer(index, label, index_name='', method=None,
                          tolerance=None):
    """Given a pandas.Index and labels (e.g., from __getitem__) for one
    dimension, return an indexer suitable for indexing an ndarray along that
    dimension

    Parameters
    ----------
    index : pandas.Index
        Index to look labels up in.
    label : slice, scalar or array-like
        Label-based indexer.
    index_name : str, optional
        Name used in error messages.
    method, tolerance : optional
        Passed through to the pandas lookup methods.

    Returns
    -------
    slice, integer or integer ndarray of positions

    Raises
    ------
    KeyError
        If labels are missing, or a label slice cannot be represented as a
        positional slice.
    """
    # backwards compatibility for pandas<0.16 (method) or pandas<0.17
    # (tolerance)
    kwargs = {}
    if method is not None:
        kwargs['method'] = method
    if tolerance is not None:
        if pd.__version__ < '0.17':
            raise NotImplementedError(
                'the tolerance argument requires pandas v0.17 or newer')
        kwargs['tolerance'] = tolerance

    if isinstance(label, slice):
        if method is not None or tolerance is not None:
            raise NotImplementedError(
                'cannot use ``method`` argument if any indexers are '
                'slice objects')
        indexer = index.slice_indexer(_try_get_item(label.start),
                                      _try_get_item(label.stop),
                                      _try_get_item(label.step))
        if not isinstance(indexer, slice):
            # unlike pandas, in xarray we never want to silently convert a
            # slice indexer into an array indexer
            # bug fix: this message previously was never %-formatted, so the
            # error text contained a literal '%r' and ignored index_name
            raise KeyError('cannot represent labeled-based slice indexer for '
                           'dimension %r with a slice over integer positions; '
                           'the index is unsorted or non-unique' % index_name)
    else:
        label = _asarray_tuplesafe(label)
        if label.ndim == 0:
            # scalar label lookup
            indexer = index.get_loc(label.item(), **kwargs)
        elif label.dtype.kind == 'b':
            # boolean mask: convert to positional indices
            indexer, = np.nonzero(label)
        else:
            indexer = index.get_indexer(label, **kwargs)
            # get_indexer marks missing labels with -1
            if np.any(indexer < 0):
                raise KeyError('not all values found in index %r'
                               % index_name)
    return indexer
def remap_label_indexers(data_obj, indexers, method=None, tolerance=None):
    """Given an xarray data object and label based indexers, return a mapping
    of equivalent location based indexers.
    """
    if method is not None and not isinstance(method, str):
        raise TypeError('``method`` must be a string')
    return {dim: convert_label_indexer(data_obj[dim].to_index(), label,
                                       dim, method, tolerance)
            for dim, label in iteritems(indexers)}
def slice_slice(old_slice, applied_slice, size):
    """Compose two slices: return a single slice equivalent to applying
    *old_slice* to a dimension of length *size* and then *applied_slice*
    to the result.
    """
    step = (old_slice.step or 1) * (applied_slice.step or 1)
    # For now, use the hack of materializing old_slice as an index array and
    # reading the composed start/stop off the subset. Not entirely ideal, but
    # still definitely better than leaving the indexer as an array.
    positions = np.arange(*old_slice.indices(size))[applied_slice]
    if len(positions) == 0:
        # empty selection
        return slice(0, 0, step)
    start = positions[0]
    stop = positions[-1] + step
    if stop < 0:
        # a negative stop would wrap around; None means "run off the end"
        stop = None
    return slice(start, stop, step)
def _index_indexer_1d(old_indexer, applied_indexer, size):
    """Compose two successive 1d indexers into a single equivalent one."""
    assert isinstance(applied_indexer, (int, np.integer, slice, np.ndarray))
    if isinstance(applied_indexer, slice) and applied_indexer == slice(None):
        # shortcut for the usual case: a full slice changes nothing
        return old_indexer
    if not isinstance(old_indexer, slice):
        # array indexer: ordinary fancy indexing composes directly
        return old_indexer[applied_indexer]
    if isinstance(applied_indexer, slice):
        # slice of a slice stays a slice
        return slice_slice(old_indexer, applied_indexer, size)
    # a slice followed by an int/array: expand the slice and index it
    return _expand_slice(old_indexer, size)[applied_indexer]
class LazyIntegerRange(utils.NDArrayMixin):
    """A lazily materialized integer range with an ndarray-like interface.

    Only stores the ``np.arange`` arguments; the full array is built on
    demand in ``__array__``, while shape/size come cheaply from a builtin
    range object.
    """
    def __init__(self, *args, **kwdargs):
        """
        Parameters
        ----------
        See np.arange
        """
        self.args = args
        self.kwdargs = kwdargs
        assert 'dtype' not in self.kwdargs
        # range will fail if any arguments are not integers
        self.array = range(*args, **kwdargs)

    @property
    def shape(self):
        return (len(self.array),)

    @property
    def dtype(self):
        return np.dtype('int64')

    @property
    def ndim(self):
        return 1

    @property
    def size(self):
        return len(self.array)

    def __getitem__(self, key):
        # materialize, then defer to numpy's indexing
        return np.array(self)[key]

    def __array__(self, dtype=None):
        return np.arange(*self.args, **self.kwdargs)

    def __repr__(self):
        return '%s(array=%r)' % (type(self).__name__, self.array)
class LazilyIndexedArray(utils.NDArrayMixin):
    """Wrap an array that handles orthogonal indexing to make indexing lazy
    """
    def __init__(self, array, key=None):
        """
        Parameters
        ----------
        array : array_like
            Array like object to index.
        key : tuple, optional
            Array indexer. If provided, it is assumed to already be in
            canonical expanded form.
        """
        if key is None:
            # default to selecting everything
            key = (slice(None),) * array.ndim
        self.array = array
        self.key = key

    def _updated_key(self, new_key):
        # Compose self.key with new_key so that indexing self.array with the
        # result is equivalent to applying the two keys in sequence.
        new_key = iter(canonicalize_indexer(new_key, self.ndim))
        key = []
        for size, k in zip(self.array.shape, self.key):
            if isinstance(k, (int, np.integer)):
                # this axis was already collapsed by an integer, so new_key
                # has no entry for it
                key.append(k)
            else:
                key.append(_index_indexer_1d(k, next(new_key), size))
        return tuple(key)

    @property
    def shape(self):
        # axes indexed by an integer are dropped, matching numpy semantics
        shape = []
        for size, k in zip(self.array.shape, self.key):
            if isinstance(k, slice):
                shape.append(len(range(*k.indices(size))))
            elif isinstance(k, np.ndarray):
                shape.append(k.size)
        return tuple(shape)

    def __array__(self, dtype=None):
        array = orthogonally_indexable(self.array)
        # bug fix: previously this passed dtype=None, silently ignoring the
        # dtype requested by the caller
        return np.asarray(array[self.key], dtype=dtype)

    def __getitem__(self, key):
        # indexing stays lazy: fold the new key into the stored key
        return type(self)(self.array, self._updated_key(key))

    def __setitem__(self, key, value):
        # writes go through eagerly to the underlying array
        key = self._updated_key(key)
        self.array[key] = value

    def __repr__(self):
        return ('%s(array=%r, key=%r)' %
                (type(self).__name__, self.array, self.key))
def orthogonally_indexable(array):
    """Wrap *array* in an adapter that provides orthogonal indexing, when one
    exists for its type; otherwise return it unchanged."""
    adapters = [
        (np.ndarray, NumpyIndexingAdapter),
        (pd.Index, PandasIndexAdapter),
        (dask_array_type, DaskIndexingAdapter),
    ]
    for types, adapter in adapters:
        if isinstance(array, types):
            return adapter(array)
    return array
class NumpyIndexingAdapter(utils.NDArrayMixin):
    """Wrap a NumPy array to use orthogonal indexing (array indexing
    accesses different dimensions independently, like netCDF4-python variables)
    """
    # note: this object is somewhat similar to biggus.NumpyArrayAdapter in that
    # it implements orthogonal indexing, except it casts to a numpy array,
    # isn't lazy and supports writing values.
    def __init__(self, array):
        self.array = np.asarray(array)

    def __array__(self, dtype=None):
        return np.asarray(self.array, dtype=dtype)

    def _convert_key(self, key):
        # expand to one entry per dimension, then rewrite any entry that
        # would trigger numpy fancy indexing into broadcast-safe form
        key = expanded_indexer(key, self.ndim)
        basic_only = all(isinstance(k, (int, np.integer, slice)) for k in key)
        return key if basic_only else orthogonal_indexer(key, self.shape)

    def __getitem__(self, key):
        return self.array[self._convert_key(key)]

    def __setitem__(self, key, value):
        self.array[self._convert_key(key)] = value
class DaskIndexingAdapter(utils.NDArrayMixin):
    """Wrap a dask array to support orthogonal indexing
    """
    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        key = expanded_indexer(key, self.ndim)
        if all(isinstance(k, (int, np.integer, slice)) for k in key):
            # basic indexing only: dask handles this directly
            return self.array[key]
        # otherwise apply one subkey at a time, each along a single axis.
        # Work from the last axis backwards so integer subkeys, which drop
        # an axis, do not shift the positions of axes still to be indexed.
        value = self.array
        for axis, subkey in reversed(list(enumerate(key))):
            value = value[(slice(None),) * axis + (subkey,)]
        return value
class PandasIndexAdapter(utils.NDArrayMixin):
    """Wrap a pandas.Index to be better about preserving dtypes and to handle
    indexing by length 1 tuples like numpy
    """
    def __init__(self, array, dtype=None):
        # coerce to a pandas.Index
        self.array = utils.safe_cast_to_index(array)
        if dtype is None:
            # infer the dtype we should present to numpy consumers
            if isinstance(array, pd.PeriodIndex):
                # periods only survive conversion to numpy as objects
                dtype = np.dtype('O')
            elif hasattr(array, 'categories'):
                # category isn't a real numpy dtype
                dtype = array.categories.dtype
            elif not utils.is_valid_numpy_dtype(array.dtype):
                dtype = np.dtype('O')
            else:
                dtype = array.dtype
        self._dtype = dtype
    @property
    def dtype(self):
        # the presented dtype, which may differ from self.array.dtype
        return self._dtype
    def __array__(self, dtype=None):
        if dtype is None:
            dtype = self.dtype
        array = self.array
        if isinstance(array, pd.PeriodIndex):
            with suppress(AttributeError):
                # this might not be public API
                array = array.asobject
        return np.asarray(array.values, dtype=dtype)
    @property
    def shape(self):
        # .shape is broken on pandas prior to v0.15.2
        return (len(self.array),)
    def __getitem__(self, key):
        if isinstance(key, tuple) and len(key) == 1:
            # unpack key so it can index a pandas.Index object (pandas.Index
            # objects don't like tuples)
            key, = key
        result = self.array[key]
        if isinstance(result, pd.Index):
            # non-scalar selection: keep the wrapper (and presented dtype)
            result = PandasIndexAdapter(result, dtype=self.dtype)
        elif result is pd.NaT:
            # work around the impossibility of casting NaT with asarray
            # note: it probably would be better in general to return
            # pd.Timestamp rather than np.datetime64 but this is easier
            # (for now)
            result = np.datetime64('NaT', 'ns')
        elif isinstance(result, timedelta):
            # getattr handles both pd.Timedelta (has .value in ns) and
            # plain datetime.timedelta
            result = np.timedelta64(getattr(result, 'value', result), 'ns')
        elif self.dtype != object:
            # cast scalars back to the presented dtype
            result = np.asarray(result, dtype=self.dtype)
        return result
    def __repr__(self):
        return ('%s(array=%r, dtype=%r)'
                % (type(self).__name__, self.array, self.dtype))
|