HOME


sh-3ll 1.0
DIR:/usr/local/lib64/python3.6/site-packages/pandas/tests/arrays/categorical/
Upload File :
Current File : //usr/local/lib64/python3.6/site-packages/pandas/tests/arrays/categorical/test_dtypes.py
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp
import pandas._testing as tm


class TestCategoricalDtypes:
    def test_is_equal_dtype(self):

        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
        assert c1.is_dtype_equal(c1)
        assert c2.is_dtype_equal(c2)
        assert c3.is_dtype_equal(c3)
        assert c1.is_dtype_equal(c2)
        assert not c1.is_dtype_equal(c3)
        assert not c1.is_dtype_equal(Index(list("aabca")))
        assert not c1.is_dtype_equal(c1.astype(object))
        assert c1.is_dtype_equal(CategoricalIndex(c1))
        assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1.is_dtype_equal(s1)
        assert c2.is_dtype_equal(s2)
        assert c3.is_dtype_equal(s3)
        assert c1.is_dtype_equal(s2)
        assert not c1.is_dtype_equal(s3)
        assert not c1.is_dtype_equal(s1.astype(object))

    def test_set_dtype_same(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(list("abcd")))
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))

    @pytest.mark.parametrize(
        "values, categories, new_categories",
        [
            # No NaNs, same cats, same order
            (["a", "b", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["a", "b", "a"], ["a", "b"], ["b", "a"]),
            # Same, unsorted
            (["b", "a", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["b", "a", "a"], ["a", "b"], ["b", "a"]),
            # NaNs
            (["a", "b", "c"], ["a", "b"], ["a", "b"]),
            (["a", "b", "c"], ["a", "b"], ["b", "a"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            # Introduce NaNs
            (["a", "b", "c"], ["a", "b"], ["a"]),
            (["a", "b", "c"], ["a", "b"], ["b"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            # No overlap
            (["a", "b", "c"], ["a", "b"], ["d", "e"]),
        ],
    )
    @pytest.mark.parametrize("ordered", [True, False])
    def test_set_dtype_many(self, values, categories, new_categories, ordered):
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        c = Categorical(["a", "b", "c"], ["d", "e"])
        result = c._set_dtype(CategoricalDtype(["a", "b"]))
        expected = Categorical([None, None, None], categories=["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_codes_dtypes(self):

        # GH 8453
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"

        result = Categorical([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        result = Categorical([f"foo{i:05d}" for i in range(40000)])
        assert result.codes.dtype == "int32"

        # adding cats
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"
        result = result.add_categories([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        # removing cats
        result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
        assert result.codes.dtype == "int8"

    @pytest.mark.parametrize("ordered", [True, False])
    def test_astype(self, ordered):
        # string
        cat = Categorical(list("abbaaccc"), ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat)
        tm.assert_numpy_array_equal(result, expected)

        msg = "could not convert string to float"
        with pytest.raises(ValueError, match=msg):
            cat.astype(float)

        # numeric
        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(int)
        expected = np.array(cat, dtype=int)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(float)
        expected = np.array(cat, dtype=float)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("cat_ordered", [True, False])
    def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH 10696/18593
        data = list("abcaacbab")
        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype("category")
            expected = cat
            tm.assert_categorical_equal(result, expected)

    def test_iter_python_types(self):
        # GH-19909
        cat = Categorical([1, 2])
        assert isinstance(list(cat)[0], int)
        assert isinstance(cat.tolist()[0], int)

    def test_iter_python_types_datetime(self):
        cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
        assert isinstance(list(cat)[0], Timestamp)
        assert isinstance(cat.tolist()[0], Timestamp)