Initial commit: 首次建仓,建立目录结构

This commit is contained in:
FXY
2026-06-11 23:49:54 +08:00
commit 4038a476b5
9396 changed files with 2372905 additions and 0 deletions

View File

@ -0,0 +1,158 @@
"""
Assertion helpers for arithmetic tests.
"""
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Series,
array,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
NumpyExtensionArray,
)
def assert_cannot_add(left, right, msg="cannot add"):
    """
    Assert that two objects cannot be added, in either operand order.

    Parameters
    ----------
    left : object
        The first operand.
    right : object
        The second operand.
    msg : str, default "cannot add"
        Regular expression that the TypeError message must match.

    Notes
    -----
    Both ``left + right`` and ``right + left`` are checked so that the
    reflected operation is covered as well.
    """
    with pytest.raises(TypeError, match=msg):
        left + right
    with pytest.raises(TypeError, match=msg):
        right + left
def assert_invalid_addsub_type(left, right, msg=None):
    """
    Assert that two objects can neither be added nor subtracted.

    All four combinations are checked: ``left + right``, ``right + left``,
    ``left - right`` and ``right - left``.

    Parameters
    ----------
    left : object
        The first operand.
    right : object
        The second operand.
    msg : str or None, default None
        Regular expression that the TypeError message must match.
        ``None`` accepts any message.
    """
    with pytest.raises(TypeError, match=msg):
        left + right
    with pytest.raises(TypeError, match=msg):
        right + left
    with pytest.raises(TypeError, match=msg):
        left - right
    with pytest.raises(TypeError, match=msg):
        right - left
def get_upcast_box(left, right, is_cmp: bool = False):
    """
    Get the box to use for 'expected' in an arithmetic or comparison operation.

    Parameters
    ----------
    left : Any
    right : Any
    is_cmp : bool, default False
        Whether the operation is a comparison method.

    Returns
    -------
    callable
        One of ``DataFrame``, ``Series``, ``Index``, ``np.array`` or
        ``tm.to_array``, chosen by the highest-priority container among
        the operands (DataFrame > Series > Index > array).
    """
    if isinstance(left, DataFrame) or isinstance(right, DataFrame):
        return DataFrame
    if isinstance(left, Series) or isinstance(right, Series):
        if is_cmp and isinstance(left, Index):
            # Index does not defer for comparisons
            return np.array
        return Series
    if isinstance(left, Index) or isinstance(right, Index):
        if is_cmp:
            return np.array
        return Index
    return tm.to_array
def assert_invalid_comparison(left, right, box):
"""
Assert that comparison operations with mismatched types behave correctly.
Parameters
----------
left : np.ndarray, ExtensionArray, Index, or Series
right : object
box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
"""
# Not for tznaive-tzaware comparison
# Note: not quite the same as how we do this for tm.box_expected
xbox = box if box not in [Index, array] else np.array
def xbox2(x):
# Eventually we'd like this to be tighter, but for now we'll
# just exclude NumpyExtensionArray[bool]
if isinstance(x, NumpyExtensionArray):
return x._ndarray
if isinstance(x, BooleanArray):
# NB: we are assuming no pd.NAs for now
return x.astype(bool)
return x
result = xbox2(left == right)
expected = xbox(np.zeros(result.shape, dtype=np.bool_))
tm.assert_equal(result, expected)
result = xbox2(right == left)
tm.assert_equal(result, xbox(expected))
result = xbox2(left != right)
tm.assert_equal(result, ~expected)
result = xbox2(right != left)
tm.assert_equal(result, xbox(~expected))
msg = "|".join(
[
"Invalid comparison between",
"Cannot compare type",
"not supported between",
"invalid type promotion",
(
# GH#36706 npdev 1.20.0 2020-09-28
r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
"For example they cannot be stored in a single array unless the "
"dtype is `object`."
),
]
)
with pytest.raises(TypeError, match=msg):
left < right
with pytest.raises(TypeError, match=msg):
left <= right
with pytest.raises(TypeError, match=msg):
left > right
with pytest.raises(TypeError, match=msg):
left >= right
with pytest.raises(TypeError, match=msg):
right < left
with pytest.raises(TypeError, match=msg):
right <= left
with pytest.raises(TypeError, match=msg):
right > left
with pytest.raises(TypeError, match=msg):
right >= left

View File

@ -0,0 +1,139 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """
    Several variants of integer value 1. The zero-dim integer array
    behaves like an integer.

    This fixture can be used to check that datetimelike indexes handle
    addition and subtraction of integers and zero-dimensional arrays
    of integers.

    Examples
    --------
    dti = pd.date_range('2016-01-01', periods=2, freq='h')
    dti
    DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
    dtype='datetime64[ns]', freq='h')
    dti + one
    DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
    dtype='datetime64[ns]', freq='h')
    """
    return request.param
# Length-5 vectors of zeros in every box/dtype combination ...
zeros = [
    box_cls([0] * 5, dtype=dtype)
    for box_cls in [Index, np.array, pd.array]
    for dtype in [np.int64, np.uint64, np.float64]
]
# ... plus negative-zero float vectors ...
zeros.extend([box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [Index, np.array]])
# ... zero-dimensional arrays ...
zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]])
zeros.extend([np.array(-0.0, dtype=np.float64)])
# ... and plain Python scalars.
zeros.extend([0, 0.0, -0.0])


@pytest.fixture(params=zeros)
def zero(request):
    """
    Several types of scalar zeros and length 5 vectors of zeros.

    This fixture can be used to check that numeric-dtype indexes handle
    division by any zero numeric-dtype.

    Uses vector of length 5 for broadcasting with `numeric_idx` fixture,
    which creates numeric-dtype vectors also of length 5.

    Examples
    --------
    arr = RangeIndex(5)
    arr / zero
    Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param
# ------------------------------------------------------------------
# Scalar Fixtures
@pytest.fixture(
    params=[
        pd.Timedelta("10m7s").to_pytimedelta(),
        pd.Timedelta("10m7s"),
        pd.Timedelta("10m7s").to_timedelta64(),
    ],
    ids=lambda x: type(x).__name__,
)
def scalar_td(request):
    """
    Several variants of Timedelta scalars representing 10 minutes and 7 seconds.

    The same duration is provided as a ``datetime.timedelta``, a
    ``pd.Timedelta`` and a ``np.timedelta64``.
    """
    return request.param
@pytest.fixture(
    params=[
        pd.offsets.Day(3),
        pd.offsets.Hour(72),
        pd.Timedelta(days=3).to_pytimedelta(),
        pd.Timedelta("72:00:00"),
        np.timedelta64(3, "D"),
        np.timedelta64(72, "h"),
    ],
    ids=lambda x: type(x).__name__,
)
def three_days(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 3-day timedelta
    """
    return request.param
@pytest.fixture(
    params=[
        pd.offsets.Hour(2),
        pd.offsets.Minute(120),
        pd.Timedelta(hours=2).to_pytimedelta(),
        pd.Timedelta(seconds=2 * 3600),
        np.timedelta64(2, "h"),
        np.timedelta64(120, "m"),
    ],
    ids=lambda x: type(x).__name__,
)
def two_hours(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 2-hour timedelta
    """
    return request.param
# DateOffsets shared by the mismatch fixture below.
_common_mismatch = [
    pd.offsets.YearBegin(2),
    pd.offsets.MonthBegin(1),
    pd.offsets.Minute(),
]


@pytest.fixture(
    params=[
        np.timedelta64(4, "h"),
        pd.Timedelta(hours=23).to_pytimedelta(),
        pd.Timedelta("23:00:00"),
        *_common_mismatch,
    ]
)
def not_daily(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Daily frequencies.
    """
    return request.param

View File

@ -0,0 +1,78 @@
import operator
import numpy as np
import pytest
from pandas.core.dtypes.missing import isna
import pandas._testing as tm
from pandas.core.ops.array_ops import (
comparison_op,
na_logical_op,
)
def test_na_logical_op_2d():
    """na_logical_op handles a 2D int array OR-ed with a 2D object array holding NaN."""
    left = np.arange(8).reshape(4, 2)
    right = left.astype(object)
    right[0, 0] = np.nan

    # Check that we fall back to the vec_binop branch
    with pytest.raises(TypeError, match="unsupported operand type"):
        operator.or_(left, right)

    result = na_logical_op(left, right, operator.or_)
    expected = right
    tm.assert_numpy_array_equal(result, expected)
def test_object_comparison_2d():
    """comparison_op works elementwise on 2D object arrays, including read-only ones."""
    left = np.arange(9).reshape(3, 3).astype(object)
    right = left.T

    # Only the diagonal of a matrix equals the diagonal of its transpose here.
    result = comparison_op(left, right, operator.eq)
    expected = np.eye(3).astype(bool)
    tm.assert_numpy_array_equal(result, expected)

    # Ensure that cython doesn't raise on non-writeable arg, which
    #  we can get from np.broadcast_to
    right.flags.writeable = False
    result = comparison_op(left, right, operator.ne)
    tm.assert_numpy_array_equal(result, ~expected)
@pytest.mark.parametrize("rvalues", [1, [1, 1, 1], np.nan, None])
@pytest.mark.parametrize(
    "op", [operator.eq, operator.ne, operator.lt, operator.le, operator.gt, operator.ge]
)
def test_comparison_for_subclasses(rvalues, op):
    # GH#63205 Ensure subclasses of ndarray are correctly handled in comparison_op

    # Define a custom ndarray subclass
    class TestArray(np.ndarray):
        def __new__(cls, input_array):
            return np.asarray(input_array).view(cls)

        def __array_finalize__(self, obj) -> None:
            self._is_test_array = True

    def expected_with_na_handling(lvalues, rvalues, op):
        # Similar to comparison_op, handle zerodim arrays with na value separately
        if (rvalues.ndim == 0) and isna(rvalues.item()):
            # numpy does not like comparisons vs None
            if op is operator.ne:
                return np.ones(lvalues.shape, dtype=bool)
            return np.zeros(lvalues.shape, dtype=bool)
        return op(lvalues, rvalues)

    # Define test data
    lvalues = [1, 2, 3]

    # Test with both ndarray and TestArray
    result = comparison_op(np.array(lvalues), np.array(rvalues), op)
    expected = expected_with_na_handling(np.array(lvalues), np.array(rvalues), op)
    tm.assert_numpy_array_equal(result, expected)

    result = comparison_op(TestArray(lvalues), TestArray(rvalues), op)
    expected = expected_with_na_handling(TestArray(lvalues), TestArray(rvalues), op)
    tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,28 @@
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
def test_divmod_bool_raises(box_with_array):
    # GH#46043 // raises, so divmod should too
    ser = Series([True, False])
    obj = tm.box_expected(ser, box_with_array)

    msg = "operator 'floordiv' not implemented for bool dtypes"
    with pytest.raises(NotImplementedError, match=msg):
        obj // obj

    # DataFrame keeps reporting 'floordiv'; every other box reports 'divmod'.
    if box_with_array is not DataFrame:
        msg = "operator 'divmod' not implemented for bool dtypes"
    with pytest.raises(NotImplementedError, match=msg):
        divmod(obj, obj)

    # go through __rdivmod__
    with pytest.raises(NotImplementedError, match=msg):
        divmod(True, obj)

View File

@ -0,0 +1,25 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestCategoricalComparisons:
    """Equality comparisons involving categorical-dtype Series."""

    def test_categorical_nan_equality(self):
        # A missing value never compares equal, not even to itself.
        cat = Series(Categorical(["a", "b", "c", np.nan]))
        expected = Series([True, True, True, False])
        result = cat == cat
        tm.assert_series_equal(result, expected)

    def test_categorical_tuple_equality(self):
        # GH 18050
        ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
        expected = Series([True, False, True, False, False])
        result = ser == (0, 0)
        tm.assert_series_equal(result, expected)

        result = ser.astype("category") == (0, 0)
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,308 @@
import operator
import numpy as np
import pytest
from pandas.core.dtypes.common import is_list_like
import pandas as pd
from pandas import (
Categorical,
Index,
Interval,
IntervalIndex,
Period,
Series,
Timedelta,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
IntervalArray,
)
from pandas.tests.arithmetic.common import get_upcast_box
@pytest.fixture(
    params=[
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        (
            timedelta_range("0 days", periods=3).insert(3, pd.NaT),
            timedelta_range("1 day", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3).insert(3, pd.NaT),
            date_range("20170102", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
            date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    Fixture for building an IntervalArray from various dtypes

    Each param is a ``(left, right)`` pair of length-4 indexes holding the
    interval endpoints.
    """
    return request.param
@pytest.fixture
def interval_array(left_right_dtypes):
    """
    Fixture to generate an IntervalArray of various dtypes containing NA if possible
    """
    left, right = left_right_dtypes
    return IntervalArray.from_arrays(left, right)
def create_categorical_intervals(left, right, closed="right"):
    """Build a Categorical whose values are the intervals ``left[i]..right[i]``."""
    return Categorical(IntervalIndex.from_arrays(left, right, closed=closed))
def create_series_intervals(left, right, closed="right"):
    """Build an interval-dtype Series from the endpoint arrays ``left`` and ``right``."""
    return Series(IntervalArray.from_arrays(left, right, closed=closed))
def create_series_categorical_intervals(left, right, closed="right"):
    """Build a Series backed by a Categorical of intervals ``left[i]..right[i]``."""
    intervals = IntervalIndex.from_arrays(left, right, closed=closed)
    return Series(Categorical(intervals))
class TestComparison:
    """``==`` / ``!=`` comparisons of interval data against scalars and list-likes."""

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        """Comparison operator under test (equality or inequality)."""
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Fixture for all pandas native interval constructors.
        Used to build the ``other`` operand compared against an IntervalArray.
        """
        return request.param

    def elementwise_comparison(self, op, interval_array, other):
        """
        Helper that performs elementwise comparisons between `interval_array`
        and `other`

        A scalar `other` is repeated to the length of `interval_array`. The
        result is a Series (sharing `other`'s index) when `other` is a Series,
        otherwise an ndarray.
        """
        other = other if is_list_like(other) else [other] * len(interval_array)
        expected = np.array(
            [op(x, y) for x, y in zip(interval_array, other, strict=True)]
        )
        if isinstance(other, Series):
            return Series(expected, index=other.index)
        return expected

    def test_compare_scalar_interval(self, op, interval_array):
        # matches first interval
        other = interval_array[0]
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

        # matches on a single endpoint but not both
        other = Interval(interval_array.left[0], interval_array.right[1])
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = Interval(0, 1, closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_na(self, op, interval_array, nulls_fixture, box_with_array):
        box = box_with_array
        obj = tm.box_expected(interval_array, box)
        result = op(obj, nulls_fixture)

        if nulls_fixture is pd.NA:
            # GH#31882
            exp = np.ones(interval_array.shape, dtype=bool)
            expected = BooleanArray(exp, exp)
        else:
            expected = self.elementwise_comparison(op, interval_array, nulls_fixture)

        if not (box is Index and nulls_fixture is pd.NA):
            # don't cast expected from BooleanArray to ndarray[object]
            xbox = get_upcast_box(obj, nulls_fixture, True)
            expected = tm.box_expected(expected, xbox)

        tm.assert_equal(result, expected)

        rev = op(nulls_fixture, obj)
        tm.assert_equal(rev, expected)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
        # same endpoints
        other = interval_constructor(interval_array.left, interval_array.right)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # different endpoints
        other = interval_constructor(
            interval_array.left[::-1], interval_array.right[::-1]
        )
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # all nan endpoints
        other = interval_constructor([np.nan] * 4, [np.nan] * 4)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    def test_compare_list_like_interval_mixed_closed(
        self, op, interval_constructor, closed, other_closed
    ):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = interval_constructor(range(2), range(1, 3), closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
        other = [nulls_fixture] * 4
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()], dtype=object),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor, length):
        interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
        other = other_constructor([Interval(0, 1)] * length)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            op(interval_array, other)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type, assert_func):
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        # scalar comparisons
        other = index[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = breaks[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        # list-like comparisons
        other = IntervalArray.from_breaks(breaks)
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = [index[0], breaks[0], "foo"]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        # GH #28981
        expected = Series([False, False])
        s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        result = s == scalars
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,410 @@
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
import datetime
from decimal import Decimal
import operator
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Series,
Timestamp,
option_context,
)
import pandas._testing as tm
from pandas.core import ops
# ------------------------------------------------------------------
# Comparisons
class TestObjectComparisons:
    """Comparison behavior of object-dtype (and inferred string) Series."""

    def test_comparison_object_numeric_nas(self, comparison_op):
        ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object)
        shifted = ser.shift(2)

        func = comparison_op

        # Object-dtype comparison must agree with the float-dtype result.
        result = func(ser, shifted)
        expected = func(ser.astype(float), shifted.astype(float))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
    )
    def test_object_comparisons(self, infer_string):
        with option_context("future.infer_string", infer_string):
            ser = Series(["a", "b", np.nan, "c", "a"])

            result = ser == "a"
            expected = Series([True, False, False, False, True])
            tm.assert_series_equal(result, expected)

            result = ser < "a"
            expected = Series([False, False, False, False, False])
            tm.assert_series_equal(result, expected)

            # ``~`` is the boolean inverse; unary minus on booleans only
            # works through special-casing and obscures the intent.
            result = ser != "a"
            expected = ~(ser == "a")
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [None, object])
    def test_more_na_comparisons(self, dtype):
        left = Series(["a", np.nan, "c"], dtype=dtype)
        right = Series(["a", np.nan, "d"], dtype=dtype)

        result = left == right
        expected = Series([True, False, False])
        tm.assert_series_equal(result, expected)

        result = left != right
        expected = Series([False, True, True])
        tm.assert_series_equal(result, expected)

        result = left == np.nan
        expected = Series([False, False, False])
        tm.assert_series_equal(result, expected)

        result = left != np.nan
        expected = Series([True, True, True])
        tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Arithmetic
class TestArithmetic:
    """Arithmetic on object-dtype Series/Index/array-likes."""

    def test_add_period_to_array_of_offset(self):
        # GH#50162
        per = pd.Period("2012-1-1", freq="D")
        pi = pd.period_range("2012-1-1", periods=10, freq="D")
        idx = per - pi

        expected = pd.Index([x + per for x in idx], dtype=object)
        result = idx + per
        tm.assert_index_equal(result, expected)

        result = per + idx
        tm.assert_index_equal(result, expected)

    # TODO: parametrize
    def test_pow_ops_object(self):
        # GH#22922
        # pow is weird with masking & 1, so testing here
        a = Series([1, np.nan, 1, np.nan], dtype=object)
        b = Series([1, np.nan, np.nan, 1], dtype=object)
        result = a**b
        expected = Series(a.values**b.values, dtype=object)
        tm.assert_series_equal(result, expected)

        result = b**a
        expected = Series(b.values**a.values, dtype=object)

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box_with_array, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation

        arr = Series(["a", "b", "c"])
        expected = Series([op(x, other) for x in arr])

        arr = tm.box_expected(arr, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = op(arr, other)
        tm.assert_equal(result, expected)

    def test_objarr_add_str(self, box_with_array):
        ser = Series(["x", np.nan, "x"])
        expected = Series(["xa", np.nan, "xa"])

        ser = tm.box_expected(ser, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = ser + "a"
        tm.assert_equal(result, expected)

    def test_objarr_radd_str(self, box_with_array):
        ser = Series(["x", np.nan, "x"])
        expected = Series(["ax", np.nan, "ax"])

        ser = tm.box_expected(ser, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = "a" + ser
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            [1, 2, 3],
            [1.1, 2.2, 3.3],
            [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
            ["x", "y", 1],
        ],
    )
    @pytest.mark.parametrize("dtype", [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
        ser = Series(data, dtype=dtype)

        ser = tm.box_expected(ser, box_with_array)
        msg = "|".join(
            [
                "can only concatenate str",
                "did not contain a loop with signature matching types",
                "unsupported operand type",
                "must be str",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            "foo_" + ser

    @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box_with_array):
        # invalid ops
        box = box_with_array

        obj_ser = Series(list("abc"), dtype=object, name="objects")

        obj_ser = tm.box_expected(obj_ser, box)
        msg = "|".join(
            [
                "can only concatenate str",
                "unsupported operand type",
                "must be str",
                "has no kernel",
            ]
        )
        with pytest.raises(Exception, match=msg):
            op(obj_ser, 1)
        with pytest.raises(Exception, match=msg):
            op(obj_ser, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        ser = Series(["foo", "bar", "baz", np.nan])
        result = "prefix_" + ser
        expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
        tm.assert_series_equal(result, expected)

        result = ser + "_suffix"
        expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
        tm.assert_series_equal(result, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize("dtype", [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        # as of 2.0 we retain object dtype when ser.dtype == object
        ser = Series(
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            dtype=dtype,
        )
        expected = Series(
            [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
            dtype=dtype,
        )

        result = pd.Timedelta("3 days") + ser
        tm.assert_series_equal(result, expected)

        result = ser + pd.Timedelta("3 days")
        tm.assert_series_equal(result, expected)

    # TODO: cleanup & parametrize over box
    def test_mixed_timezone_series_ops_object(self):
        # GH#13043
        ser = Series(
            [
                Timestamp("2015-01-01", tz="US/Eastern"),
                Timestamp("2015-01-01", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser.dtype == object

        exp = Series(
            [
                Timestamp("2015-01-02", tz="US/Eastern"),
                Timestamp("2015-01-02", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
        tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)

        # object series & object series
        ser2 = Series(
            [
                Timestamp("2015-01-03", tz="US/Eastern"),
                Timestamp("2015-01-05", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser2.dtype == object
        exp = Series(
            [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
        )
        tm.assert_series_equal(ser2 - ser, exp)
        tm.assert_series_equal(ser - ser2, -exp)

        ser = Series(
            [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
            name="xxx",
            dtype=object,
        )
        assert ser.dtype == object

        exp = Series(
            [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
            name="xxx",
            dtype=object,
        )
        tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
        tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)

    # TODO: cleanup & parametrize over box
    def test_iadd_preserves_name(self):
        # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
        ser = Series([1, 2, 3])
        ser.index.name = "foo"

        ser.index += 1
        assert ser.index.name == "foo"

        ser.index -= 1
        assert ser.index.name == "foo"

    def test_add_string(self):
        # from bug report
        index = pd.Index(["a", "b", "c"])
        index2 = index + "foo"

        assert "a" not in index2
        assert "afoo" in index2

    def test_iadd_string(self):
        index = pd.Index(["a", "b", "c"])
        # doesn't fail test unless there is a check before `+=`
        assert "a" in index

        index += "_x"
        assert "a_x" in index

    def test_add(self):
        index = pd.Index([str(i) for i in range(10)])
        expected = pd.Index(index.values * 2)
        tm.assert_index_equal(index + index, expected)
        tm.assert_index_equal(index + index.tolist(), expected)
        tm.assert_index_equal(index.tolist() + index, expected)

        # test add and radd
        index = pd.Index(list("abc"))
        expected = pd.Index(["a1", "b1", "c1"])
        tm.assert_index_equal(index + "1", expected)
        expected = pd.Index(["1a", "1b", "1c"])
        tm.assert_index_equal("1" + index, expected)

    def test_sub_fail(self):
        index = pd.Index([str(i) for i in range(10)])

        msg = "unsupported operand type|Cannot broadcast|sub' not supported"
        with pytest.raises(TypeError, match=msg):
            index - "a"
        with pytest.raises(TypeError, match=msg):
            index - index
        with pytest.raises(TypeError, match=msg):
            index - index.tolist()
        with pytest.raises(TypeError, match=msg):
            index.tolist() - index

    def test_sub_object(self):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(0), Decimal(1)])

        result = index - Decimal(1)
        tm.assert_index_equal(result, expected)

        result = index - pd.Index([Decimal(1), Decimal(1)])
        tm.assert_index_equal(result, expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            index - "foo"

        with pytest.raises(TypeError, match=msg):
            index - np.array([2, "foo"], dtype=object)

    def test_rsub_object(self, fixed_now_ts):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(1), Decimal(0)])

        result = Decimal(2) - index
        tm.assert_index_equal(result, expected)

        result = np.array([Decimal(2), Decimal(2)]) - index
        tm.assert_index_equal(result, expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            "foo" - index

        with pytest.raises(TypeError, match=msg):
            np.array([True, fixed_now_ts]) - index
class MyIndex(pd.Index):
    # Simple index subclass that tracks ops calls.

    # Number of times ``__add__`` has run on this instance (``__radd__``
    # delegates to ``__add__`` and is therefore counted too).
    _calls: int

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        # Bypass the regular constructor and set the attributes directly.
        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._calls = 0
        result._reset_identity()

        return result

    def __add__(self, other):
        self._calls += 1
        return self._simple_new(self._data)

    def __radd__(self, other):
        return self.__add__(other)
@pytest.mark.parametrize(
    "other",
    [
        [datetime.timedelta(1), datetime.timedelta(2)],
        [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
        [pd.Period("2000"), pd.Period("2001")],
        ["a", "b"],
    ],
    ids=["timedelta", "datetime", "period", "object"],
)
def test_index_ops_defer_to_unknown_subclasses(other):
    # https://github.com/pandas-dev/pandas/issues/31109
    values = np.array(
        [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object
    )
    a = MyIndex._simple_new(values)
    other = pd.Index(other)

    # The built-in Index on the left must hand the operation over to the
    # subclass, whose __radd__ bumps the call counter exactly once.
    result = other + a
    assert isinstance(result, MyIndex)
    assert a._calls == 1

View File

@ -0,0 +1,487 @@
import operator
from pathlib import Path
import numpy as np
import pytest
from pandas.compat import HAS_PYARROW
from pandas.errors import Pandas4Warning
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
NA,
ArrowDtype,
Series,
StringDtype,
)
import pandas._testing as tm
from pandas.core.construction import extract_array
def string_dtype_highest_priority(dtype1, dtype2):
if HAS_PYARROW:
DTYPE_HIERARCHY = [
StringDtype("python", na_value=np.nan),
StringDtype("pyarrow", na_value=np.nan),
StringDtype("python", na_value=NA),
StringDtype("pyarrow", na_value=NA),
]
else:
DTYPE_HIERARCHY = [
StringDtype("python", na_value=np.nan),
StringDtype("python", na_value=NA),
]
h1 = DTYPE_HIERARCHY.index(dtype1)
h2 = DTYPE_HIERARCHY.index(dtype2)
return DTYPE_HIERARCHY[max(h1, h2)]
def test_eq_all_na():
    """Comparing an all-NA pyarrow string array with itself gives all-NA booleans."""
    pytest.importorskip("pyarrow")
    a = pd.array([NA, NA], dtype=StringDtype("pyarrow"))
    result = a == a
    expected = pd.array([NA, NA], dtype="boolean[pyarrow]")
    tm.assert_extension_array_equal(result, expected)
def test_reversed_logical_ops(any_string_dtype):
    # GH#60234
    dtype = any_string_dtype
    # Object dtype is silent; string dtypes are expected to warn.
    warn = None if dtype == object else Pandas4Warning
    left = Series([True, False, False, True])
    right = Series(["", "", "b", "c"], dtype=dtype)

    msg = "operations between boolean dtype and"
    # |, & and ^ must all behave as if ``right`` had been cast to bool first.
    for logical_op in (operator.or_, operator.and_, operator.xor):
        with tm.assert_produces_warning(warn, match=msg):
            result = logical_op(left, right)
        expected = logical_op(left, right.astype(bool))
        tm.assert_series_equal(result, expected)
def test_pathlib_path_division(any_string_dtype, request):
    """
    Divide a ``pathlib.Path`` by a string Series and a string Series by a Path.

    Both directions are expected to give an object-dtype Series of the
    elementwise ``/`` results, with the dtype's ``na_value`` in the missing
    slot.  The object-dtype case is marked xfail.
    """
    # GH#61940
    if any_string_dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="with NA present we go through _masked_arith_op which "
                "raises TypeError bc Path is not recognized by lib.is_scalar."
            )
        )

    path = Path("/Users/Irv/")
    ser = Series(["A", "B", NA], dtype=any_string_dtype)

    path_over_ser = path / ser
    expected_path_over_ser = Series(
        [path / "A", path / "B", ser.dtype.na_value], dtype=object
    )
    tm.assert_series_equal(path_over_ser, expected_path_over_ser)

    ser_over_path = ser / path
    expected_ser_over_path = Series(
        ["A" / path, "B" / path, ser.dtype.na_value], dtype=object
    )
    tm.assert_series_equal(ser_over_path, expected_ser_over_path)
def test_mixed_object_comparison(any_string_dtype):
    """
    Compare a string Series (``==``) with an object Series of mixed values.

    ``["a", "b"] == [1, "b"]`` is expected to be ``[False, True]``.  The
    expected dtype is numpy bool, except for NA-backed string dtypes where it
    is "boolean" (python storage) or "bool[pyarrow]" (pyarrow storage).
    """
    # GH#60228
    dtype = any_string_dtype
    strings = Series(["a", "b"], dtype=dtype)
    mixed = Series([1, "b"], dtype=object)

    result = strings == mixed

    expected = Series([False, True], dtype=bool)
    if dtype != object and dtype.na_value is NA:
        if dtype.storage == "python":
            expected = expected.astype("boolean")
        elif dtype.storage == "pyarrow":
            expected = expected.astype("bool[pyarrow]")
    tm.assert_series_equal(result, expected)
def test_pyarrow_numpy_string_invalid():
    """
    Compare a bool Series against string Series of several dtypes.

    ``==`` is expected to be all-False and ``!=`` all-True, while ``>`` must
    raise TypeError ("Invalid comparison").  With a ``string[pyarrow]`` or
    ``ArrowDtype(pa.string())`` operand on the left, the equality results are
    expected as ``bool[pyarrow]``.  Skipped when pyarrow cannot be imported.
    """
    # GH#56008
    pa = pytest.importorskip("pyarrow")
    bools = Series([False, True])
    strings = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))
    expected_eq = Series(False, index=bools.index)
    expected_ne = Series(True, index=bools.index)

    tm.assert_series_equal(bools == strings, expected_eq)
    tm.assert_series_equal(bools != strings, expected_ne)
    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > strings

    # GH#59505
    for arrow_dtype in ("string[pyarrow]", ArrowDtype(pa.string())):
        arrow_strings = strings.astype(arrow_dtype)

        tm.assert_series_equal(
            arrow_strings == bools, expected_eq.astype("bool[pyarrow]")
        )
        tm.assert_series_equal(
            arrow_strings != bools, expected_ne.astype("bool[pyarrow]")
        )
        with pytest.raises(TypeError, match="Invalid comparison"):
            bools > arrow_strings
def test_mul_bool_invalid(any_string_dtype):
    """
    Multiplying a string Series by booleans must raise TypeError.

    Covers a bool scalar and a bool ndarray on either side of ``*``.  The
    expected message depends on the dtype's storage.  The object-dtype case
    is skipped.
    """
    # GH#62595
    dtype = any_string_dtype
    ser = Series(["a", "b", "c"], dtype=dtype)
    if dtype == object:
        pytest.skip("This is not expect to raise")

    if dtype.storage == "python":
        msg = "Cannot multiply StringArray by bools. Explicitly cast to integers"
    else:
        msg = "Can only string multiply by an integer"

    bool_arr = np.array([True, False, True], dtype=bool)
    operand_pairs = [
        (False, ser),
        (ser, True),
        (ser, bool_arr),
        (bool_arr, ser),
    ]
    for lhs, rhs in operand_pairs:
        with pytest.raises(TypeError, match=msg):
            lhs * rhs
def test_add(any_string_dtype, request):
    """
    Check ``+``, ``Series.add`` and ``Series.radd`` on two string Series.

    A missing value on either side is expected to give a missing result,
    unless ``fill_value`` is passed, in which case only positions missing on
    both sides stay missing.  The object-dtype case is marked xfail.
    """
    dtype = any_string_dtype
    if dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="Need to update expected for numpy object dtype"
            )
        )

    left = Series(["a", "b", "c", None, None], dtype=dtype)
    right = Series(["x", "y", None, "z", None], dtype=dtype)

    result = left + right
    expected_add = Series(["ax", "by", None, None, None], dtype=dtype)
    tm.assert_series_equal(result, expected_add)
    tm.assert_series_equal(left.add(right), expected_add)

    tm.assert_series_equal(
        left.radd(right),
        Series(["xa", "yb", None, None, None], dtype=dtype),
    )

    tm.assert_series_equal(
        left.add(right, fill_value="-"),
        Series(["ax", "by", "c-", "-z", None], dtype=dtype),
    )
def test_add_2d(any_string_dtype, request):
    """
    Adding a (1, 3) object ndarray to a length-3 string array must raise.

    Both the array and a Series wrapping it are expected to raise ValueError
    matching "3 != 1".  Object dtype and pyarrow storage are marked xfail.
    """
    dtype = any_string_dtype
    if dtype == object or dtype.storage == "pyarrow":
        request.applymarker(
            pytest.mark.xfail(
                raises=None, reason="Failed: DID NOT RAISE <class 'ValueError'>"
            )
        )

    arr = pd.array(["a", "b", "c"], dtype=dtype)
    two_dim = np.array([["a", "b", "c"]], dtype=object)

    for left in (arr, Series(arr)):
        with pytest.raises(ValueError, match="3 != 1"):
            left + two_dim
def test_add_sequence(any_string_dtype, request, using_infer_string):
    """
    Add a plain Python list to a string array, in both operand orders.

    A missing value on either side is expected to give a missing result.
    Two dtype/option combinations are marked xfail; see the reasons below.
    """
    dtype = any_string_dtype

    # ``dtype != object`` must be checked first: it guards the attribute
    # accesses that follow.
    is_nan_python = (
        dtype != object and dtype.storage == "python" and dtype.na_value is np.nan
    )
    if is_nan_python and HAS_PYARROW and using_infer_string:
        request.applymarker(
            pytest.mark.xfail(
                reason="As of GH#62522, the list gets wrapped with sanitize_array, "
                "which casts to a higher-priority StringArray, so we get "
                "NotImplemented."
            )
        )
    if using_infer_string and dtype == np.dtype(object):
        request.applymarker(pytest.mark.xfail(reason="Cannot broadcast list"))

    arr = pd.array(["a", "b", None, None], dtype=dtype)
    seq = ["x", None, "y", None]

    arr_plus_seq = arr + seq
    tm.assert_extension_array_equal(
        arr_plus_seq, pd.array(["ax", None, None, None], dtype=dtype)
    )

    seq_plus_arr = seq + arr
    tm.assert_extension_array_equal(
        seq_plus_arr, pd.array(["xa", None, None, None], dtype=dtype)
    )
def test_string_add_missing_values(string_dtype_no_object):
    """
    Adding ``None``, ``np.nan`` or ``NA`` to a string array gives a missing value.

    Checked with the missing scalar on either side of ``+``.
    """
    # GH#64968 Arrow-backed str arrays should return NA when added to missing
    dtype = string_dtype_no_object
    arr = pd.array(["y"], dtype=dtype)
    all_missing = pd.array([NA], dtype=dtype)

    for missing in (None, np.nan, NA):
        # array on the left
        tm.assert_extension_array_equal(arr + missing, all_missing)
        # array on the right
        tm.assert_extension_array_equal(missing + arr, all_missing)
def test_mul(any_string_dtype):
    """
    Multiply a string array by the integer 2, in both operand orders.

    Each string is expected to be repeated twice and the missing entry to
    stay missing.
    """
    dtype = any_string_dtype
    arr = pd.array(["a", "b", None], dtype=dtype)
    doubled = pd.array(["aa", "bb", None], dtype=dtype)

    tm.assert_extension_array_equal(arr * 2, doubled)
    tm.assert_extension_array_equal(2 * arr, doubled)
def test_add_strings(any_string_dtype, request):
    """
    Add a string array and a one-row object DataFrame, in both orders.

    ``arr.__add__(df)`` must return NotImplemented, and the ``+`` results are
    expected to be DataFrames of concatenated strings cast to the string
    dtype.  Every dtype other than numpy object is marked xfail (GH-28527).
    """
    dtype = any_string_dtype
    if dtype != np.dtype(object):
        request.applymarker(pytest.mark.xfail(reason="GH-28527"))

    arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
    df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
    assert arr.__add__(df) is NotImplemented

    arr_plus_df = arr + df
    tm.assert_frame_equal(
        arr_plus_df, pd.DataFrame([["at", "by", "cv", "dw"]]).astype(dtype)
    )

    df_plus_arr = df + arr
    tm.assert_frame_equal(
        df_plus_arr, pd.DataFrame([["ta", "yb", "vc", "wd"]]).astype(dtype)
    )
@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
    """
    Add a string array with missing values and a one-row DataFrame.

    ``arr.__add__(df)`` must return NotImplemented, and the ``+`` results in
    both orders are expected to be DataFrames cast to ``dtype`` with NaN
    wherever either operand is missing.  Marked xfail (GH-28527).
    """
    arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
    df = pd.DataFrame([["x", np.nan, "y", np.nan]])
    assert arr.__add__(df) is NotImplemented

    arr_plus_df = arr + df
    tm.assert_frame_equal(
        arr_plus_df,
        pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype),
    )

    df_plus_arr = df + arr
    tm.assert_frame_equal(
        df_plus_arr,
        pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype),
    )
def test_comparison_methods_scalar(comparison_op, any_string_dtype):
    """
    Call a comparison dunder on a string array with the scalar ``"a"``.

    For object and NaN-backed dtypes the result is checked as a numpy bool
    array in which the missing slot is True only for ``!=``.  For NA-backed
    dtypes the result is checked as a masked boolean array built from the
    itemwise comparison results.
    """
    dtype = any_string_dtype
    op_name = f"__{comparison_op.__name__}__"
    a = pd.array(["a", None, "c"], dtype=dtype)
    other = "a"
    result = getattr(a, op_name)(other)

    itemwise = [getattr(item, op_name)(other) for item in a]

    if dtype == object or dtype.na_value is np.nan:
        expected = np.array(itemwise)
        # The missing slot compares False, except for ``!=`` where it is True.
        expected[1] = comparison_op == operator.ne
        result = extract_array(result, extract_numpy=True)
        tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
        return

    if dtype.storage == "pyarrow":
        expected_dtype = "boolean[pyarrow]"
    else:
        expected_dtype = "boolean"
    expected = pd.array(np.array(itemwise, dtype=object), dtype=expected_dtype)
    tm.assert_extension_array_equal(result, expected)
def test_comparison_methods_scalar_pd_na(comparison_op, any_string_dtype):
    """
    Call a comparison dunder on a string array with the scalar ``NA``.

    For object and NaN-backed dtypes the result is checked as a numpy bool
    array that is all True for ``!=`` and all False otherwise.  For NA-backed
    dtypes the result is checked as an all-missing masked boolean array whose
    dtype follows the string dtype's storage.
    """
    dtype = any_string_dtype
    op_name = f"__{comparison_op.__name__}__"
    a = pd.array(["a", None, "c"], dtype=dtype)
    result = getattr(a, op_name)(NA)

    if dtype == np.dtype(object) or dtype.na_value is np.nan:
        if operator.ne == comparison_op:
            expected = np.array([True, True, True])
        else:
            expected = np.array([False, False, False])
        result = extract_array(result, extract_numpy=True)
        tm.assert_numpy_array_equal(result, expected)
    else:
        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
        expected = pd.array([None, None, None], dtype=expected_dtype)
        tm.assert_extension_array_equal(result, expected)
def test_comparison_methods_scalar_not_string(comparison_op, any_string_dtype):
    """
    Call a comparison dunder on a string array with the integer ``42``.

    Ordering comparisons must raise TypeError.  ``==`` / ``!=`` are expected
    to give all-False / all-True for object and NaN-backed dtypes, and the
    same values with a missing middle entry for NA-backed dtypes.
    """
    op_name = f"__{comparison_op.__name__}__"
    dtype = any_string_dtype
    a = pd.array(["a", None, "c"], dtype=dtype)
    other = 42

    if op_name not in ("__eq__", "__ne__"):
        with pytest.raises(TypeError, match="Invalid comparison|not supported between"):
            getattr(a, op_name)(other)
        return

    # Only ``__eq__`` and ``__ne__`` reach this point.
    is_ne = op_name == "__ne__"
    result = extract_array(getattr(a, op_name)(other), extract_numpy=True)

    if dtype == np.dtype(object) or dtype.na_value is np.nan:
        expected = np.array([is_ne, is_ne, is_ne])
        tm.assert_numpy_array_equal(result, expected)
    else:
        if dtype.storage == "pyarrow":
            expected_dtype = "boolean[pyarrow]"
        else:
            expected_dtype = "boolean"
        expected = pd.array([is_ne, None, is_ne], dtype=expected_dtype)
        tm.assert_extension_array_equal(result, expected)
def test_comparison_methods_array(comparison_op, any_string_dtype, any_string_dtype2):
    """
    Compare two string arrays that may have different string dtypes.

    The comparison is run in both operand orders and the two results must be
    equal.  When both dtypes are object or NaN-backed, the result is checked
    as a numpy bool array.  Otherwise it is checked as a masked boolean array
    whose dtype follows the higher-priority operand's storage, with only the
    last (non-missing on both sides) position set.
    """
    op_name = f"__{comparison_op.__name__}__"
    dtype = any_string_dtype
    dtype2 = any_string_dtype2

    a = pd.array(["a", None, "c"], dtype=dtype)
    other = pd.array([None, None, "c"], dtype=dtype2)
    result = comparison_op(a, other)
    result = extract_array(result, extract_numpy=True)

    # ensure operation is commutative
    result2 = comparison_op(other, a)
    result2 = extract_array(result2, extract_numpy=True)
    tm.assert_equal(result, result2)

    if (dtype == object or dtype.na_value is np.nan) and (
        dtype2 == object or dtype2.na_value is np.nan
    ):
        if operator.ne == comparison_op:
            expected = np.array([True, True, False])
        else:
            expected = np.array([False, False, False])
            expected[-1] = getattr(other[-1], op_name)(a[-1])
        # ``result`` has already been passed through extract_array above.
        tm.assert_numpy_array_equal(result, expected)
    else:
        # An object-dtype operand never decides the result dtype.
        if dtype == object:
            max_dtype = dtype2
        elif dtype2 == object:
            max_dtype = dtype
        else:
            max_dtype = string_dtype_highest_priority(dtype, dtype2)
        if max_dtype.storage == "python":
            expected_dtype = "boolean"
        else:
            expected_dtype = "bool[pyarrow]"

        expected = np.full(len(a), fill_value=None, dtype="object")
        expected[-1] = getattr(other[-1], op_name)(a[-1])
        expected = pd.array(expected, dtype=expected_dtype)
        tm.assert_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_comparison_methods_array_arrow_extension(comparison_op, any_string_dtype):
    """
    Compare an ``ArrowDtype(pa.string())`` array with other string arrays.

    The comparison is run in both operand orders and the two results must be
    equal.  The expected result is a ``bool[pyarrow]`` array that is missing
    everywhere except the last position.
    """
    # Test pd.ArrowDtype(pa.string()) against other string arrays
    import pyarrow as pa

    op_name = f"__{comparison_op.__name__}__"
    arrow_arr = pd.array(["a", None, "c"], dtype=ArrowDtype(pa.string()))
    other = pd.array([None, None, "c"], dtype=any_string_dtype)

    result = comparison_op(arrow_arr, other)
    # ensure operation is commutative
    swapped = comparison_op(other, arrow_arr)
    tm.assert_equal(result, swapped)

    expected = pd.array([None, None, True], dtype="bool[pyarrow]")
    expected[-1] = getattr(other[-1], op_name)(arrow_arr[-1])
    tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("box", [pd.array, pd.Index, Series])
def test_comparison_methods_list(comparison_op, any_string_dtype, box, request):
    """
    Compare a boxed string array with a plain Python list.

    The comparison is run in both operand orders and the two results must be
    equal.  For object and NaN-backed dtypes the expected values are numpy
    bools; for NA-backed dtypes they are a masked boolean array with only the
    last position set.  Unless ``box`` is ``pd.Index``, the expected values
    are wrapped in ``box`` before the final comparison.
    """
    dtype = any_string_dtype
    if box is pd.array and dtype != object and dtype.na_value is np.nan:
        request.applymarker(
            pytest.mark.xfail(
                reason="After wrapping list, op returns NotImplemented, see GH#62522"
            )
        )

    op_name = f"__{comparison_op.__name__}__"
    boxed = box(pd.array(["a", None, "c"], dtype=dtype))
    item = "c"
    as_list = [None, None, "c"]

    result = comparison_op(boxed, as_list)
    # ensure operation is commutative
    swapped = comparison_op(as_list, boxed)
    tm.assert_equal(result, swapped)

    if dtype == np.dtype(object) or dtype.na_value is np.nan:
        if operator.ne == comparison_op:
            expected = np.array([True, True, False])
        else:
            expected = np.array([False, False, False])
            expected[-1] = getattr(item, op_name)(item)
        if box is not pd.Index:
            # if GH#62766 is addressed this check can be removed
            expected = box(expected, dtype=expected.dtype)
    else:
        if dtype.storage == "pyarrow":
            expected_dtype = "boolean[pyarrow]"
        else:
            expected_dtype = "boolean"
        values = np.full(len(boxed), fill_value=None, dtype="object")
        values[-1] = getattr(item, op_name)(item)
        expected = extract_array(
            pd.array(values, dtype=expected_dtype), extract_numpy=True
        )
        if box is not pd.Index:
            # if GH#62766 is addressed this check can be removed
            expected = tm.box_expected(expected, box)

    tm.assert_equal(result, expected)