Ajout type contrat

This commit is contained in:
2026-04-29 11:52:03 +02:00
parent 375549cb30
commit 1c0e4c3048
10530 changed files with 1842149 additions and 158 deletions

View File

@@ -0,0 +1,554 @@
from datetime import datetime
import numpy as np
import pytest
from pandas.errors import Pandas4Warning
from pandas.core.dtypes.common import is_extension_array_dtype
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.resample import _asfreq_compat
@pytest.fixture(
params=[
"linear",
"time",
"index",
"values",
"nearest",
"zero",
"slinear",
"quadratic",
"cubic",
"barycentric",
"krogh",
"from_derivatives",
"piecewise_polynomial",
"pchip",
"akima",
],
)
def all_1d_no_arg_interpolation_methods(request):
return request.param
@pytest.mark.parametrize("freq", ["2D", "1h"])
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "10 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
],
)
def test_asfreq(frame_or_series, index, freq):
obj = frame_or_series(range(len(index)), index=index)
idx_range = date_range if isinstance(index, DatetimeIndex) else timedelta_range
result = obj.resample(freq).asfreq()
new_index = idx_range(obj.index[0], obj.index[-1], freq=freq)
expected = obj.reindex(new_index)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "10 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
],
)
def test_asfreq_fill_value(index):
# test for fill value during resampling, issue 3715
ser = Series(range(len(index)), index=index, name="a")
idx_range = date_range if isinstance(index, DatetimeIndex) else timedelta_range
result = ser.resample("1h").asfreq()
new_index = idx_range(ser.index[0], ser.index[-1], freq="1h")
expected = ser.reindex(new_index)
tm.assert_series_equal(result, expected)
# Explicit cast to float to avoid implicit cast when setting None
frame = ser.astype("float").to_frame("value")
frame.iloc[1] = None
result = frame.resample("1h").asfreq(fill_value=4.0)
new_index = idx_range(frame.index[0], frame.index[-1], freq="1h")
expected = frame.reindex(new_index, fill_value=4.0)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "3 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
],
)
def test_resample_interpolate(index):
# GH#12925
df = DataFrame(range(len(index)), index=index)
result = df.resample("1min").asfreq().interpolate()
expected = df.resample("1min").interpolate()
tm.assert_frame_equal(result, expected)
def test_resample_interpolate_inplace_deprecated():
# GH#58690
dti = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
df = DataFrame(range(len(dti)), index=dti)
rs = df.resample("1min")
msg = "The 'inplace' keyword in DatetimeIndexResampler.interpolate"
with tm.assert_produces_warning(Pandas4Warning, match=msg):
rs.interpolate(inplace=False)
msg2 = "Cannot interpolate inplace on a resampled object"
with pytest.raises(ValueError, match=msg2):
with tm.assert_produces_warning(Pandas4Warning, match=msg):
rs.interpolate(inplace=True)
def test_resample_interpolate_regular_sampling_off_grid(
all_1d_no_arg_interpolation_methods,
):
pytest.importorskip("scipy")
# GH#21351
index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
ser = Series(np.arange(5.0), index)
method = all_1d_no_arg_interpolation_methods
result = ser.resample("1h").interpolate(method)
if method == "linear":
values = np.repeat(np.arange(0.0, 4.0), 2) + np.tile([1 / 3, 2 / 3], 4)
elif method == "nearest":
values = np.repeat(np.arange(0.0, 5.0), 2)[1:-1]
elif method == "zero":
values = np.repeat(np.arange(0.0, 4.0), 2)
else:
values = 0.491667 + np.arange(0.0, 4.0, 0.5)
values = np.insert(values, 0, np.nan)
index = date_range("2000-01-01 00:00:00", periods=9, freq="1h")
expected = Series(values, index=index)
tm.assert_series_equal(result, expected)
def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods):
pytest.importorskip("scipy")
# GH#21351
ser = Series(
np.linspace(0.0, 1.0, 5),
index=DatetimeIndex(
[
"2000-01-01 00:00:03",
"2000-01-01 00:00:22",
"2000-01-01 00:00:24",
"2000-01-01 00:00:31",
"2000-01-01 00:00:39",
]
),
)
# Resample to 5 second sampling and interpolate with the given method
ser_resampled = ser.resample("5s").interpolate(all_1d_no_arg_interpolation_methods)
# Check that none of the resampled values are NaN, except the first one
# which lies 3 seconds before the first actual data point
assert np.isnan(ser_resampled.iloc[0])
assert not ser_resampled.iloc[1:].isna().any()
def test_raises_on_non_datetimelike_index():
# this is a non datetimelike index
xp = DataFrame()
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
"but got an instance of 'RangeIndex'"
)
with pytest.raises(TypeError, match=msg):
xp.resample("YE")
@pytest.mark.parametrize(
"index",
[
PeriodIndex([], freq="D", name="a"),
DatetimeIndex([], name="a"),
TimedeltaIndex([], name="a"),
],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_series(freq, index, resample_method):
# GH12771 & GH12868
ser = Series(index=index, dtype=float)
if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
ser.resample(freq)
return
elif freq == "ME" and isinstance(ser.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
rs = ser.resample(freq)
result = getattr(rs, resample_method)()
if resample_method == "ohlc":
expected = DataFrame(
[], index=ser.index[:0], columns=["open", "high", "low", "close"]
)
expected.index = _asfreq_compat(ser.index, freq)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
expected = ser.copy()
expected.index = _asfreq_compat(ser.index, freq)
tm.assert_series_equal(result, expected, check_dtype=False)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
@pytest.mark.parametrize("min_count", [0, 1])
def test_resample_empty_sum_string(string_dtype_no_object, min_count):
# https://github.com/pandas-dev/pandas/issues/60229
dtype = string_dtype_no_object
ser = Series(
pd.NA,
index=DatetimeIndex(
[
"2000-01-01 00:00:00",
"2000-01-01 00:00:10",
"2000-01-01 00:00:20",
"2000-01-01 00:00:30",
]
),
dtype=dtype,
)
rs = ser.resample("20s")
result = rs.sum(min_count=min_count)
value = "" if min_count == 0 else pd.NA
index = date_range(start="2000-01-01", freq="20s", periods=2, unit="us")
expected = Series(value, index=index, dtype=dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"freq",
[
pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
"D",
"h",
],
)
def test_resample_nat_index_series(freq, resample_method):
# GH39227
ser = Series(range(5), index=PeriodIndex([NaT] * 5, freq=freq))
rs = ser.resample(freq)
result = getattr(rs, resample_method)()
if resample_method == "ohlc":
expected = DataFrame(
[], index=ser.index[:0], columns=["open", "high", "low", "close"]
)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
expected = ser[:0].copy()
tm.assert_series_equal(result, expected, check_dtype=False)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
@pytest.mark.parametrize(
"index",
[
PeriodIndex([], freq="D", name="a"),
DatetimeIndex([], name="a"),
TimedeltaIndex([], name="a"),
],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, index, resample_method):
# GH28427
ser = Series(index=index)
if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
ser.resample(freq)
return
elif freq == "ME" and isinstance(ser.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
rs = ser.resample(freq)
result = getattr(rs, resample_method)()
index = _asfreq_compat(ser.index, freq)
expected = Series([], dtype="int64", index=index, name=ser.name)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_dataframe(index, freq, resample_method):
# GH13212
df = DataFrame(index=index)
# count retains dimensions too
if freq == "ME" and isinstance(df.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
df.resample(freq, group_keys=False)
return
elif freq == "ME" and isinstance(df.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
rs = df.resample(freq, group_keys=False)
result = getattr(rs, resample_method)()
if resample_method == "ohlc":
# TODO: no tests with len(df.columns) > 0
mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
expected = DataFrame([], index=df.index[:0], columns=mi, dtype=np.float64)
expected.index = _asfreq_compat(df.index, freq)
elif resample_method != "size":
expected = df.copy()
else:
# GH14962
expected = Series([], dtype=np.int64)
expected.index = _asfreq_compat(df.index, freq)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
tm.assert_almost_equal(result, expected)
# test size for GH13212 (currently stays as df)
@pytest.mark.parametrize(
"index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_count_empty_dataframe(freq, index):
# GH28427
empty_frame_dti = DataFrame(index=index, columns=Index(["a"], dtype=object))
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
empty_frame_dti.resample(freq)
return
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
result = empty_frame_dti.resample(freq).count()
index = _asfreq_compat(empty_frame_dti.index, freq)
expected = DataFrame(dtype="int64", index=index, columns=Index(["a"], dtype=object))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")]
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_size_empty_dataframe(freq, index):
# GH28427
empty_frame_dti = DataFrame(index=index, columns=Index(["a"], dtype=object))
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
empty_frame_dti.resample(freq)
return
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
result = empty_frame_dti.resample(freq).size()
index = _asfreq_compat(empty_frame_dti.index, freq)
expected = Series([], dtype="int64", index=index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [DatetimeIndex([]), TimedeltaIndex([])])
@pytest.mark.parametrize("freq", ["D", "h"])
@pytest.mark.parametrize(
"method", ["ffill", "bfill", "nearest", "asfreq", "interpolate", "mean"]
)
def test_resample_apply_empty_dataframe(index, freq, method):
# GH#55572
empty_frame_dti = DataFrame(index=index)
rs = empty_frame_dti.resample(freq)
result = rs.apply(getattr(rs, method))
expected_index = _asfreq_compat(empty_frame_dti.index, freq)
expected = DataFrame([], index=expected_index)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
PeriodIndex([], freq="M", name="a"),
DatetimeIndex([], name="a"),
TimedeltaIndex([], name="a"),
],
)
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
empty_series_dti = Series([], index, dtype)
rs = empty_series_dti.resample("D", group_keys=False)
try:
getattr(rs, resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object_)
pass
@pytest.mark.parametrize(
"index",
[
PeriodIndex([], freq="D", name="a"),
DatetimeIndex([], name="a"),
TimedeltaIndex([], name="a"),
],
)
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_apply_to_empty_series(index, freq):
# GH 14313
ser = Series(index=index)
if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24h' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
ser.resample(freq)
return
elif freq == "ME" and isinstance(ser.index, PeriodIndex):
# index is PeriodIndex, so convert to corresponding Period freq
freq = "M"
result = ser.resample(freq, group_keys=False).apply(lambda x: 1)
expected = ser.resample(freq).apply("sum")
tm.assert_series_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "10 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
],
)
def test_resampler_is_iterable(index):
# GH 15314
series = Series(range(len(index)), index=index)
freq = "h"
tg = Grouper(freq=freq, convention="start")
grouped = series.groupby(tg)
resampled = series.resample(freq)
for (rk, rv), (gk, gv) in zip(resampled, grouped):
assert rk == gk
tm.assert_series_equal(rv, gv)
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "10 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
],
)
def test_resample_quantile(index):
# GH 15023
ser = Series(range(len(index)), index=index)
q = 0.75
freq = "h"
result = ser.resample(freq).quantile(q)
expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
# GH#57019
if is_extension_array_dtype(any_real_nullable_dtype):
na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
else:
na_value = np.nan
df = DataFrame(
{
"a": [2, 1, 1, 2],
"b": [na_value, 3.0, na_value, 4.0],
"c": [na_value, 3.0, na_value, 4.0],
},
index=date_range("2020-01-01", periods=4, freq="D", unit="ns"),
dtype=any_real_nullable_dtype,
)
rs = df.resample("ME")
method = getattr(rs, how)
result = method(skipna=skipna)
ts = pd.to_datetime("2020-01-31").as_unit("ns")
gb = df.groupby(df.shape[0] * [ts])
expected = getattr(gb, how)(skipna=skipna)
expected.index.freq = "ME"
tm.assert_frame_equal(result, expected)