diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 438306f01772a..5fde35b3f29f8 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -226,7 +226,7 @@ For label slicing, both endpoints are *included*: .. ipython:: python - df.loc["20130102":"20130104", ["A", "B"]] + df.loc["2013-01-02":"2013-01-04", ["A", "B"]] Selecting a single row and column label returns a scalar: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d11ab82294be1..b25a66bd2a69f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -739,6 +739,7 @@ Other Deprecations - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`) - Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`) - Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`) +- Deprecated non-ISO date string formats in :meth:`DatetimeIndex.__getitem__` with string labels. Use ISO format (YYYY-MM-DD) instead. (:issue:`58302`) - Deprecated option "future.no_silent_downcasting", as it is no longer used. 
# Fully anchored grammar for the ISO 8601 *extended* forms that are accepted
# without a deprecation warning.  Compiled once because the check runs on
# every string-label lookup.
_ISO_8601_EXTENDED_RE = re.compile(
    r"""
    \d{4}                                     # YYYY
    (?:-\d{1,2}                               # -MM
      (?:-\d{1,2}                             # -DD
        (?:[T ]\d{1,2}                        # 'T' or space separator, then HH
          (?::\d{1,2}                         # :MM
            (?::\d{1,2}(?:[.,]\d+)?)?         # :SS with optional fraction
          )?
          (?:\s?(?:Z|[+-]\d{2}(?::?\d{2})?))? # optional UTC designator / offset
        )?
      )?
    )?
    """,
    re.VERBOSE,
)


def _is_iso_format_string(date_str: str) -> bool:
    """
    Check whether a date string is written in ISO 8601 extended format.

    The whole string is validated, not just its prefix: it must consist of a
    4-digit year, optionally followed by a hyphen-separated month, then a
    hyphen-separated day, then a time component introduced by ``T`` or a
    single space (``HH[:MM[:SS[.fff]]]`` with an optional ``Z`` or UTC
    offset).  A time component is only accepted after a complete date.

    The compact basic form (``20240110``) is not treated as ISO here, in line
    with the ``YYYY-MM-DD`` guidance given in the deprecation message.

    Parameters
    ----------
    date_str : str
        The label to classify.

    Returns
    -------
    bool
        True if ``date_str`` is an ISO 8601 extended date or datetime string,
        False otherwise.

    Examples
    --------
    ISO format (True):

    - 2024
    - 2024-01
    - 2024-01-10
    - 2024-01-10T00:00:00
    - 2024-01-10 09:30

    Non-ISO format (False):

    - 2024/01/10 (/ separator)
    - 2024 01 10 (space separator)
    - 2024-Jan-10 (month name)
    - 01/10/2024 (MM/DD/YYYY)
    - 10/01/2024 (DD/MM/YYYY)
    - 01-10-2024 (MM-DD-YYYY)
    """
    # fullmatch (rather than match + "$") also rejects a trailing newline and
    # any non-ISO text that follows a valid-looking "YYYY-" prefix.
    return _ISO_8601_EXTENDED_RE.fullmatch(date_str) is not None
+ ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) self._disallow_mismatched_indexing(parsed) if self._can_partial_date_slice(reso): @@ -688,6 +720,23 @@ def slice_indexer(self, start=None, end=None, step=None): def check_str_or_none(point) -> bool: return point is not None and not isinstance(point, str) + # GH#58302 - Deprecate non-ISO string formats in .loc indexing + if isinstance(start, str) and not _is_iso_format_string(start): + msg = ( + "Parsing non-ISO datetime strings in .loc is deprecated " + "and will be removed in a future version. Use ISO format " + f"(YYYY-MM-DD) instead. Got '{start}'." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + + if isinstance(end, str) and not _is_iso_format_string(end): + msg = ( + "Parsing non-ISO datetime strings in .loc is deprecated " + "and will be removed in a future version. Use ISO format " + f"(YYYY-MM-DD) instead. Got '{end}'." + ) + warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level()) + # GH#33146 if start and end are combinations of str and None and Index is not # monotonic, we can not use Index.slice_indexer because it does not honor the # actual elements, is only searching for start and end @@ -707,6 +756,7 @@ def check_str_or_none(point) -> bool: if end is not None: end_casted = self._maybe_cast_slice_bound(end, "right") + mask = (self <= end_casted) & mask in_index &= (end_casted == self).any() diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 519c2c3064e59..7ba0c0cd6bf46 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -72,6 +72,9 @@ def seed_df(seed_nans, n, m): @pytest.mark.parametrize("bins", [None, [0, 5]], ids=repr) @pytest.mark.parametrize("isort", [True, False]) @pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")]) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO 
datetime strings:pandas.errors.Pandas4Warning" +) def test_series_groupby_value_counts( seed_nans, num_rows, diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4955b1fe0da54..f5201dc4e94dc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2851,6 +2851,9 @@ def test_groupby_with_Time_Grouper(unit): tm.assert_frame_equal(result, expected_output) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_groupby_series_with_datetimeindex_month_name(): # GH 48509 s = Series([0, 1, 0], index=date_range("2022-01-01", periods=3), name="jan") diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 11877024e7be0..fd0765dc3964f 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -489,6 +489,9 @@ def test_get_loc_timedelta_invalid_key(self, key): with pytest.raises(TypeError, match=msg): dti.get_loc(key) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_get_loc_reasonable_key_error(self): # GH#1062 index = DatetimeIndex(["1/3/2000"]) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 94175a56f1c4a..f12c958d22269 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( DataFrame, DatetimeIndex, @@ -19,6 +21,10 @@ class TestSlicing: + pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) + def test_string_index_series_name_converted(self): # GH#1644 df = DataFrame( @@ -464,3 +470,94 @@ def 
class TestDatetimeIndexNonISODeprecation:
    """
    Tests for deprecation of non-ISO string formats in .loc indexing. GH#58302

    Every test indexes the same 15-row daily Series, so each label maps to a
    known position and results can be compared exactly rather than only
    checked for being non-empty.
    """

    @pytest.fixture
    def ser_daily(self):
        """Series holding 0..14 on a daily index, 2024-01-01 through 2024-01-15."""
        # date_range already returns a DatetimeIndex; no extra wrapping needed.
        return Series(range(15), index=date_range("2024-01-01", periods=15, freq="D"))

    @pytest.mark.parametrize(
        "date_string",
        [
            "1/10/2024",  # M/DD/YYYY format
            "01/10/2024",  # MM/DD/YYYY format with leading zero
        ],
    )
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc[date_string]
        # Parsed month-first: 2024-01-10 sits at position 9.
        assert result == 9

    @pytest.mark.parametrize(
        "date_string,expected",
        [
            ("2024-01-10", 9),  # YYYY-MM-DD (ISO format)
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - ISO format (YYYY-MM-DD) should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[date_string]
        assert result == expected

    @pytest.mark.parametrize(
        "start_string",
        [
            "1/10/2024",  # M/DD/YYYY format
            "01/10/2024",  # MM/DD/YYYY format with leading zero
        ],
    )
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - Non-ISO start in slice should warn
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc[start_string:"2024-01-15"]
        # Label slices include both endpoints: 2024-01-10 .. 2024-01-15.
        tm.assert_series_equal(result, ser_daily.iloc[9:])

    @pytest.mark.parametrize(
        "end_string",
        [
            "5-01-2024",  # M-DD-YYYY format (parsed month-first: 2024-05-01)
            "05-01-2024",  # MM-DD-YYYY format with leading zero
        ],
    )
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - Non-ISO end in slice should warn
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=msg):
            result = ser_daily.loc["2024-01-01":end_string]
        # The end bound (1 May 2024) lies past the last row, so the slice
        # returns the whole Series.
        tm.assert_series_equal(result, ser_daily)

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - Non-ISO start and end bounds in the same slice should warn
        msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        # NOTE(review): only the presence of a matching warning is checked
        # here, not how many are emitted; confirm why the stacklevel check
        # has to be disabled for this case.
        with tm.assert_produces_warning(
            Pandas4Warning, match=msg, check_stacklevel=False
        ):
            result = ser_daily.loc["1/10/2024":"5-01-2024"]
        # 2024-01-10 up to the end of the data (the end bound is 2024-05-01).
        tm.assert_series_equal(result, ser_daily.iloc[9:])

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - ISO slice formats should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc["2024-01-05":"2024-01-10"]
        # Both endpoints included: 2024-01-05 .. 2024-01-10.
        tm.assert_series_equal(result, ser_daily.iloc[4:10])

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - Non-string keys should not warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[Timestamp("2024-01-10")]
        assert result == 9
b/pandas/tests/indexes/period/test_partial_slicing.py index 8d173d850583f..58a6b3c2feef4 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -12,6 +12,10 @@ class TestPeriodIndex: + pytestmark = pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) + def test_getitem_periodindex_duplicates_string_slice(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN") diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 7f298e9bdd375..14702891e5fd3 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -308,6 +308,9 @@ def test_multiindex_slicers_datetimelike(self): ] tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_multiindex_slicers_edges(self): # GH 8132 # various edge cases diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index de2d914aab229..3d1131ffd57bc 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -261,6 +261,9 @@ def test_loc_getitem_single_boolean_arg(self, obj, key, exp): class TestLocBaseIndependent: # Tests for loc that do not depend on subclassing Base + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_loc_npstr(self): # GH#45580 df = DataFrame(index=date_range("2021", "2022")) @@ -1262,6 +1265,9 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string expected = DataFrame(col_data, columns=["A"], dtype=float) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_loc_getitem_time_object(self, frame_or_series): rng = 
date_range("1/1/2000", "1/5/2000", freq="5min") mask = (rng.hour == 9) & (rng.minute == 30) @@ -2415,6 +2421,9 @@ def test_loc_getitem_partial_slice_non_monotonicity( class TestLabelSlicing: + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_loc_getitem_slicing_datetimes_frame(self): # GH#7523 diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3cd7f6c336956..ae3ff0adc6046 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -327,6 +327,9 @@ def test_resample_rounding(unit): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_resample_basic_from_daily(unit): # from daily dti = date_range( @@ -551,6 +554,9 @@ def test_resample_ohlc(unit): assert xs["close"] == s.iloc[4] +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_resample_ohlc_result(unit): # GH 12332 index = date_range("1-1-2000", "2-15-2000", freq="h").as_unit(unit) @@ -662,6 +668,9 @@ def test_resample_timestamp_to_period( tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_ohlc_5min(unit): def _ohlc(group): if isna(group).all(): @@ -1576,6 +1585,9 @@ def test_resample_dst_anchor(unit): ) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_resample_dst_anchor2(unit): dti = date_range( "2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris" diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index f3c52a674cf66..73be113cd1a15 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ 
b/pandas/tests/resample/test_resampler_grouper.py @@ -134,6 +134,9 @@ def test_groupby_resample_on_api_with_getitem(): tm.assert_series_equal(result, exp) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_groupby_with_origin(): # GH 31809 diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 97cafc33611ed..be60633a5b051 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -28,6 +28,9 @@ import pandas._testing as tm +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_fancy_getitem(): dti = date_range( freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) @@ -46,6 +49,9 @@ def test_fancy_getitem(): ) +@pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" +) def test_fancy_setitem(): dti = date_range( freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 37d6c9b42e003..300365f887661 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -147,6 +147,9 @@ def test_getitem_pydatetime_tz(self, tzstr): assert ts[time_pandas] == ts[time_datetime] @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_string_index_alias_tz_aware(self, tz): rng = date_range("1/1/2000", periods=10, tz=tz) ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) @@ -233,6 +236,9 @@ def test_getitem_partial_str_slice_with_datetimeindex(self): tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime 
strings:pandas.errors.Pandas4Warning" + ) def test_getitem_slice_strings_with_datetimeindex(self): idx = DatetimeIndex( ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index a67f3ec708f24..780b3b0345fa3 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -46,6 +46,9 @@ def test_series_getitem_multiindex_xs_by_label(self): result = ser.xs("one", level="L2") tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning" + ) def test_series_getitem_multiindex_xs(self): # GH#6258 dt = list(date_range("20130903", periods=3))