26 changes: 15 additions & 11 deletions pandas/tests/computation/test_eval.py
@@ -104,17 +104,21 @@ def _eval_single_bin(lhs, cmp1, rhs, engine):
ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"],
)
def lhs(request):
nan_df1 = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
nan_df1[nan_df1 > 0.5] = np.nan

opts = (
DataFrame(np.random.default_rng(2).standard_normal((10, 5))),
Series(np.random.default_rng(2).standard_normal(5)),
Series([1, 2, np.nan, np.nan, 5]),
nan_df1,
np.random.default_rng(2).standard_normal(),
)
return opts[request.param]
rng = np.random.default_rng(2)
if request.param == 0:
return DataFrame(rng.standard_normal((10, 5)))
elif request.param == 1:
return Series(rng.standard_normal(5))
elif request.param == 2:
return Series([1, 2, np.nan, np.nan, 5])
elif request.param == 3:
nan_df1 = DataFrame(rng.standard_normal((10, 5)))
nan_df1[nan_df1 > 0.5] = np.nan
return nan_df1
elif request.param == 4:
return rng.standard_normal()
else:
raise ValueError(f"{request.param}")


rhs = lhs
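Aside: the rewrite above trades an eagerly built tuple of all five operands for branches that construct only the requested one, so each parametrized test materializes a single object. A minimal standalone sketch of the pattern (fixture and test names here are hypothetical, not from this diff):

import numpy as np
import pytest

@pytest.fixture(params=range(2), ids=["array", "scalar"])
def operand(request):
    # Build only the object this parametrization asks for; the other
    # candidate is never constructed.
    rng = np.random.default_rng(0)
    if request.param == 0:
        return rng.standard_normal(5)
    return rng.standard_normal()

def test_operand_is_finite(operand):
    assert np.isfinite(operand).all()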
1 change: 1 addition & 0 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -71,6 +71,7 @@ def b(df, cols):
    return df.drop_duplicates(subset=cols[:-1])


+@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
50 changes: 16 additions & 34 deletions pandas/tests/io/parser/common/test_chunksize.py
@@ -229,17 +229,21 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
    assert result.a.dtype == float


-def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string):
+def test_warn_if_chunks_have_mismatched_type(
+    all_parsers, using_infer_string, monkeypatch
+):
    warning_type = None
    parser = all_parsers
-    size = 10000
+    heuristic = 2**3
+    size = 10

    # see gh-3866: if chunks are different types and can't
    # be coerced using numerical types, then issue warning.
    if parser.engine == "c" and parser.low_memory:
        warning_type = DtypeWarning
-        # Use larger size to hit warning path
-        size = 499999
+        # Use a size to hit warning path dictated by DEFAULT_BUFFER_HEURISTIC
+        # monkeypatched below
+        size = heuristic - 1
Review comment (Member): the comment above makes me think in this case size was made bigger. now you're making it smaller. am i interpreting the comment wrong?

Reply (Member, Author): Fair point, the comment becomes outdated with this change. I'll update it.

For reference, I'm using the same technique used in the test above, test_chunks_have_consistent_numerical_type, to patch DEFAULT_BUFFER_HEURISTIC so we don't have to run a test with so much data to hit a condition.
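A minimal sketch of that technique, assuming only what this diff shows (libparsers is pandas._libs.parsers, and DEFAULT_BUFFER_HEURISTIC governs how many rows the C parser reads per low-memory chunk):

import io

import pandas as pd
import pytest

from pandas._libs import parsers as libparsers

def test_mixed_type_warning_on_tiny_input(monkeypatch):
    # Shrinking the heuristic makes the C parser split ~16 rows into
    # several internal chunks, so the mixed-type warning path is
    # reachable without ~500k rows of data.
    monkeypatch.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", 2**3)
    integers = [str(i) for i in range(7)]
    data = "a\n" + "\n".join(integers + ["a", "b"] + integers)
    with pytest.warns(pd.errors.DtypeWarning):
        pd.read_csv(io.StringIO(data), low_memory=True)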


    integers = [str(i) for i in range(size)]
    data = "a\n" + "\n".join(integers + ["a", "b"] + integers)
@@ -251,12 +255,14 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string):
            buf,
        )
    else:
-        df = parser.read_csv_check_warnings(
-            warning_type,
-            r"Columns \(0: a\) have mixed types. "
-            "Specify dtype option on import or set low_memory=False.",
-            buf,
-        )
+        with monkeypatch.context() as m:
+            m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
+            df = parser.read_csv_check_warnings(
+                warning_type,
+                r"Columns \(0: a\) have mixed types. "
+                "Specify dtype option on import or set low_memory=False.",
+                buf,
+            )
    if parser.engine == "c" and parser.low_memory:
        assert df.a.dtype == object
    elif using_infer_string:
@@ -295,30 +301,6 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator):
    tm.assert_frame_equal(result, expected)


-def test_read_csv_memory_growth_chunksize(temp_file, all_parsers):
-    # see gh-24805
-    #
-    # Let's just make sure that we don't crash
-    # as we iteratively process all chunks.
-    parser = all_parsers
-
-    with open(temp_file, "w", encoding="utf-8") as f:
-        for i in range(1000):
-            f.write(str(i) + "\n")
-
-    if parser.engine == "pyarrow":
-        msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
-        with pytest.raises(ValueError, match=msg):
-            with parser.read_csv(temp_file, chunksize=20) as result:
-                for _ in result:
-                    pass
-        return
-
-    with parser.read_csv(temp_file, chunksize=20) as result:
-        for _ in result:
-            pass


def test_chunksize_with_usecols_second_block_shorter(all_parsers):
    # GH#21211
    parser = all_parsers
5 changes: 2 additions & 3 deletions pandas/tests/io/parser/test_parse_dates.py
@@ -265,12 +265,11 @@ def test_bad_date_parse(all_parsers, cache, value):
    )


@pytest.mark.parametrize("value", ["0"])
def test_bad_date_parse_with_warning(all_parsers, cache, value):
def test_bad_date_parse_with_warning(all_parsers, cache):
# if we have an invalid date make sure that we handle this with
# and w/o the cache properly.
parser = all_parsers
s = StringIO((f"{value},\n") * 50000)
s = StringIO(("0,\n") * (start_caching_at + 1))

if parser.engine == "pyarrow":
# pyarrow reads "0" as 0 (of type int64), and so
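For context, start_caching_at is pandas' internal threshold above which to_datetime builds a string-to-Timestamp cache (defined in pandas.core.tools.datetimes; the import path and current value of 50 are assumptions, not from this diff). Sizing the input one element past it exercises the cached path without the old 50000-row payload. A hedged sketch:

import pandas as pd

from pandas.core.tools.datetimes import start_caching_at  # assumed location

# One element past the threshold is enough to take the cached path.
values = ["2012-01-01"] * (start_caching_at + 1)
result = pd.to_datetime(values, cache=True)
assert (result == pd.Timestamp("2012-01-01")).all()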
4 changes: 2 additions & 2 deletions pandas/tests/libs/test_hashtable.py
@@ -247,7 +247,7 @@ def test_get_state(self, table_type, dtype):
        assert "n_buckets" in state
        assert "upper_bound" in state

-    @pytest.mark.parametrize("N", range(1, 110))
+    @pytest.mark.parametrize("N", range(1, 110, 4))
    def test_no_reallocation(self, table_type, dtype, N):
        keys = np.arange(N).astype(dtype)
        preallocated_table = table_type(N)
@@ -517,7 +517,7 @@ def test_tracemalloc_for_empty_StringHashTable():
    assert get_allocated_khash_memory() == 0


-@pytest.mark.parametrize("N", range(1, 110))
+@pytest.mark.parametrize("N", range(1, 110, 4))
def test_no_reallocation_StringHashTable(N):
    keys = np.arange(N).astype(np.str_).astype(np.object_)
    preallocated_table = ht.StringHashTable(N)
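Sampling range(1, 110, 4) still lands table sizes on both sides of each power-of-two bucket boundary, which is where a reallocation would occur. A standalone sketch of the property for a single N (get_state and n_buckets appear in this diff; the rest is an assumption about the hashtable API):

import numpy as np

from pandas._libs import hashtable as ht

N = 37
table = ht.Int64HashTable(N)  # preallocate buckets for N keys
n_buckets_before = table.get_state()["n_buckets"]
table.map_locations(np.arange(N, dtype=np.int64))  # insert N keys
# A correctly preallocated table should not have had to grow.
assert table.get_state()["n_buckets"] == n_buckets_before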
20 changes: 9 additions & 11 deletions pandas/tests/plotting/frame/test_frame.py
@@ -4,11 +4,9 @@
    date,
    datetime,
)
-import gc
import itertools
import re
import string
-import weakref

import numpy as np
import pytest
@@ -2173,15 +2171,15 @@ def test_memory_leak(self, kind):
            index=date_range("2000-01-01", periods=10, freq="B"),
        )

-        # Use a weakref so we can see if the object gets collected without
-        # also preventing it from being collected
-        ref = weakref.ref(df.plot(kind=kind, **args))
-
-        # have matplotlib delete all the figures
-        plt.close("all")
-        # force a garbage collection
-        gc.collect()
-        assert ref() is None
+        ax = df.plot(kind=kind, **args)
+        # https://github.com/pandas-dev/pandas/issues/9003#issuecomment-70544889
+        if kind in ["line", "area"]:
+            for i, (cached_data, _, _) in enumerate(ax._plot_data):
+                ser = df.iloc[:, i]
+                assert not tm.shares_memory(ser, cached_data)
+                tm.assert_numpy_array_equal(ser._values, cached_data._values)
+        else:
+            assert not hasattr(ax, "_plot_data")
Review comment (Member): this new test looks fine, but it isn't obvious to me that it is testing the same thing as the old one.

Reply (Member, Author): Fair point. I added a comment pointing to the Github issue describing where the original reference cycle was.
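For reference, the deleted assertion used the standard weakref leak check, sketched here standalone (not pandas code): once no strong reference remains, a collected object's weakref resolves to None, while a reference cycle like the one in GH#9003 would keep the object alive.

import gc
import weakref

class Plot:  # stand-in for the plotted object under test
    pass

ref = weakref.ref(Plot())  # no strong reference survives this statement
gc.collect()
assert ref() is None  # a leak would leave ref() returning the object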


    def test_df_gridspec_patterns_vert_horiz(self):
        # GH 10819
4 changes: 2 additions & 2 deletions pandas/tests/resample/test_datetime_index.py
@@ -526,7 +526,7 @@ def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit):


def test_resample_ohlc(unit):
-    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="Min")
+    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 2), freq="Min")
    s = Series(range(len(index)), index=index)
    s.index.name = "index"
    s.index = s.index.as_unit(unit)
@@ -1842,7 +1842,7 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit):
    # GH 24127
    n1_ = n1 * k
    n2_ = n2 * k
-    dti = date_range("1991-09-05", "1991-09-12", freq=freq1).as_unit(unit)
+    dti = date_range("1991-09-05", "1991-09-06", freq=freq1).as_unit(unit)
    ser = Series(range(len(dti)), index=dti)

    result1 = ser.resample(str(n1_) + freq1).mean()
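The shorter range leaves the tested property intact: resampling with offsets that denote the same span must agree. A hedged illustration with concrete frequencies (not the parametrized values of the test):

import pandas as pd

dti = pd.date_range("1991-09-05", "1991-09-06", freq="min")
ser = pd.Series(range(len(dti)), index=dti)

# "2D" and "48h" denote the same offset, so the bins and means match.
r1 = ser.resample("2D").mean()
r2 = ser.resample("48h").mean()
pd.testing.assert_series_equal(r1, r2, check_freq=False)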
7 changes: 3 additions & 4 deletions pandas/tests/resample/test_period_index.py
@@ -130,7 +130,7 @@ def test_selection(self, freq, kwargs):
    def test_annual_upsample_cases(
        self, offset, period, conv, meth, month, simple_period_range_series
    ):
-        ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"Y-{month}")
+        ts = simple_period_range_series("1/1/1990", "12/31/1990", freq=f"Y-{month}")
        warn = FutureWarning if period == "B" else None
        msg = r"PeriodDtype\[B\] is deprecated"
        with tm.assert_produces_warning(warn, match=msg):
@@ -214,7 +214,7 @@ def test_quarterly_upsample(
        self, month, offset, period, convention, simple_period_range_series
    ):
        freq = f"Q-{month}"
-        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
+        ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=freq)
        warn = FutureWarning if period == "B" else None
        msg = r"PeriodDtype\[B\] is deprecated"
        with tm.assert_produces_warning(warn, match=msg):
@@ -396,8 +396,7 @@ def test_fill_method_and_how_upsample(self):
    @pytest.mark.parametrize("convention", ["start", "end"])
    def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
        freq = f"W-{day}"
-        ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
-
+        ts = simple_period_range_series("1/1/1990", "07/31/1990", freq=freq)
        warn = None if target == "D" else FutureWarning
        msg = r"PeriodDtype\[B\] is deprecated"
        with tm.assert_produces_warning(warn, match=msg):