TEST-modin-project#7151: Remove usage of pandas._testing private module
Signed-off-by: Anatoly Myachev <[email protected]>
anmyachev committed Apr 5, 2024
1 parent a966395 commit 9322b35
Showing 2 changed files with 55 additions and 74 deletions.
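
The helper being removed here is pandas' private `ensure_clean` context manager, which yields a temporary path and deletes it on exit. For reference, a minimal, self-contained sketch of that old pattern (not code from this repository; assumes a parquet engine such as pyarrow is installed):

import pandas
from pandas._testing import ensure_clean  # private module: no stability guarantees

with ensure_clean(".parquet") as path:
    # ensure_clean yields a unique temporary filename ending in ".parquet"
    pandas.DataFrame({"col1": [1, 2, 3]}).to_parquet(path)
    print(pandas.read_parquet(path))
# on exiting the block, the temporary file has been deleted again
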
18 changes: 5 additions & 13 deletions modin/conftest.py
@@ -16,7 +16,6 @@
 
 import os
 import platform
-import shutil
 import subprocess
 import sys
 import time
@@ -340,16 +339,15 @@ def fixture(tmp_path):
 
 
 @pytest.fixture
-def make_parquet_file():
+def make_parquet_file(tmp_path):
     """Pytest fixture factory that makes a parquet file/dir for testing.
 
     Yields:
         Function that generates a parquet file/dir
     """
-    filenames = []
 
     def _make_parquet_file(
-        filename,
+        filename=None,
         nrows=NROWS,
         ncols=2,
         force=True,
@@ -369,6 +367,8 @@ def _make_parquet_file(
             partitioned_columns: Create a partitioned directory using pandas.
             row_group_size: Maximum size of each row group.
         """
+        if filename is None:
+            filename = get_unique_filename(extension=".parquet", data_dir=tmp_path)
         if force or not os.path.exists(filename):
             df = pandas.DataFrame(
                 {f"col{x + 1}": np.arange(nrows) for x in range(ncols)}
@@ -395,19 +395,11 @@ def _make_parquet_file(
                 )
             else:
                 df.to_parquet(filename, row_group_size=row_group_size)
-        filenames.append(filename)
+        return filename
 
     # Return function that generates parquet files
     yield _make_parquet_file
-
-    # Delete parquet file that was created
-    for path in filenames:
-        if os.path.exists(path):
-            if os.path.isdir(path):
-                shutil.rmtree(path)
-            else:
-                os.remove(path)
 
 
 @pytest.fixture
 def make_sql_connection():
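
With the fixture reworked as above, explicit cleanup disappears: the file lives under pytest's built-in tmp_path directory, whose lifetime pytest manages itself. A hedged sketch of how a test might consume the new fixture; the test name and assertions are illustrative, not part of this commit:

import pandas


def test_parquet_roundtrip(make_parquet_file):
    # No filename argument: the fixture now derives a unique path under
    # tmp_path and returns it, so the test chains it into read_parquet.
    path = make_parquet_file(nrows=100)
    df = pandas.read_parquet(path)
    assert len(df) == 100  # the fixture wrote `nrows` rows
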
111 changes: 50 additions & 61 deletions modin/pandas/test/test_io.py
@@ -1387,44 +1387,41 @@ def _test_read_parquet(
                 "Skipping empty filters error case to avoid race condition - see #6460"
             )
 
-        with ensure_clean(".parquet") as unique_filename:
-            unique_filename = path_type(unique_filename)
-            make_parquet_file(
-                filename=unique_filename,
-                row_group_size=row_group_size,
-                range_index_start=range_index_start,
-                range_index_step=range_index_step,
-                range_index_name=range_index_name,
-            )
+        unique_filename = make_parquet_file(
+            row_group_size=row_group_size,
+            range_index_start=range_index_start,
+            range_index_step=range_index_step,
+            range_index_name=range_index_name,
+        )
+        unique_filename = path_type(unique_filename)
 
-            eval_io(
-                fn_name="read_parquet",
-                # read_parquet kwargs
-                engine=engine,
-                path=unique_filename,
-                columns=columns,
-                filters=filters,
-            )
+        eval_io(
+            fn_name="read_parquet",
+            # read_parquet kwargs
+            engine=engine,
+            path=unique_filename,
+            columns=columns,
+            filters=filters,
+        )
 
     @pytest.mark.parametrize(
         "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
     )
     def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):
-        with ensure_clean(".parquet") as unique_filename:
-            make_parquet_file(filename=unique_filename, row_group_size=100)
+        unique_filename = make_parquet_file(row_group_size=100)
 
-            def comparator(df1, df2):
-                df_equals(df1, df2)
-                df_equals(df1.dtypes, df2.dtypes)
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
 
-            eval_io(
-                fn_name="read_parquet",
-                # read_parquet kwargs
-                engine=engine,
-                path=unique_filename,
-                dtype_backend=dtype_backend,
-                comparator=comparator,
-            )
+        eval_io(
+            fn_name="read_parquet",
+            # read_parquet kwargs
+            engine=engine,
+            path=unique_filename,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
 
     # Tests issue #6778
     def test_read_parquet_no_extension(self, engine, make_parquet_file):
@@ -1496,23 +1493,20 @@ def test_read_parquet_range_index(
     def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):
         if engine == "fastparquet" and os.name == "nt":
             pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720")
-        with ensure_clean(".parquet") as f1, ensure_clean(
-            ".parquet"
-        ) as f2, ensure_clean(".parquet") as f3:
-            for f in [f1, f2, f3]:
-                make_parquet_file(filename=f)
-            eval_io(fn_name="read_parquet", path=[f1, f2, f3], engine=engine)
-
-    def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file):
+        filenames = [None] * 3
+        for i in range(3):
+            filenames[i] = make_parquet_file()
+        eval_io(fn_name="read_parquet", path=filenames, engine=engine)
+
+    def test_read_parquet_indexing_by_column(self, engine, make_parquet_file):
         # Test indexing into a column of Modin with various parquet file row lengths.
         # Specifically, tests for https://github.com/modin-project/modin/issues/3527
        # which fails when min_partition_size < nrows < min_partition_size * (num_partitions - 1)
 
         nrows = (
             MinPartitionSize.get() + 1
         )  # Use the minimal guaranteed failing value for nrows.
-        unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path)
-        make_parquet_file(filename=unique_filename, nrows=nrows)
+        unique_filename = make_parquet_file(nrows=nrows)
 
         parquet_df = pd.read_parquet(unique_filename, engine=engine)
         for col in parquet_df.columns:
@@ -1731,17 +1725,14 @@ def test_read_parquet_directory_range_index_consistent_metadata(
     )
     def test_read_parquet_partitioned_directory(
         self,
-        tmp_path,
         make_parquet_file,
         columns,
         filters,
         range_index_start,
         range_index_step,
         engine,
     ):
-        unique_filename = get_unique_filename(extension=None, data_dir=tmp_path)
-        make_parquet_file(
-            filename=unique_filename,
+        unique_filename = make_parquet_file(
             partitioned_columns=["col1"],
             range_index_start=range_index_start,
             range_index_step=range_index_step,
@@ -2063,11 +2054,10 @@ def test_read_parquet_s3_with_column_partitioning(
 # TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
 # commment once we turn all default to pandas messages into errors.
 def test_read_parquet_relative_to_user_home(make_parquet_file):
-    with ensure_clean(".parquet") as unique_filename:
-        make_parquet_file(filename=unique_filename)
-        _check_relative_io(
-            "read_parquet", unique_filename, "path", storage_default=("Hdk",)
-        )
+    unique_filename = make_parquet_file()
+    _check_relative_io(
+        "read_parquet", unique_filename, "path", storage_default=("Hdk",)
+    )
 
 
 @pytest.mark.filterwarnings(default_to_pandas_ignore_string)
@@ -2756,20 +2746,19 @@ def test_fwf_file_usecols(self, make_fwf_file, usecols):
         "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
     )
     def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):
-        with ensure_clean(".fwf") as unique_filename:
-            make_fwf_file(filename=unique_filename)
+        unique_filename = make_fwf_file()
 
-            def comparator(df1, df2):
-                df_equals(df1, df2)
-                df_equals(df1.dtypes, df2.dtypes)
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
 
-            eval_io(
-                fn_name="read_fwf",
-                # read_csv kwargs
-                filepath_or_buffer=unique_filename,
-                dtype_backend=dtype_backend,
-                comparator=comparator,
-            )
+        eval_io(
+            fn_name="read_fwf",
+            # read_csv kwargs
+            filepath_or_buffer=unique_filename,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
 
     def test_fwf_file_chunksize(self, make_fwf_file):
         unique_filename = make_fwf_file()
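
All of the test-side rewrites above share one shape: request a path from the fixture, use it, and skip manual deletion. That is safe because pytest's tmp_path fixture hands each test its own fresh directory and manages its lifetime. A standalone sketch, independent of modin (assumes a parquet engine is installed):

import pandas


def test_tmp_path_lifecycle(tmp_path):
    path = tmp_path / "data.parquet"  # unique per test invocation
    pandas.DataFrame({"col1": range(3)}).to_parquet(path)
    assert pandas.read_parquet(path)["col1"].tolist() == [0, 1, 2]
    # no shutil.rmtree / os.remove needed: pytest cleans up tmp_path
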
