TEST-modin-project#7151: Remove usage of pandas._testing private module
Signed-off-by: Anatoly Myachev <[email protected]>
anmyachev committed Apr 5, 2024
1 parent a966395 commit 9322b35
Showing 2 changed files with 55 additions and 74 deletions.
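
The helper being removed here is pandas' private `ensure_clean` context manager, which yields a temporary path and deletes it on exit. For reference, a minimal, self-contained sketch of that old pattern (not code from this repository; assumes a parquet engine such as pyarrow is installed):

import pandas
from pandas._testing import ensure_clean  # private module: no stability guarantees

with ensure_clean(".parquet") as path:
    # ensure_clean yields a unique temporary filename ending in ".parquet"
    pandas.DataFrame({"col1": [1, 2, 3]}).to_parquet(path)
    print(pandas.read_parquet(path))
# on exiting the block, the temporary file has been deleted again
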
18 changes: 5 additions & 13 deletions modin/conftest.py
@@ -16,7 +16,6 @@
 
 import os
 import platform
-import shutil
 import subprocess
 import sys
 import time
@@ -340,16 +339,15 @@ def fixture(tmp_path):
 
 
 @pytest.fixture
-def make_parquet_file():
+def make_parquet_file(tmp_path):
     """Pytest fixture factory that makes a parquet file/dir for testing.
 
     Yields:
         Function that generates a parquet file/dir
     """
-    filenames = []
 
     def _make_parquet_file(
-        filename,
+        filename=None,
         nrows=NROWS,
         ncols=2,
         force=True,
@@ -369,6 +367,8 @@ def _make_parquet_file(
             partitioned_columns: Create a partitioned directory using pandas.
             row_group_size: Maximum size of each row group.
         """
+        if filename is None:
+            filename = get_unique_filename(extension=".parquet", data_dir=tmp_path)
         if force or not os.path.exists(filename):
             df = pandas.DataFrame(
                 {f"col{x + 1}": np.arange(nrows) for x in range(ncols)}
@@ -395,19 +395,11 @@ def _make_parquet_file(
                 )
             else:
                 df.to_parquet(filename, row_group_size=row_group_size)
-        filenames.append(filename)
+        return filename
 
     # Return function that generates parquet files
     yield _make_parquet_file
-
-    # Delete parquet file that was created
-    for path in filenames:
-        if os.path.exists(path):
-            if os.path.isdir(path):
-                shutil.rmtree(path)
-            else:
-                os.remove(path)
 
 
 @pytest.fixture
 def make_sql_connection():
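
With the fixture reworked as above, explicit cleanup disappears: the file lives under pytest's built-in tmp_path directory, whose lifetime pytest manages itself. A hedged sketch of how a test might consume the new fixture; the test name and assertions are illustrative, not part of this commit:

import pandas


def test_parquet_roundtrip(make_parquet_file):
    # No filename argument: the fixture now derives a unique path under
    # tmp_path and returns it, so the test chains it into read_parquet.
    path = make_parquet_file(nrows=100)
    df = pandas.read_parquet(path)
    assert len(df) == 100  # the fixture wrote `nrows` rows
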
111 changes: 50 additions & 61 deletions modin/pandas/test/test_io.py
@@ -1387,44 +1387,41 @@ def _test_read_parquet(
                 "Skipping empty filters error case to avoid race condition - see #6460"
             )
 
-        with ensure_clean(".parquet") as unique_filename:
-            unique_filename = path_type(unique_filename)
-            make_parquet_file(
-                filename=unique_filename,
-                row_group_size=row_group_size,
-                range_index_start=range_index_start,
-                range_index_step=range_index_step,
-                range_index_name=range_index_name,
-            )
+        unique_filename = make_parquet_file(
+            row_group_size=row_group_size,
+            range_index_start=range_index_start,
+            range_index_step=range_index_step,
+            range_index_name=range_index_name,
+        )
+        unique_filename = path_type(unique_filename)
 
-            eval_io(
-                fn_name="read_parquet",
-                # read_parquet kwargs
-                engine=engine,
-                path=unique_filename,
-                columns=columns,
-                filters=filters,
-            )
+        eval_io(
+            fn_name="read_parquet",
+            # read_parquet kwargs
+            engine=engine,
+            path=unique_filename,
+            columns=columns,
+            filters=filters,
+        )
 
     @pytest.mark.parametrize(
         "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
     )
     def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):
-        with ensure_clean(".parquet") as unique_filename:
-            make_parquet_file(filename=unique_filename, row_group_size=100)
+        unique_filename = make_parquet_file(row_group_size=100)
 
-            def comparator(df1, df2):
-                df_equals(df1, df2)
-                df_equals(df1.dtypes, df2.dtypes)
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
 
-            eval_io(
-                fn_name="read_parquet",
-                # read_parquet kwargs
-                engine=engine,
-                path=unique_filename,
-                dtype_backend=dtype_backend,
-                comparator=comparator,
-            )
+        eval_io(
+            fn_name="read_parquet",
+            # read_parquet kwargs
+            engine=engine,
+            path=unique_filename,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
 
     # Tests issue #6778
     def test_read_parquet_no_extension(self, engine, make_parquet_file):
@@ -1496,23 +1493,20 @@ def test_read_parquet_range_index(
     def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):
         if engine == "fastparquet" and os.name == "nt":
             pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720")
-        with ensure_clean(".parquet") as f1, ensure_clean(
-            ".parquet"
-        ) as f2, ensure_clean(".parquet") as f3:
-            for f in [f1, f2, f3]:
-                make_parquet_file(filename=f)
-            eval_io(fn_name="read_parquet", path=[f1, f2, f3], engine=engine)
-
-    def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file):
+        filenames = [None] * 3
+        for i in range(3):
+            filenames[i] = make_parquet_file()
+        eval_io(fn_name="read_parquet", path=filenames, engine=engine)
+
+    def test_read_parquet_indexing_by_column(self, engine, make_parquet_file):
         # Test indexing into a column of Modin with various parquet file row lengths.
         # Specifically, tests for https://github.com/modin-project/modin/issues/3527
        # which fails when min_partition_size < nrows < min_partition_size * (num_partitions - 1)
 
         nrows = (
             MinPartitionSize.get() + 1
         )  # Use the minimal guaranteed failing value for nrows.
-        unique_filename = get_unique_filename(extension="parquet", data_dir=tmp_path)
-        make_parquet_file(filename=unique_filename, nrows=nrows)
+        unique_filename = make_parquet_file(nrows=nrows)
 
         parquet_df = pd.read_parquet(unique_filename, engine=engine)
         for col in parquet_df.columns:
@@ -1731,17 +1725,14 @@ def test_read_parquet_directory_range_index_consistent_metadata(
     )
     def test_read_parquet_partitioned_directory(
         self,
-        tmp_path,
         make_parquet_file,
         columns,
         filters,
         range_index_start,
         range_index_step,
         engine,
     ):
-        unique_filename = get_unique_filename(extension=None, data_dir=tmp_path)
-        make_parquet_file(
-            filename=unique_filename,
+        unique_filename = make_parquet_file(
             partitioned_columns=["col1"],
             range_index_start=range_index_start,
             range_index_step=range_index_step,
@@ -2063,11 +2054,10 @@ def test_read_parquet_s3_with_column_partitioning(
 # TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
 # commment once we turn all default to pandas messages into errors.
 def test_read_parquet_relative_to_user_home(make_parquet_file):
-    with ensure_clean(".parquet") as unique_filename:
-        make_parquet_file(filename=unique_filename)
-        _check_relative_io(
-            "read_parquet", unique_filename, "path", storage_default=("Hdk",)
-        )
+    unique_filename = make_parquet_file()
+    _check_relative_io(
+        "read_parquet", unique_filename, "path", storage_default=("Hdk",)
+    )
 
 
 @pytest.mark.filterwarnings(default_to_pandas_ignore_string)
@@ -2756,20 +2746,19 @@ def test_fwf_file_usecols(self, make_fwf_file, usecols):
         "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
     )
     def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):
-        with ensure_clean(".fwf") as unique_filename:
-            make_fwf_file(filename=unique_filename)
+        unique_filename = make_fwf_file()
 
-            def comparator(df1, df2):
-                df_equals(df1, df2)
-                df_equals(df1.dtypes, df2.dtypes)
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
 
-            eval_io(
-                fn_name="read_fwf",
-                # read_csv kwargs
-                filepath_or_buffer=unique_filename,
-                dtype_backend=dtype_backend,
-                comparator=comparator,
-            )
+        eval_io(
+            fn_name="read_fwf",
+            # read_csv kwargs
+            filepath_or_buffer=unique_filename,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
 
     def test_fwf_file_chunksize(self, make_fwf_file):
         unique_filename = make_fwf_file()
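
All of the test-side rewrites above share one shape: request a path from the fixture, use it, and skip manual deletion. That is safe because pytest's tmp_path fixture hands each test its own fresh directory and manages its lifetime. A standalone sketch, independent of modin (assumes a parquet engine is installed):

import pandas


def test_tmp_path_lifecycle(tmp_path):
    path = tmp_path / "data.parquet"  # unique per test invocation
    pandas.DataFrame({"col1": range(3)}).to_parquet(path)
    assert pandas.read_parquet(path)["col1"].tolist() == [0, 1, 2]
    # no shutil.rmtree / os.remove needed: pytest cleans up tmp_path
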
