Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REFACTOR-#7242: Add type hints for modin/core/dataframe/algebra/ #7243

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
74 changes: 49 additions & 25 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@

"""Module houses builder class for Binary operator."""

from __future__ import annotations

import warnings
from typing import Optional
from typing import TYPE_CHECKING, Any, Callable, Optional, Union

import numpy as np
import pandas
Expand All @@ -24,13 +26,20 @@

from .operator import Operator

if TYPE_CHECKING:
from pandas._typing import DtypeObj

Check warning on line 30 in modin/core/dataframe/algebra/binary.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/binary.py#L30

Added line #L30 was not covered by tests

from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler

Check warning on line 32 in modin/core/dataframe/algebra/binary.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/binary.py#L32

Added line #L32 was not covered by tests


def maybe_compute_dtypes_common_cast(
first,
second,
trigger_computations=False,
axis=0,
func=None,
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, dict, list, tuple, np.ndarray, str, DtypeObj],
trigger_computations: bool = False,
axis: int = 0,
func: Optional[
Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
] = None,
) -> Optional[pandas.Series]:
"""
Precompute data types for binary operations by finding common type between operands.
Expand All @@ -39,7 +48,7 @@
----------
first : PandasQueryCompiler
First operand for which the binary operation would be performed later.
second : PandasQueryCompiler, list-like or scalar
second : PandasQueryCompiler, dict, list, tuple, np.ndarray, str or DtypeObj
YarShev marked this conversation as resolved.
Show resolved Hide resolved
Second operand for which the binary operation would be performed later.
trigger_computations : bool, default: False
Whether to trigger computation of the lazy metadata for `first` and `second`.
Expand Down Expand Up @@ -155,7 +164,7 @@
],
index=common_columns,
)
dtypes = pandas.concat(
dtypes: pandas.Series = pandas.concat(
[
dtypes,
pandas.Series(
Expand All @@ -168,7 +177,10 @@


def maybe_build_dtypes_series(
first, second, dtype, trigger_computations=False
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, Any],
dtype: DtypeObj,
trigger_computations: bool = False,
) -> Optional[pandas.Series]:
"""
Build a ``pandas.Series`` describing dtypes of the result of a binary operation.
Expand All @@ -179,7 +191,7 @@
First operand for which the binary operation would be performed later.
second : PandasQueryCompiler, list-like or scalar
Second operand for which the binary operation would be performed later.
dtype : np.dtype
dtype : DtypeObj
Dtype of the result.
trigger_computations : bool, default: False
Whether to trigger computation of the lazy metadata for `first` and `second`.
Expand Down Expand Up @@ -217,8 +229,15 @@


def try_compute_new_dtypes(
first, second, infer_dtypes=None, result_dtype=None, axis=0, func=None
):
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, Any],
infer_dtypes: Optional[str] = None,
result_dtype: Optional[Union[DtypeObj, str]] = None,
axis: int = 0,
func: Optional[
Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
] = None,
) -> Optional[pandas.Series]:
"""
Precompute resulting dtypes of the binary operation if possible.

Expand Down Expand Up @@ -285,11 +304,11 @@
@classmethod
def register(
cls,
func,
join_type="outer",
labels="replace",
infer_dtypes=None,
):
func: Callable[..., pandas.DataFrame],
join_type: str = "outer",
labels: str = "replace",
infer_dtypes: Optional[str] = None,
) -> Callable[..., PandasQueryCompiler]:
"""
Build template binary operator.

Expand Down Expand Up @@ -318,34 +337,39 @@
"""

def caller(
query_compiler, other, broadcast=False, *args, dtypes=None, **kwargs
):
query_compiler: PandasQueryCompiler,
other: Union[PandasQueryCompiler, Any],
broadcast: bool = False,
*args: tuple,
dtypes: Optional[Union[DtypeObj, str]] = None,
**kwargs: dict,
) -> PandasQueryCompiler:
"""
Apply binary `func` to passed operands.

Parameters
----------
query_compiler : QueryCompiler
query_compiler : PandasQueryCompiler
Left operand of `func`.
other : QueryCompiler, list-like object or scalar
other : PandasQueryCompiler, list-like object or scalar
Right operand of `func`.
broadcast : bool, default: False
If `other` is a one-column query compiler, indicates whether it is a Series or not.
Frames and Series have to be processed differently, however we can't distinguish them
at the query compiler level, so this parameter is a hint that is passed from a high-level API.
*args : args,
*args : tuple
Arguments that will be passed to `func`.
dtypes : "copy", scalar dtype or None, default: None
Dtypes of the result. "copy" to keep old dtypes and None to compute them on demand.
**kwargs : kwargs,
**kwargs : dict
Arguments that will be passed to `func`.

Returns
-------
QueryCompiler
PandasQueryCompiler
Result of binary function.
"""
axis = kwargs.get("axis", 0)
axis: int = kwargs.get("axis", 0)
if isinstance(other, type(query_compiler)) and broadcast:
assert (
len(other.columns) == 1
Expand Down
32 changes: 24 additions & 8 deletions modin/core/dataframe/algebra/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,31 @@

"""Module houses builder class for Fold operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional

from .operator import Operator

if TYPE_CHECKING:
import pandas

Check warning on line 23 in modin/core/dataframe/algebra/fold.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/fold.py#L23

Added line #L23 was not covered by tests

from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler

Check warning on line 25 in modin/core/dataframe/algebra/fold.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/algebra/fold.py#L25

Added line #L25 was not covered by tests


class Fold(Operator):
"""Builder class for Fold functions."""

@classmethod
def register(cls, fold_function):
def register(
cls, fold_function: Callable[..., pandas.DataFrame]
) -> Callable[..., PandasQueryCompiler]:
"""
Build Fold operator that will be performed across rows/columns.

Parameters
----------
fold_function : callable(pandas.DataFrame) -> pandas.DataFrame
fold_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
Function to apply across rows/columns.

Returns
Expand All @@ -35,25 +46,30 @@
Function that takes query compiler and executes Fold function.
"""

def caller(query_compiler, fold_axis=None, *args, **kwargs):
def caller(
query_compiler: PandasQueryCompiler,
fold_axis: Optional[int] = None,
*args: tuple,
**kwargs: dict,
) -> PandasQueryCompiler:
"""
Execute Fold function against passed query compiler.

Parameters
----------
query_compiler : BaseQueryCompiler
query_compiler : PandasQueryCompiler
The query compiler to execute the function on.
fold_axis : int, optional
0 or None means apply across full column partitions. 1 means
apply across full row partitions.
*args : iterable
Additional arguments passed to fold_function.
*args : tuple
Additional arguments passed to `fold_function`.
**kwargs : dict
Additional keyword arguments passed to fold_function.
Additional keyword arguments passed to `fold_function`.

Returns
-------
BaseQueryCompiler
PandasQueryCompiler
A new query compiler representing the result of executing the
function.
"""
Expand Down