Initial commit: 首次建仓,建立目录结构
This commit is contained in:
@ -0,0 +1,179 @@
|
||||
"""
|
||||
compat
|
||||
======
|
||||
|
||||
Cross-compatible functions for different versions of Python.
|
||||
|
||||
Other items:
|
||||
* platform checker
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pandas.compat._constants import (
|
||||
CHAINED_WARNING_DISABLED,
|
||||
IS64,
|
||||
ISMUSL,
|
||||
PY312,
|
||||
PY314,
|
||||
PYPY,
|
||||
WASM,
|
||||
)
|
||||
from pandas.compat.numpy import is_numpy_dev
|
||||
from pandas.compat.pyarrow import (
|
||||
HAS_PYARROW,
|
||||
PYARROW_INSTALLED,
|
||||
PYARROW_MIN_VERSION,
|
||||
pa_version_under14p0,
|
||||
pa_version_under14p1,
|
||||
pa_version_under16p0,
|
||||
pa_version_under17p0,
|
||||
pa_version_under18p0,
|
||||
pa_version_under19p0,
|
||||
pa_version_under20p0,
|
||||
pa_version_under21p0,
|
||||
pa_version_under22p0,
|
||||
pa_version_under23p0,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import F
|
||||
|
||||
|
||||
def set_function_name(f: F, name: str, cls: type) -> F:
    """
    Rebind the identifying attributes of a function to a class.

    Parameters
    ----------
    f : callable
        Function whose ``__name__``, ``__qualname__`` and ``__module__``
        are overwritten in place.
    name : str
        New function name.
    cls : type
        Class supplying the qualname prefix and the module name.

    Returns
    -------
    callable
        The same object ``f`` that was passed in.
    """
    renamed = (
        ("__name__", name),
        ("__qualname__", f"{cls.__name__}.{name}"),
        ("__module__", cls.__module__),
    )
    for attribute, value in renamed:
        setattr(f, attribute, value)
    return f
|
||||
|
||||
|
||||
def is_platform_little_endian() -> bool:
    """
    Report whether the interpreter's native byte order is little endian.

    Returns
    -------
    bool
        True if ``sys.byteorder`` is ``"little"``.
    """
    native_order = sys.byteorder
    return native_order == "little"
|
||||
|
||||
|
||||
def is_platform_windows() -> bool:
    """
    Report whether the running platform is Windows.

    Returns
    -------
    bool
        True if ``sys.platform`` is ``"win32"`` or ``"cygwin"``.
    """
    current = sys.platform
    return current == "win32" or current == "cygwin"
|
||||
|
||||
|
||||
def is_platform_linux() -> bool:
    """
    Report whether the running platform is Linux.

    Returns
    -------
    bool
        True if ``sys.platform`` is ``"linux"``.
    """
    current = sys.platform
    return current == "linux"
|
||||
|
||||
|
||||
def is_platform_mac() -> bool:
    """
    Report whether the running platform is macOS.

    Returns
    -------
    bool
        True if ``sys.platform`` is ``"darwin"``.
    """
    current = sys.platform
    return current == "darwin"
|
||||
|
||||
|
||||
def is_platform_arm() -> bool:
    """
    Report whether the running platform uses an ARM architecture.

    Returns
    -------
    bool
        True if ``platform.machine()`` is ``"arm64"`` or ``"aarch64"``, or
        starts with ``"armv"``.
    """
    machine = platform.machine()
    if machine in ("arm64", "aarch64"):
        return True
    return machine.startswith("armv")
|
||||
|
||||
|
||||
def is_platform_power() -> bool:
    """
    Checking if the running platform uses the Power architecture.

    Returns
    -------
    bool
        True if the running platform uses the Power architecture, i.e.
        ``platform.machine()`` is ``"ppc64"`` or ``"ppc64le"``.
    """
    return platform.machine() in ("ppc64", "ppc64le")
|
||||
|
||||
|
||||
def is_platform_riscv64() -> bool:
    """
    Report whether the running platform uses the riscv64 architecture.

    Returns
    -------
    bool
        True if ``platform.machine()`` is ``"riscv64"``.
    """
    machine = platform.machine()
    return machine == "riscv64"
|
||||
|
||||
|
||||
def is_ci_environment() -> bool:
    """
    Report whether the process is flagged as running in continuous integration.

    The decision is based solely on the ``PANDAS_CI`` environment variable.

    Returns
    -------
    bool
        True if ``PANDAS_CI`` is set to exactly ``"1"``; False when it is
        unset or has any other value.
    """
    flag = os.getenv("PANDAS_CI", "0")
    return flag == "1"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CHAINED_WARNING_DISABLED",
|
||||
"HAS_PYARROW",
|
||||
"IS64",
|
||||
"ISMUSL",
|
||||
"PY312",
|
||||
"PY314",
|
||||
"PYARROW_INSTALLED",
|
||||
"PYARROW_MIN_VERSION",
|
||||
"PYPY",
|
||||
"WASM",
|
||||
"is_numpy_dev",
|
||||
"pa_version_under14p0",
|
||||
"pa_version_under14p1",
|
||||
"pa_version_under16p0",
|
||||
"pa_version_under17p0",
|
||||
"pa_version_under18p0",
|
||||
"pa_version_under19p0",
|
||||
"pa_version_under20p0",
|
||||
"pa_version_under21p0",
|
||||
"pa_version_under22p0",
|
||||
"pa_version_under23p0",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,35 @@
|
||||
"""
|
||||
_constants
|
||||
======
|
||||
|
||||
Constants relevant for the Python implementation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import sys
|
||||
import sysconfig
|
||||
|
||||
# True when sys.maxsize exceeds 2**32, i.e. on a 64-bit interpreter build.
IS64 = sys.maxsize > 2**32

# Interpreter version gates (inclusive lower bounds).
PY312 = sys.version_info >= (3, 12)
PY314 = sys.version_info >= (3, 14)
# Implementation / platform flags, all evaluated once at import time.
PYPY = platform.python_implementation() == "PyPy"
WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"])
# HOST_GNU_TYPE may be missing (None); fall back to "" so the substring test is safe.
ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
# the refcount for self in a chained __setitem__/.(i)loc indexing/method call
REF_COUNT = 2 if PY314 else 3
REF_COUNT_IDX = 2
REF_COUNT_METHOD = 1 if PY314 else 2
# Mirrors PYPY. NOTE(review): presumably because the refcount-based detection
# above does not apply on PyPy -- confirm against the callers.
CHAINED_WARNING_DISABLED = PYPY
|
||||
|
||||
|
||||
__all__ = [
|
||||
"IS64",
|
||||
"ISMUSL",
|
||||
"PY312",
|
||||
"PY314",
|
||||
"PYPY",
|
||||
"WASM",
|
||||
]
|
||||
@ -0,0 +1,191 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Literal,
|
||||
overload,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.util.version import Version
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import types
|
||||
|
||||
# Update install.rst, actions-311-minimum_versions.yaml,
|
||||
# deps_minimum.toml & pyproject.toml when updating versions!
|
||||
|
||||
VERSIONS = {
|
||||
"adbc-driver-postgresql": "1.2.0",
|
||||
"adbc-driver-sqlite": "1.2.0",
|
||||
"bs4": "4.12.3",
|
||||
"bottleneck": "1.4.2",
|
||||
"fastparquet": "2024.11.0",
|
||||
"fsspec": "2024.10.0",
|
||||
"html5lib": "1.1",
|
||||
"hypothesis": "6.116.0",
|
||||
"gcsfs": "2024.10.0",
|
||||
"jinja2": "3.1.5",
|
||||
"lxml.etree": "5.3.0",
|
||||
"matplotlib": "3.9.3",
|
||||
"numba": "0.60.0",
|
||||
"numexpr": "2.10.2",
|
||||
"odfpy": "1.4.1",
|
||||
"openpyxl": "3.1.5",
|
||||
"psycopg2": "2.9.10", # (dt dec pq3 ext lo64)
|
||||
"pymysql": "1.1.1",
|
||||
"pyarrow": "13.0.0",
|
||||
"pyiceberg": "0.8.1",
|
||||
"pyreadstat": "1.2.8",
|
||||
"pytest": "8.3.4",
|
||||
"python-calamine": "0.3.0",
|
||||
"pytz": "2020.1", # keep this pinned (https://github.com/pandas-dev/pandas/pull/65133)
|
||||
"pyxlsb": "1.0.10",
|
||||
"s3fs": "2024.10.0",
|
||||
"scipy": "1.14.1",
|
||||
"sqlalchemy": "2.0.36",
|
||||
"tables": "3.10.1",
|
||||
"tabulate": "0.9.0",
|
||||
"xarray": "2024.10.0",
|
||||
"xlrd": "2.0.1",
|
||||
"xlsxwriter": "3.2.0",
|
||||
"zstandard": "0.23.0",
|
||||
"qtpy": "2.4.2",
|
||||
"pyqt5": "5.15.9",
|
||||
}
|
||||
|
||||
# A mapping from import name to package name (on PyPI) for packages where
|
||||
# these two names are different.
|
||||
|
||||
INSTALL_MAPPING = {
|
||||
"bs4": "beautifulsoup4",
|
||||
"bottleneck": "Bottleneck",
|
||||
"jinja2": "Jinja2",
|
||||
"lxml.etree": "lxml",
|
||||
"odf": "odfpy",
|
||||
"python_calamine": "python-calamine",
|
||||
"sqlalchemy": "SQLAlchemy",
|
||||
"tables": "pytables",
|
||||
}
|
||||
|
||||
|
||||
def get_version(module: types.ModuleType) -> str:
    """
    Return the version string advertised by a module.

    Parameters
    ----------
    module : types.ModuleType
        Imported module to inspect.

    Returns
    -------
    str
        The module's ``__version__``. For ``psycopg2`` only the first
        whitespace-separated token is returned.

    Raises
    ------
    ImportError
        If the module has no ``__version__`` attribute or it is None.
    """
    raw_version = getattr(module, "__version__", None)
    if raw_version is None:
        raise ImportError(f"Can't determine version for {module.__name__}")

    # psycopg2 appends " (dt dec pq3 ext lo64)" to its version; keep only
    # the leading version token.
    return raw_version.split()[0] if module.__name__ == "psycopg2" else raw_version
|
||||
|
||||
|
||||
@overload
def import_optional_dependency(
    name: str,
    extra: str = ...,
    min_version: str | None = ...,
    *,
    errors: Literal["raise"] = ...,
) -> types.ModuleType: ...


@overload
def import_optional_dependency(
    name: str,
    extra: str = ...,
    min_version: str | None = ...,
    *,
    errors: Literal["warn", "ignore"],
) -> types.ModuleType | None: ...


def import_optional_dependency(
    name: str,
    extra: str = "",
    min_version: str | None = None,
    *,
    errors: Literal["raise", "warn", "ignore"] = "raise",
) -> types.ModuleType | None:
    """
    Import an optional dependency.

    By default, if a dependency is missing an ImportError with a nice
    message will be raised. If a dependency is present, but too old,
    we raise.

    Parameters
    ----------
    name : str
        The module name.
    extra : str
        Additional text to include in the ImportError message.
    min_version : str, default None
        Specify a minimum version that is different from the global pandas
        minimum version required.
    errors : str {'raise', 'warn', 'ignore'}
        What to do when a dependency is not found or its version is too old.

        * raise : Raise an ImportError in both cases.
        * warn : If the module is not installed, return None without
          warning. If the module's version is too old, emit a UserWarning
          and return None.
        * ignore : Return None both when the module is not installed and
          when its version is too old; nothing is raised or warned.
          It's expected that users validate the version locally when
          using ``errors="ignore"`` (see ``io/html.py``).

    Returns
    -------
    maybe_module : Optional[ModuleType]
        The imported module, when found and the version is correct.
        None is returned when the package is not found and `errors`
        is not ``'raise'``, or when the package's version is too old and
        `errors` is ``'warn'`` or ``'ignore'``.

    Raises
    ------
    ImportError
        With ``errors="raise"``, when the import fails or the installed
        version is older than the minimum. Also propagated from
        ``get_version`` (for any ``errors`` value) when a minimum version
        applies but the module exposes no version.
    """
    assert errors in {"warn", "raise", "ignore"}

    # Translate the import name to the distribution name shown to the user.
    package_name = INSTALL_MAPPING.get(name)
    install_name = package_name if package_name is not None else name

    msg = (
        f"`Import {install_name}` failed. {extra} "
        f"Use pip or conda to install the {install_name} package."
    )
    try:
        module = importlib.import_module(name)
    except ImportError as err:
        if errors == "raise":
            raise ImportError(msg) from err
        # "warn" and "ignore" both stay silent for a missing module.
        return None

    # Handle submodules: if we have submodule, grab parent module from sys.modules
    parent = name.split(".", maxsplit=1)[0]
    if parent != name:
        install_name = parent
        module_to_get = sys.modules[install_name]
    else:
        module_to_get = module
    # NOTE(review): the global minimum is looked up by the *parent import name*.
    # VERSIONS keys such as "lxml.etree", "odfpy" and "python-calamine" do not
    # match the parent import names ("lxml", "odf", "python_calamine"), so those
    # entries look unreachable from this lookup -- confirm whether that is intended.
    minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
    if minimum_version:
        version = get_version(module_to_get)
        if version and Version(version) < Version(minimum_version):
            msg = (
                f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
                f"(version '{version}' currently installed)."
            )
            if errors == "warn":
                warnings.warn(
                    msg,
                    UserWarning,
                    stacklevel=find_stack_level(),
                )
                return None
            elif errors == "raise":
                raise ImportError(msg)
            else:
                # errors == "ignore": too-old versions are reported as None.
                return None

    return module
|
||||
@ -0,0 +1,50 @@
|
||||
"""support numpy compatibility across versions"""
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util.version import Version
|
||||
|
||||
# numpy versioning
_np_version = np.__version__
_nlv = Version(_np_version)
# NOTE: despite the "gt" in the names, these gates are inclusive (>=).
np_version_gt2 = _nlv >= Version("2.0.0")
np_version_gt2_3 = _nlv >= Version("2.3.0")
np_version_gt2_5 = _nlv >= Version("2.5.0")
# True when the parsed version carries a dev segment.
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.26.0"


# Fail at import time when the installed numpy is older than the minimum.
if _nlv < Version(_min_numpy_ver):
    raise ImportError(
        f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version.\n"
        f"Your numpy version is {_np_version}."
    )


# Aliases bound below; which numpy attribute backs them depends on the version.
np_long: type
np_ulong: type

if np_version_gt2:
    try:
        with warnings.catch_warnings():
            # Silence the FutureWarning that accessing np.long can emit.
            warnings.filterwarnings(
                "ignore",
                r".*In the future `np\.long` will be defined as.*",
                FutureWarning,
            )
            np_long = np.long
            np_ulong = np.ulong
    except AttributeError:
        # np.long / np.ulong unavailable: fall back to the int_/uint names.
        np_long = np.int_
        np_ulong = np.uint
else:
    np_long = np.int_
    np_ulong = np.uint
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_np_version",
|
||||
"is_numpy_dev",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,376 @@
|
||||
"""
|
||||
For compatibility with numpy libraries, pandas functions or methods have to
|
||||
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
|
||||
are not actually used or respected in the pandas implementation.
|
||||
|
||||
To ensure that users do not abuse these parameters, validation is performed in
|
||||
'validators.py' to make sure that any extra parameters passed correspond ONLY
|
||||
to those in the numpy signature. Part of that validation includes whether or
|
||||
not the user attempted to pass in non-default values for these extraneous
|
||||
parameters. As we want to discourage users from relying on these parameters
|
||||
when calling the pandas implementation, we want them only to pass in the
|
||||
default values for these parameters.
|
||||
|
||||
This module provides a set of commonly used default arguments for functions and
|
||||
methods that are spread throughout the codebase. This module will make it
|
||||
easier to adjust to future upstream changes in the analogous numpy signatures.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
TypeVar,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
from numpy import ndarray
|
||||
|
||||
from pandas._libs.lib import (
|
||||
is_bool,
|
||||
is_integer,
|
||||
)
|
||||
from pandas.errors import UnsupportedFunctionCall
|
||||
from pandas.util._validators import (
|
||||
validate_args,
|
||||
validate_args_and_kwargs,
|
||||
validate_kwargs,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
Axis,
|
||||
AxisInt,
|
||||
)
|
||||
|
||||
AxisNoneT = TypeVar("AxisNoneT", Axis, None)
|
||||
|
||||
|
||||
class CompatValidator:
    """
    Callable that validates numpy-compat ``args``/``kwargs`` against defaults.

    Parameters
    ----------
    defaults : dict
        Mapping of accepted parameter names to their default values.
    fname : str, optional
        Function name forwarded to the validation helpers.
    method : {'args', 'kwargs', 'both'}, optional
        Which validation helper to dispatch to.
    max_fname_arg_count : int, optional
        Forwarded to the positional-argument validation helpers.
    """

    def __init__(
        self,
        defaults,
        fname=None,
        method: str | None = None,
        max_fname_arg_count=None,
    ) -> None:
        self.defaults = defaults
        self.fname = fname
        self.method = method
        self.max_fname_arg_count = max_fname_arg_count

    def __call__(
        self,
        args,
        kwargs,
        fname=None,
        max_fname_arg_count=None,
        method: str | None = None,
    ) -> None:
        """
        Validate ``args``/``kwargs``; per-call options override the stored ones.

        Raises
        ------
        ValueError
            If the resolved ``method`` is not 'args', 'kwargs' or 'both'.
        """
        # Nothing was passed, so there is nothing to validate.
        if not (args or kwargs):
            return None

        # Per-call overrides win; None means "use the instance setting".
        if fname is None:
            fname = self.fname
        if max_fname_arg_count is None:
            max_fname_arg_count = self.max_fname_arg_count
        if method is None:
            method = self.method

        expected = self.defaults
        if method == "both":
            validate_args_and_kwargs(
                fname, args, kwargs, max_fname_arg_count, expected
            )
        elif method == "kwargs":
            validate_kwargs(fname, kwargs, expected)
        elif method == "args":
            validate_args(fname, args, max_fname_arg_count, expected)
        else:
            raise ValueError(f"invalid validation method '{method}'")
|
||||
|
||||
|
||||
ARGMINMAX_DEFAULTS = {"out": None}
|
||||
validate_argmin = CompatValidator(
|
||||
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_argmax = CompatValidator(
|
||||
ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
|
||||
def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
    """
    Separate a genuine ``skipna`` flag from a misplaced positional argument.

    When ``skipna`` is an ndarray or None it is treated as a positional
    argument: it is prepended to ``args`` and ``skipna`` becomes True.
    Otherwise both values are returned unchanged.

    Returns
    -------
    tuple
        ``(skipna, args)`` after the adjustment described above.
    """
    if skipna is not None and not isinstance(skipna, ndarray):
        return skipna, args
    return True, (skipna, *args)
|
||||
|
||||
|
||||
def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for ``argmin`` and resolve ``skipna``.

    If 'Series.argmin' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None'. An
    ndarray or None received as ``skipna`` is therefore moved to the front of
    ``args`` (and ``skipna`` set to True) before validation.

    Returns
    -------
    bool
        The resolved ``skipna`` value.
    """
    resolved_skipna, positional = process_skipna(skipna, args)
    validate_argmin(positional, kwargs)
    return resolved_skipna
|
||||
|
||||
|
||||
def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for ``argmax`` and resolve ``skipna``.

    If 'Series.argmax' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None'. An
    ndarray or None received as ``skipna`` is therefore moved to the front of
    ``args`` (and ``skipna`` set to True) before validation.

    Returns
    -------
    bool
        The resolved ``skipna`` value.
    """
    resolved_skipna, positional = process_skipna(skipna, args)
    validate_argmax(positional, kwargs)
    return resolved_skipna
|
||||
|
||||
|
||||
# Accepted numpy ``argsort`` parameters and their defaults. Key order is kept
# as axis, kind, order, stable. ``kind`` defaults to None; an earlier
# "quicksort" assignment was immediately overwritten and has been dropped.
ARGSORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": None,
    "order": None,
    "stable": None,
}
|
||||
|
||||
|
||||
validate_argsort = CompatValidator(
|
||||
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
|
||||
)
|
||||
|
||||
# two different signatures of argsort, this second validation for when the
|
||||
# `kind` param is supported
|
||||
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {}
|
||||
ARGSORT_DEFAULTS_KIND["axis"] = -1
|
||||
ARGSORT_DEFAULTS_KIND["order"] = None
|
||||
ARGSORT_DEFAULTS_KIND["stable"] = None
|
||||
validate_argsort_kind = CompatValidator(
|
||||
ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
|
||||
)
|
||||
|
||||
|
||||
def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for ``argsort`` and resolve ``ascending``.

    If 'Categorical.argsort' is called via the 'numpy' library, the first
    parameter in its signature is 'axis', which takes either an integer or
    'None'. A value that ``is_integer`` accepts, or None, received as
    ``ascending`` is therefore moved to the front of ``args`` and
    ``ascending`` is reported as True.

    Returns
    -------
    bool
        The resolved ``ascending`` value.
    """
    if is_integer(ascending) or ascending is None:
        positional = (ascending, *args)
        resolved = True
    else:
        positional = args
        resolved = cast(bool, ascending)

    validate_argsort_kind(positional, kwargs, max_fname_arg_count=3)
    return resolved
|
||||
|
||||
|
||||
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
|
||||
validate_clip = CompatValidator(
|
||||
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
|
||||
)
|
||||
|
||||
|
||||
@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: ...


@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: ...


def validate_clip_with_axis(
    axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
    """
    Validate numpy-style arguments for ``clip`` and resolve ``axis``.

    If 'NDFrame.clip' is called via the numpy library, the third parameter in
    its signature is 'out', which can take an ndarray. An ndarray received as
    ``axis`` is therefore moved to the front of ``args`` and None is returned
    as the axis; any other value is returned unchanged after validation.
    """
    out_given_as_axis = isinstance(axis, ndarray)
    if out_given_as_axis:
        args = (axis, *args)

    validate_clip(args, kwargs)

    if out_given_as_axis:
        return None
    # error: Incompatible return value type (got "Union[ndarray[Any, Any],
    # str, int]", expected "Union[str, int, None]")
    return axis  # type: ignore[return-value]
|
||||
|
||||
|
||||
CUM_FUNC_DEFAULTS: dict[str, Any] = {}
|
||||
CUM_FUNC_DEFAULTS["dtype"] = None
|
||||
CUM_FUNC_DEFAULTS["out"] = None
|
||||
validate_cum_func = CompatValidator(
|
||||
CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_cumsum = CompatValidator(
|
||||
CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
|
||||
def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
    """
    Validate numpy-style arguments for a cumulative function.

    If this function is called via the 'numpy' library, the third parameter in
    its signature is 'dtype', which takes either a 'numpy' dtype or 'None'. A
    ``skipna`` that ``is_bool`` rejects is therefore moved to the front of
    ``args`` and ``skipna`` becomes True. A ``np.bool_`` flag is converted to
    a builtin ``bool``.

    Returns
    -------
    bool
        The resolved ``skipna`` value.
    """
    if is_bool(skipna):
        if isinstance(skipna, np.bool_):
            skipna = bool(skipna)
    else:
        args = (skipna, *args)
        skipna = True

    validate_cum_func(args, kwargs, fname=name)
    return skipna
|
||||
|
||||
|
||||
ALLANY_DEFAULTS: dict[str, bool | None] = {}
|
||||
ALLANY_DEFAULTS["dtype"] = None
|
||||
ALLANY_DEFAULTS["out"] = None
|
||||
ALLANY_DEFAULTS["keepdims"] = False
|
||||
ALLANY_DEFAULTS["axis"] = None
|
||||
validate_all = CompatValidator(
|
||||
ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_any = CompatValidator(
|
||||
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
|
||||
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
|
||||
|
||||
MINMAX_DEFAULTS = {"axis": None, "dtype": None, "out": None, "keepdims": False}
|
||||
validate_min = CompatValidator(
|
||||
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_max = CompatValidator(
|
||||
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
|
||||
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
|
||||
validate_repeat = CompatValidator(
|
||||
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
|
||||
validate_round = CompatValidator(
|
||||
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {}
|
||||
STAT_FUNC_DEFAULTS["dtype"] = None
|
||||
STAT_FUNC_DEFAULTS["out"] = None
|
||||
|
||||
SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
|
||||
SUM_DEFAULTS["axis"] = None
|
||||
SUM_DEFAULTS["keepdims"] = False
|
||||
SUM_DEFAULTS["initial"] = None
|
||||
|
||||
PROD_DEFAULTS = SUM_DEFAULTS.copy()
|
||||
|
||||
MEAN_DEFAULTS = SUM_DEFAULTS.copy()
|
||||
|
||||
MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
|
||||
MEDIAN_DEFAULTS["overwrite_input"] = False
|
||||
MEDIAN_DEFAULTS["keepdims"] = False
|
||||
|
||||
STAT_FUNC_DEFAULTS["keepdims"] = False
|
||||
|
||||
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
|
||||
validate_sum = CompatValidator(
|
||||
SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_prod = CompatValidator(
|
||||
PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_mean = CompatValidator(
|
||||
MEAN_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
|
||||
)
|
||||
validate_median = CompatValidator(
|
||||
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
|
||||
)
|
||||
|
||||
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {}
|
||||
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
|
||||
STAT_DDOF_FUNC_DEFAULTS["out"] = None
|
||||
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
|
||||
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
|
||||
|
||||
TAKE_DEFAULTS: dict[str, str | None] = {}
|
||||
TAKE_DEFAULTS["out"] = None
|
||||
TAKE_DEFAULTS["mode"] = "raise"
|
||||
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
|
||||
|
||||
|
||||
TRANSPOSE_DEFAULTS = {"axes": None}
|
||||
validate_transpose = CompatValidator(
|
||||
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
|
||||
)
|
||||
|
||||
|
||||
def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
    """
    Reject numpy-style extra arguments passed to a groupby method.

    'args' and 'kwargs' should be empty, except for allowed kwargs, because
    all of their necessary parameters are explicitly listed in the function
    signature.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument, or any keyword not listed in
        ``allowed``, was supplied.
    """
    permitted = [] if allowed is None else allowed
    unexpected = set(kwargs).difference(permitted)

    if len(args) or unexpected:
        raise UnsupportedFunctionCall(
            "numpy operations are not valid with groupby. "
            f"Use .groupby(...).{name}() instead"
        )
|
||||
|
||||
|
||||
def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
    """
    Check that ``axis`` addresses one of ``ndim`` dimensions.

    Ensures that the axis argument passed to min, max, argmin, or argmax is
    within ``[-ndim, ndim)`` or None, as otherwise it will be incorrectly
    ignored.

    Parameters
    ----------
    axis : int or None
    ndim : int, default 1

    Raises
    ------
    ValueError
        If ``axis`` is not None and falls outside ``[-ndim, ndim)``.
    """
    if axis is not None:
        beyond_last = axis >= ndim
        before_first = axis < -ndim
        if beyond_last or before_first:
            raise ValueError(
                f"`axis` must be fewer than the number of dimensions ({ndim})"
            )
|
||||
|
||||
|
||||
_validation_funcs = {
|
||||
"median": validate_median,
|
||||
"mean": validate_mean,
|
||||
"min": validate_min,
|
||||
"max": validate_max,
|
||||
"sum": validate_sum,
|
||||
"prod": validate_prod,
|
||||
}
|
||||
|
||||
|
||||
def validate_func(fname, args, kwargs) -> None:
    """
    Run the validator registered for ``fname``.

    Names without an entry in ``_validation_funcs`` are checked with the
    generic ``validate_stat_func``, passing ``fname`` through.
    """
    validator = _validation_funcs.get(fname)
    if validator is None:
        return validate_stat_func(args, kwargs, fname=fname)
    return validator(args, kwargs)
|
||||
@ -0,0 +1,143 @@
|
||||
"""
|
||||
Pickle compatibility to pandas version 1.0
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import pickle
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.arrays import NDArrayBacked
|
||||
from pandas._libs.tslibs import BaseOffset
|
||||
|
||||
from pandas.core.arrays import (
|
||||
DatetimeArray,
|
||||
PeriodArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
from pandas.core.internals import BlockManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
|
||||
# If classes are moved, provide compat here.
# Maps an old (module, name) pair found in a pickle to its current location.
_class_locations_map = {
    # Re-routing unpickle block logic to go through _unpickle_block instead
    # for pandas <= 1.3.5
    ("pandas.core.internals.blocks", "new_block"): (
        "pandas._libs.internals",
        "_unpickle_block",
    ),
    # Avoid Cython's warning "contradiction to Python 'class private name' rules"
    ("pandas._libs.tslibs.nattype", "__nat_unpickle"): (
        "pandas._libs.tslibs.nattype",
        "_nat_unpickle",
    ),
    # 50775, remove Int64Index, UInt64Index & Float64Index from codebase
    ("pandas.core.indexes.numeric", "Int64Index"): (
        "pandas.core.indexes.base",
        "Index",
    ),
    ("pandas.core.indexes.numeric", "UInt64Index"): (
        "pandas.core.indexes.base",
        "Index",
    ),
    ("pandas.core.indexes.numeric", "Float64Index"): (
        "pandas.core.indexes.base",
        "Index",
    ),
    ("pandas.core.arrays.sparse.dtype", "SparseDtype"): (
        "pandas.core.dtypes.dtypes",
        "SparseDtype",
    ),
}


# Our Unpickler subclass overrides ``find_class`` and two dispatcher functions
# for compat. It builds on ``pickle._Unpickler``, a non-public class of the
# pickle module.
class Unpickler(pickle._Unpickler):
    """Unpickler that relocates moved classes and patches REDUCE/NEWOBJ."""

    def find_class(self, module: str, name: str) -> Any:
        """Resolve ``(module, name)``, redirecting entries that have moved."""
        key = (module, name)
        module, name = _class_locations_map.get(key, key)
        return super().find_class(module, name)

    # Private copy so the overrides below do not touch the stdlib table.
    dispatch = pickle._Unpickler.dispatch.copy()

    def load_reduce(self) -> None:
        """
        Handle the REDUCE opcode, retrying known legacy constructor calls.

        The callable on the stack is applied to the popped argument tuple.
        If that raises TypeError and the first argument is a BaseOffset or
        PeriodArray subclass, the object is rebuilt through ``__new__``
        instead. Any other TypeError is re-raised unchanged.
        """
        stack = self.stack  # type: ignore[attr-defined]
        args = stack.pop()
        func = stack[-1]

        try:
            stack[-1] = func(*args)
        except TypeError:
            # If we have a deprecated function,
            # try to replace and try again.
            # Only a class can qualify for the fallbacks. Checking this first
            # keeps issubclass() from raising its own TypeError, which would
            # otherwise hide the original error for non-class first arguments.
            if args and isinstance(args[0], type):
                cls = args[0]
                if issubclass(cls, BaseOffset):
                    # TypeError: object.__new__(Day) is not safe, use Day.__new__()
                    stack[-1] = cls.__new__(*args)
                    return
                if issubclass(cls, PeriodArray):
                    stack[-1] = NDArrayBacked.__new__(*args)
                    return
            raise

    dispatch[pickle.REDUCE[0]] = load_reduce  # type: ignore[assignment]

    def load_newobj(self) -> None:
        """
        Handle the NEWOBJ opcode, supplying arguments for legacy pickles.

        When the pickle carries no constructor arguments, DatetimeArray and
        TimedeltaArray subclasses get an empty ns-resolution array, and
        BlockManager gets empty placeholders. Everything else is built with
        ``cls.__new__(cls, *args)``.
        """
        args = self.stack.pop()  # type: ignore[attr-defined]
        cls = self.stack.pop()  # type: ignore[attr-defined]

        # compat
        if issubclass(cls, DatetimeArray) and not args:
            arr = np.array([], dtype="M8[ns]")
            obj = cls.__new__(cls, arr, arr.dtype)
        elif issubclass(cls, TimedeltaArray) and not args:
            arr = np.array([], dtype="m8[ns]")
            obj = cls.__new__(cls, arr, arr.dtype)
        elif cls is BlockManager and not args:
            obj = cls.__new__(cls, (), [], False)
        else:
            obj = cls.__new__(cls, *args)
        self.append(obj)  # type: ignore[attr-defined]

    dispatch[pickle.NEWOBJ[0]] = load_newobj  # type: ignore[assignment]
|
||||
|
||||
|
||||
def loads(
    bytes_object: bytes,
    *,
    fix_imports: bool = True,
    encoding: str = "ASCII",
    errors: str = "strict",
) -> Any:
    """
    Unpickle ``bytes_object`` with the compat ``Unpickler``.

    Analogous to pickle._loads; the keyword arguments are forwarded to the
    ``Unpickler`` constructor unchanged.
    """
    buffer = io.BytesIO(bytes_object)
    unpickler = Unpickler(
        buffer, fix_imports=fix_imports, encoding=encoding, errors=errors
    )
    return unpickler.load()
|
||||
|
||||
|
||||
@contextlib.contextmanager
def patch_pickle() -> Generator[None]:
    """
    Temporarily patch pickle to use our unpickler.

    ``pickle.loads`` is replaced on the module itself for the duration of
    the ``with`` block and restored afterwards, including when the block
    raises.
    """
    saved_loads = pickle.loads
    try:
        pickle.loads = loads  # type: ignore[assignment]
        yield
    finally:
        pickle.loads = saved_loads
|
||||
@ -0,0 +1,95 @@
|
||||
"""support pyarrow compatibility across versions"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from pandas.util.version import Version
|
||||
|
||||
# Minimum pyarrow version; compared against the installed version below.
PYARROW_MIN_VERSION = "13.0.0"
try:
    import pyarrow as pa

    # Compare on the base version only. NOTE(review): presumably this strips
    # pre-release/dev suffixes so nightlies compare like their release -- confirm.
    _palv = Version(Version(pa.__version__).base_version)
    # Each flag is True when the installed pyarrow is older than that version.
    pa_version_under14p0 = _palv < Version("14.0.0")
    pa_version_under14p1 = _palv < Version("14.0.1")
    pa_version_under15p0 = _palv < Version("15.0.0")
    pa_version_under16p0 = _palv < Version("16.0.0")
    pa_version_under17p0 = _palv < Version("17.0.0")
    pa_version_under18p0 = _palv < Version("18.0.0")
    pa_version_under19p0 = _palv < Version("19.0.0")
    pa_version_under20p0 = _palv < Version("20.0.0")
    pa_version_under21p0 = _palv < Version("21.0.0")
    pa_version_under22p0 = _palv < Version("22.0.0")
    pa_version_under23p0 = _palv < Version("23.0.0")
    # PYARROW_INSTALLED: the import succeeded, whatever the version.
    PYARROW_INSTALLED = True
    # HAS_PYARROW: installed *and* at least PYARROW_MIN_VERSION.
    HAS_PYARROW = _palv >= Version(PYARROW_MIN_VERSION)
except ImportError:
    # pyarrow is missing: every "under" flag is True and both
    # availability flags are False.
    pa_version_under14p0 = True
    pa_version_under14p1 = True
    pa_version_under15p0 = True
    pa_version_under16p0 = True
    pa_version_under17p0 = True
    pa_version_under18p0 = True
    pa_version_under19p0 = True
    pa_version_under20p0 = True
    pa_version_under21p0 = True
    pa_version_under22p0 = True
    pa_version_under23p0 = True
    PYARROW_INSTALLED = False
    HAS_PYARROW = False
|
||||
|
||||
|
||||
def _safe_fill_null(
    arr: pa.Array | pa.ChunkedArray, fill_value: Any
) -> pa.Array | pa.ChunkedArray:
    """
    Safe wrapper for pyarrow.compute.fill_null with fallback for Windows + pyarrow 21.

    pyarrow 21.0.0 on Windows has a bug in fill_null that incorrectly fills null values.
    This function uses a fallback implementation for that specific case, otherwise uses
    the standard pyarrow.compute.fill_null.

    Parameters
    ----------
    arr : pyarrow.Array | pyarrow.ChunkedArray
        Input array with potential null values.
    fill_value : Any
        Value to fill nulls with. When it is itself a pyarrow Array or
        ChunkedArray, the standard ``fill_null`` is always used.

    Returns
    -------
    pyarrow.Array | pyarrow.ChunkedArray
        Array with nulls filled with fill_value.
    """
    import pyarrow.compute as pc

    is_windows = sys.platform in ["win32", "cygwin"]
    use_fallback = (
        HAS_PYARROW and is_windows and not pa_version_under21p0 and pa_version_under22p0
    )
    if not use_fallback or isinstance(fill_value, (pa.Array, pa.ChunkedArray)):
        return pc.fill_null(arr, fill_value)

    fill_scalar = pa.scalar(fill_value, type=arr.type)

    if pa.types.is_duration(arr.type):
        zero_duration = pa.scalar(0, type=arr.type)

        def fill_chunk(chunk: pa.Array) -> pa.Array:
            # Two-step fill kept from the original workaround: null slots are
            # first replaced with a zero duration, then with the fill value.
            mask = pc.is_null(chunk)
            chunk_zeroed = pc.if_else(mask, zero_duration, chunk)
            return pc.if_else(mask, fill_scalar, chunk_zeroed)

    else:

        def fill_chunk(chunk: pa.Array) -> pa.Array:
            return pc.if_else(pc.is_null(chunk), fill_scalar, chunk)

    if isinstance(arr, pa.ChunkedArray):
        # Pass the type explicitly: with zero chunks there is nothing to infer
        # it from, and pa.chunked_array requires a type for an empty list.
        return pa.chunked_array(
            [fill_chunk(chunk) for chunk in arr.chunks], type=arr.type
        )
    return fill_chunk(arr)
|
||||
Reference in New Issue
Block a user