Initial commit: 首次建仓，建立目录结构

2026-06-11 23:49:54 +08:00
commit 4038a476b5
9396 changed files with 2372905 additions and 0 deletions
--- a/dashboard/venv/lib/python3.12/site-packages/pandas/io/_util.py
+++ b/dashboard/venv/lib/python3.12/site-packages/pandas/io/_util.py
@ -0,0 +1,310 @@
+from __future__ import annotations
+
+import datetime as dt
+from typing import (
+    TYPE_CHECKING,
+    Literal,
+    cast,
+)
+import zoneinfo
+
+import numpy as np
+
+from pandas._config import using_string_dtype
+
+from pandas._libs import lib
+from pandas._libs.tslibs import timezones
+from pandas.compat import (
+    pa_version_under18p0,
+    pa_version_under19p0,
+)
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.dtypes.common import pandas_dtype
+
+import pandas as pd
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Callable,
+        Hashable,
+        Sequence,
+    )
+
+    import pyarrow
+
+    from pandas._typing import (
+        DtypeArg,
+        DtypeBackend,
+    )
+
+
+pytz = import_optional_dependency("pytz", errors="ignore")
+
+
+def _arrow_dtype_mapping() -> dict:
+    pa = import_optional_dependency("pyarrow")
+    return {
+        pa.int8(): pd.Int8Dtype(),
+        pa.int16(): pd.Int16Dtype(),
+        pa.int32(): pd.Int32Dtype(),
+        pa.int64(): pd.Int64Dtype(),
+        pa.uint8(): pd.UInt8Dtype(),
+        pa.uint16(): pd.UInt16Dtype(),
+        pa.uint32(): pd.UInt32Dtype(),
+        pa.uint64(): pd.UInt64Dtype(),
+        pa.bool_(): pd.BooleanDtype(),
+        pa.string(): pd.StringDtype(),
+        pa.float32(): pd.Float32Dtype(),
+        pa.float64(): pd.Float64Dtype(),
+        pa.string(): pd.StringDtype(),
+        pa.large_string(): pd.StringDtype(),
+    }
+
+
+def _arrow_string_types_mapper() -> Callable:
+    pa = import_optional_dependency("pyarrow")
+
+    mapping = {
+        pa.string(): pd.StringDtype(na_value=np.nan),
+        pa.large_string(): pd.StringDtype(na_value=np.nan),
+    }
+    if not pa_version_under18p0:
+        mapping[pa.string_view()] = pd.StringDtype(na_value=np.nan)
+
+    return mapping.get
+
+
+def arrow_table_to_pandas(
+    table: pyarrow.Table,
+    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
+    null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
+    dtype: DtypeArg | None = None,
+    names: Sequence[Hashable] | None = None,
+) -> pd.DataFrame:
+    pa = import_optional_dependency("pyarrow")
+
+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
+    types_mapper: type[pd.ArrowDtype] | None | Callable
+    if dtype_backend == "numpy_nullable":
+        mapping = _arrow_dtype_mapping()
+        if null_to_int64:
+            # Modify the default mapping to also map null to Int64
+            # (to match other engines - only for CSV parser)
+            mapping[pa.null()] = pd.Int64Dtype()
+        types_mapper = mapping.get
+    elif dtype_backend == "pyarrow":
+        types_mapper = pd.ArrowDtype
+    elif using_string_dtype():
+        if pa_version_under19p0:
+            types_mapper = _arrow_string_types_mapper()
+        elif dtype is not None:
+            # GH#56136 Avoid lossy conversion to float64
+            # We'll convert to numpy below if
+            types_mapper = {
+                pa.int8(): pd.Int8Dtype(),
+                pa.int16(): pd.Int16Dtype(),
+                pa.int32(): pd.Int32Dtype(),
+                pa.int64(): pd.Int64Dtype(),
+            }.get
+        else:
+            types_mapper = None
+    elif dtype_backend is lib.no_default or dtype_backend == "numpy":
+        if dtype is not None:
+            # GH#56136 Avoid lossy conversion to float64
+            # We'll convert to numpy below if
+            types_mapper = {
+                pa.int8(): pd.Int8Dtype(),
+                pa.int16(): pd.Int16Dtype(),
+                pa.int32(): pd.Int32Dtype(),
+                pa.int64(): pd.Int64Dtype(),
+            }.get
+        else:
+            types_mapper = None
+    else:
+        raise NotImplementedError
+
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
+    df = _post_convert_dtypes(df, dtype_backend, dtype, names)
+    df = _normalize_timezone_dtypes(df)
+    return df
+
+
+def _post_convert_dtypes(
+    df: pd.DataFrame,
+    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault,
+    dtype: DtypeArg | None,
+    names: Sequence[Hashable] | None,
+) -> pd.DataFrame:
+    if dtype is not None and (
+        dtype_backend is lib.no_default or dtype_backend == "numpy"
+    ):
+        # GH#56136 apply any user-provided dtype, and convert any IntegerDtype
+        #  columns the user didn't explicitly ask for.
+        if isinstance(dtype, dict):
+            if names is not None:
+                df.columns = names
+
+            cmp_dtypes = {
+                pd.Int8Dtype(),
+                pd.Int16Dtype(),
+                pd.Int32Dtype(),
+                pd.Int64Dtype(),
+            }
+            for col in df.columns:
+                if col not in dtype and df[col].dtype in cmp_dtypes:
+                    # Any key that the user didn't explicitly specify
+                    #  that got converted to IntegerDtype now gets converted
+                    #  to numpy dtype.
+                    dtype[col] = df[col].dtype.numpy_dtype
+
+            # Ignore non-existent columns from dtype mapping
+            # like other parsers do
+            dtype = {
+                key: pandas_dtype(dtype[key]) for key in dtype if key in df.columns
+            }
+
+        else:
+            dtype = pandas_dtype(dtype)
+
+        try:
+            df = df.astype(dtype)
+        except TypeError as err:
+            # GH#44901 reraise to keep api consistent
+            raise ValueError(str(err)) from err
+
+    if (
+        not using_string_dtype()
+        and dtype != "str"
+        and (dtype_backend is lib.no_default or dtype_backend == "numpy")
+    ):
+        # Convert any StringDtype columns back to object dtype (pyarrow always
+        # uses string dtype even when the infer_string option is False)
+        for i in range(len(df.columns)):
+            new_col = _maybe_convert_string_to_object(df.iloc[:, i])
+            if new_col is not None:
+                df.isetitem(i, new_col)
+
+        new_idx = _maybe_convert_string_index_to_object(df.index)
+        if new_idx is not None:
+            df.index = new_idx
+        new_cols = _maybe_convert_string_index_to_object(df.columns)
+        if new_cols is not None:
+            df.columns = new_cols
+
+    return df
+
+
+def _maybe_convert_string_to_object(
+    data: pd.Series | pd.Index,
+) -> pd.Series | pd.Index | None:
+    if isinstance(data.dtype, pd.StringDtype) and data.dtype.na_value is np.nan:
+        return data.astype("object").fillna(None)
+    elif isinstance(data.dtype, pd.CategoricalDtype):
+        cat_dtype = data.dtype.categories.dtype
+        if isinstance(cat_dtype, pd.StringDtype) and cat_dtype.na_value is np.nan:
+            cat_dtype = pd.CategoricalDtype(
+                categories=data.dtype.categories.astype("object"),
+                ordered=data.dtype.ordered,
+            )
+            return data.astype(cat_dtype)
+
+    # no conversion needed
+    return None
+
+
+def _maybe_convert_string_index_to_object(index: pd.Index) -> pd.Index | None:
+    if isinstance(index, pd.MultiIndex):
+        if any(
+            isinstance(level.dtype, pd.StringDtype) and level.dtype.na_value is np.nan
+            for level in index.levels
+        ):
+            new_levels = []
+            for level in index.levels:
+                new_level = _maybe_convert_string_to_object(level)
+                if new_level is not None:
+                    new_levels.append(new_level)
+                else:
+                    new_levels.append(level)
+            return index.set_levels(new_levels)
+        return None
+
+    else:
+        return cast("pd.Index | None", _maybe_convert_string_to_object(index))
+
+
+def _normalize_pytz_timezone(tz: dt.tzinfo) -> dt.tzinfo:
+    """
+    If the input tz is a pytz timezone, attempt to convert it to "default"
+    tzinfo object (zoneinfo or datetime.timezone).
+    """
+    if not type(tz).__module__.startswith("pytz"):
+        # isinstance(col.dtype.tz, pytz.BaseTzInfo) does not included
+        # fixed offsets
+        return tz
+
+    if timezones.is_utc(tz):
+        return dt.timezone.utc
+
+    if tz.zone is not None:  # type: ignore[attr-defined]
+        try:
+            return zoneinfo.ZoneInfo(tz.zone)  # type: ignore[attr-defined]
+        except Exception:
+            # some pytz timezones might not be available for zoneinfo
+            pass
+
+    if timezones.is_fixed_offset(tz):
+        # Convert pytz fixed offset to datetime.timezone
+        try:
+            offset = tz.utcoffset(None)
+            if offset is not None:
+                return dt.timezone(offset)
+        except Exception:
+            pass
+
+    return tz
+
+
+def _normalize_timezone_index(index: pd.Index) -> pd.Index:
+    if isinstance(index, pd.MultiIndex):
+        if any(isinstance(level.dtype, pd.DatetimeTZDtype) for level in index.levels):
+            levels = [_normalize_timezone_index(level) for level in index.levels]
+            return index.set_levels(levels)
+
+        return index
+
+    if isinstance(index.dtype, pd.DatetimeTZDtype):
+        normalized_tz = _normalize_pytz_timezone(index.dtype.tz)
+        if normalized_tz is not index.dtype.tz:
+            return index.tz_convert(normalized_tz)  # type: ignore[attr-defined]
+
+    return index
+
+
+def _normalize_timezone_dtypes(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    PyArrow uses pytz by default for timezones, but pandas uses
+    zoneinfo / datetime.timezone since pandas 3.0.
+
+    TODO: Starting with pyarrow 25, it will use zoneinfo by default, and then
+    this normalization can be skipped (https://github.com/apache/arrow/pull/49694).
+    """
+    if pytz is not None:
+        # Convert any pytz timezones to zoneinfo / fixed offset timezones
+        if any(
+            isinstance(dtype, pd.DatetimeTZDtype)
+            for dtype in df._mgr.get_unique_dtypes()
+        ):
+            col_indices = df._select_dtypes_indices(pd.DatetimeTZDtype)
+            for i in col_indices:
+                col = df.iloc[:, i]
+                normalized_tz = _normalize_pytz_timezone(col.dtype.tz)
+                if normalized_tz is not col.dtype.tz:
+                    df.isetitem(i, col.dt.tz_convert(normalized_tz))
+
+        df.index = _normalize_timezone_index(df.index)
+        df.columns = _normalize_timezone_index(df.columns)
+
+    return df