Files
sirh/venv/lib/python3.12/site-packages/pandas/_typing.py
2026-04-29 11:52:03 +02:00

579 lines
16 KiB
Python

from __future__ import annotations
from builtins import type as type_t # pyright: ignore[reportUnusedImport]
from collections.abc import (
Callable,
Hashable,
Iterator,
Mapping,
MutableMapping,
Sequence,
)
from datetime import (
date,
datetime,
timedelta,
tzinfo,
)
from os import PathLike
from typing import (
TYPE_CHECKING,
Any,
Literal,
ParamSpec,
Protocol,
SupportsIndex,
TypeAlias,
TypeVar,
Union,
overload,
)
import numpy as np
import numpy.typing as npt
# To prevent import cycles, place any internal imports in the branch below
# and refer to them through string-literal forward references in later types:
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
# The branch is skipped at runtime (TYPE_CHECKING is False), so none of these
# modules is actually imported when this file is loaded.
# Note that Union is needed when a Union includes a pandas type
if TYPE_CHECKING:
    from pandas._libs import (
        NaTType,
        Period,
        Timedelta,
        Timestamp,
    )
    from pandas._libs.tslibs import BaseOffset

    from pandas.core.dtypes.dtypes import ExtensionDtype

    from pandas import (
        DatetimeIndex,
        Interval,
        PeriodIndex,
        TimedeltaIndex,
    )
    from pandas.arrays import (
        DatetimeArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.base import ExtensionArray
    from pandas.core.frame import DataFrame
    from pandas.core.generic import NDFrame
    from pandas.core.groupby.generic import (
        DataFrameGroupBy,
        GroupBy,
        SeriesGroupBy,
    )
    from pandas.core.indexes.base import Index
    from pandas.core.internals import (
        BlockManager,
        SingleBlockManager,
    )
    from pandas.core.resample import Resampler
    from pandas.core.series import Series
    from pandas.core.window.rolling import BaseWindow

    from pandas.io.formats.format import EngFormatter
    from pandas.tseries.holiday import AbstractHolidayCalendar
# Scalar-like values: Python numbers, str, bytes, or a numpy scalar (np.generic).
ScalarLike_co: TypeAlias = int | float | complex | str | bytes | np.generic
# numpy compatible types
# A scalar-like value, or anything covered by numpy's own ArrayLike alias.
NumpyValueArrayLike: TypeAlias = ScalarLike_co | npt.ArrayLike
# An ndarray of integers, or None.
NumpySorter: TypeAlias = npt.NDArray[np.integer] | None
# Generic parameters: P is a ParamSpec; HashableT and HashableT2 are two
# independent TypeVars bounded by Hashable; MutableMappingT is bounded by
# MutableMapping.
P = ParamSpec("P")
HashableT = TypeVar("HashableT", bound=Hashable)
HashableT2 = TypeVar("HashableT2", bound=Hashable)
MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
# array-like
# Quoted names are string forward references to pandas classes that this
# module imports only under TYPE_CHECKING, so these unions use Union[...].
ArrayLike: TypeAlias = Union["ExtensionArray", np.ndarray]
# Constrained TypeVar: resolves to exactly ExtensionArray or np.ndarray.
ArrayLikeT = TypeVar("ArrayLikeT", "ExtensionArray", np.ndarray)
# ArrayLike widened with Index and Series.
AnyArrayLike: TypeAlias = Union[ArrayLike, "Index", "Series"]
# Either a DatetimeArray or a TimedeltaArray.
TimeArrayLike: TypeAlias = Union["DatetimeArray", "TimedeltaArray"]
# list-like

# from https://github.com/hauntsaninja/useful_types
# includes Sequence-like objects but excludes str and bytes
_T_co = TypeVar("_T_co", covariant=True)


class SequenceNotStr(Protocol[_T_co]):
    """
    Structural type for sequence-like objects, excluding ``str`` and ``bytes``.

    The protocol is generic in its (covariant) element type and declares the
    read-only sequence members: indexing by position or slice, membership
    test, length, forward and reverse iteration, ``index`` and ``count``.
    All members are stubs; the class exists for static typing only.
    """

    # Make the class present itself as living in the public aliases module
    # rather than in this private one.
    __module__: str = "pandas.api.typing.aliases"

    @overload
    def __getitem__(self, index: SupportsIndex, /) -> _T_co: ...
    @overload
    def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ...
    def __contains__(self, value: object, /) -> bool: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[_T_co]: ...
    def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int: ...
    def count(self, value: Any, /) -> int: ...
    def __reversed__(self) -> Iterator[_T_co]: ...
# Anything list-like: an AnyArrayLike, a SequenceNotStr, or a range.
ListLike: TypeAlias = AnyArrayLike | SequenceNotStr | range
# scalars
PythonScalar: TypeAlias = str | float | bool
# A Period, Timestamp or Timedelta (string forward references).
DatetimeLikeScalar: TypeAlias = Union["Period", "Timestamp", "Timedelta"]
# aligned with pandas-stubs - typical scalars found in Series. Explicitly leaves
# out object
_IndexIterScalar: TypeAlias = Union[
str,
bytes,
date,
datetime,
timedelta,
np.datetime64,
np.timedelta64,
bool,
int,
float,
"Timestamp",
"Timedelta",
]
# _IndexIterScalar extended with Interval, complex, and the numpy integer,
# floating and complex-floating scalar types.
Scalar: TypeAlias = Union[
_IndexIterScalar, "Interval", complex, np.integer, np.floating, np.complexfloating
]
# TypeVar bounded by int | str.
IntStrT = TypeVar("IntStrT", bound=int | str)
# timestamp and timedelta convertible types
TimestampConvertibleTypes: TypeAlias = Union[
"Timestamp", date, np.datetime64, np.int64, float, str
]
# One of four keyword strings or a timedelta. The definition is identical to
# TimeNonexistent further down in this module.
TimestampNonexistent: TypeAlias = (
Literal["shift_forward", "shift_backward", "NaT", "raise"] | timedelta
)
TimedeltaConvertibleTypes: TypeAlias = Union[
"Timedelta", timedelta, np.timedelta64, np.int64, float, str
]
# A str or a tzinfo instance.
Timezone: TypeAlias = str | tzinfo
# NOTE(review): "s"/"e" are presumably short spellings of "start"/"end" - confirm.
ToTimestampHow: TypeAlias = Literal["s", "e", "start", "end"]
# NDFrameT is stricter and ensures that the same subclass of NDFrame always is
# used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
# Series is passed into a function, a Series is always returned and if a DataFrame is
# passed in, a DataFrame is always returned.
NDFrameT = TypeVar("NDFrameT", bound="NDFrame")
# IndexT is bounded by Index; FreqIndexT and NumpyIndexT are constrained
# TypeVars that resolve to exactly one of the listed types.
IndexT = TypeVar("IndexT", bound="Index")
FreqIndexT = TypeVar("FreqIndexT", "DatetimeIndex", "PeriodIndex", "TimedeltaIndex")
NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")
# An axis given as an int, or by one of the names "index", "columns", "rows".
AxisInt: TypeAlias = int
Axis: TypeAlias = AxisInt | Literal["index", "columns", "rows"]
# A single hashable label or a sequence of hashable labels.
IndexLabel: TypeAlias = Hashable | Sequence[Hashable]
Level: TypeAlias = Hashable
# A tuple of ints of any length.
Shape: TypeAlias = tuple[int, ...]
# A sequence whose items are each a str or None.
Suffixes: TypeAlias = Sequence[str | None]
Ordered: TypeAlias = bool | None
JSONSerializable: TypeAlias = PythonScalar | list | dict | None
# A str or a BaseOffset (forward reference).
Frequency: TypeAlias = Union[str, "BaseOffset"]
# Axes is another name for ListLike.
Axes: TypeAlias = ListLike
# An int, an ndarray, or one of the three listed np.random classes.
RandomState: TypeAlias = (
int
| np.ndarray
| np.random.Generator
| np.random.BitGenerator
| np.random.RandomState
)
# dtypes
# A dtype given as a str, an np.dtype, or one of the classes str, complex,
# bool, object.
NpDtype: TypeAlias = str | np.dtype | type[str | complex | bool | object]
# An ExtensionDtype (forward reference) or any NpDtype.
Dtype: TypeAlias = Union["ExtensionDtype", NpDtype]
AstypeArg: TypeAlias = Union["ExtensionDtype", npt.DTypeLike]
# DtypeArg specifies every dtype a function accepts for its `dtype` argument:
# a single Dtype, or a mapping from hashable keys to Dtype.
DtypeArg: TypeAlias = Dtype | Mapping[Hashable, Dtype]
DtypeObj: TypeAlias = Union[np.dtype, "ExtensionDtype"]
# converters
ConvertersArg: TypeAlias = dict[Hashable, Callable[[Dtype], Dtype]]
# parse_dates
ParseDatesArg: TypeAlias = (
bool | list[Hashable] | list[list[Hashable]] | dict[Hashable, list[Hashable]]
)
# For functions like rename that convert one label to another
Renamer: TypeAlias = Mapping[Any, Hashable] | Callable[[Any], Hashable]
# to maintain type information across generic functions and parametrization
T = TypeVar("T")
# used in decorators to preserve the signature of the function it decorates
# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
FuncType: TypeAlias = Callable[..., Any]
F = TypeVar("F", bound=FuncType)
# TypeVar bounded by `type`, i.e. it stands for a class object.
TypeT = TypeVar("TypeT", bound=type)
# types of vectorized key functions for DataFrame::sort_values and
# DataFrame::sort_index, among others
# Each is a callable taking a Series (resp. Index) and returning the same kind
# of object or any AnyArrayLike; None is also allowed.
ValueKeyFunc: TypeAlias = Callable[["Series"], Union["Series", AnyArrayLike]] | None
IndexKeyFunc: TypeAlias = Callable[["Index"], Union["Index", AnyArrayLike]] | None
# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
# A single aggregation: a callable or a str.
AggFuncTypeBase: TypeAlias = Callable | str
# A mutable mapping from hashable keys to one aggregation or a list of them.
AggFuncTypeDict: TypeAlias = MutableMapping[
Hashable, AggFuncTypeBase | list[AggFuncTypeBase]
]
# Any of the three forms: single, list, or mapping.
AggFuncType: TypeAlias = AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDict
AggObjType: TypeAlias = Union[
"Series",
"DataFrame",
"GroupBy",
"SeriesGroupBy",
"DataFrameGroupBy",
"BaseWindow",
"Resampler",
]
# A callable taking one argument of any type and returning anything.
PythonFuncType: TypeAlias = Callable[[Any], Any]
# filenames and file-like-objects
# TypeVars constrained to str or bytes, in covariant and contravariant
# flavours, used by the read and write buffer protocols respectively.
AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
AnyStr_contra = TypeVar("AnyStr_contra", str, bytes, contravariant=True)
class BaseBuffer(Protocol):
    """
    Structural type for the members shared by the buffer protocols.

    ``ReadBuffer`` and ``WriteBuffer`` in this module both extend it. The
    comment inside each member names the caller or file type it is listed for.
    All members are stubs; the class exists for static typing only.
    """

    @property
    def mode(self) -> str:
        # for _get_filepath_or_buffer
        ...

    def seek(self, offset: int, whence: int = ..., /) -> int:
        # with one argument: gzip.GzipFile, bz2.BZ2File
        # with two arguments: zip.ZipFile, read_sas
        ...

    def seekable(self) -> bool:
        # for bz2.BZ2File
        ...

    def tell(self) -> int:
        # for zip.ZipFile, read_stata, to_stata
        ...
class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
    """
    ``BaseBuffer`` extended with ``read``.

    Generic in the kind of data returned by ``read`` (``str`` or ``bytes``).
    """

    # Make the class present itself as living in the public aliases module.
    __module__: str = "pandas.api.typing.aliases"

    def read(self, n: int = ..., /) -> AnyStr_co:
        # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
        ...
class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
    """
    ``BaseBuffer`` extended with ``write`` and ``flush``.

    Generic in the kind of data accepted by ``write`` (``str`` or ``bytes``).
    """

    # Make the class present itself as living in the public aliases module.
    __module__: str = "pandas.api.typing.aliases"

    def write(self, b: AnyStr_contra, /) -> Any:
        # for gzip.GzipFile, bz2.BZ2File
        ...

    def flush(self) -> Any:
        # for gzip.GzipFile, bz2.BZ2File
        ...
class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
    """A ``ReadBuffer`` of ``bytes`` that additionally provides ``readline``."""

    # Make the class present itself as living in the public aliases module.
    __module__: str = "pandas.api.typing.aliases"

    def readline(self) -> bytes: ...
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
    """A ``WriteBuffer`` of ``bytes`` that additionally provides ``truncate``."""

    # Make the class present itself as living in the public aliases module.
    __module__: str = "pandas.api.typing.aliases"

    def truncate(self, size: int | None = ..., /) -> int: ...
class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):
    """
    ``ReadBuffer`` extended with the members the CSV readers rely on.

    Adds iteration, ``fileno``, ``readline`` and the ``closed`` property; the
    comment inside each member names the engine or wrapper it is listed for.
    """

    # Make the class present itself as living in the public aliases module.
    __module__: str = "pandas.api.typing.aliases"

    def __iter__(self) -> Iterator[AnyStr_co]:
        # for engine=python
        ...

    def fileno(self) -> int:
        # for _MMapWrapper
        ...

    def readline(self) -> AnyStr_co:
        # for engine=python
        ...

    @property
    def closed(self) -> bool:
        # for engine=pyarrow
        ...
# A path given as a str or as an os.PathLike parametrized with str.
FilePath: TypeAlias = str | PathLike[str]
# for arbitrary kwargs passed during reading/writing files
StorageOptions: TypeAlias = dict[str, Any] | None
# compression keywords and compression
CompressionDict: TypeAlias = dict[str, Any]
# One of the listed method names, a CompressionDict, or None.
CompressionOptions: TypeAlias = (
Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"] | CompressionDict | None
)
# One of the listed codec names, or None.
ParquetCompressionOptions: TypeAlias = (
Literal["snappy", "gzip", "brotli", "lz4", "zstd"] | None
)
# types in DataFrameFormatter
# Callables supplied as a list, a tuple, or a mapping keyed by str or int.
FormattersType: TypeAlias = (
list[Callable] | tuple[Callable, ...] | Mapping[str | int, Callable]
)
ColspaceType: TypeAlias = Mapping[Hashable, str | int]
FloatFormatType: TypeAlias = Union[str, Callable, "EngFormatter"]
# A single str/int, a sequence of them, or a mapping like ColspaceType.
ColspaceArgType: TypeAlias = (
str | int | Sequence[str | int] | Mapping[Hashable, str | int]
)
# Arguments for fillna()
FillnaOptions: TypeAlias = Literal["backfill", "bfill", "ffill", "pad"]
# The closed set of method names accepted where this alias is used.
InterpolateOptions: TypeAlias = Literal[
"linear",
"time",
"index",
"values",
"nearest",
"zero",
"slinear",
"quadratic",
"cubic",
"barycentric",
"polynomial",
"krogh",
"piecewise_polynomial",
"spline",
"pchip",
"akima",
"cubicspline",
"from_derivatives",
]
# internals
# Either manager class (string forward references).
Manager: TypeAlias = Union["BlockManager", "SingleBlockManager"]
# indexing
# PositionalIndexer -> valid 1D positional indexer, e.g. can pass
# to ndarray.__getitem__
# ScalarIndexer is for a single value as the index
# SequenceIndexer is for list like or slices (but not tuples)
# PositionalIndexerTuple extends PositionalIndexer to 2D arrays
# These are used in various __getitem__ overloads
# TODO(typing#684): add Ellipsis, see
# https://github.com/python/typing/issues/684#issuecomment-548203158
# https://bugs.python.org/issue41810
# Using list[int] here rather than Sequence[int] to disallow tuples.
ScalarIndexer: TypeAlias = int | np.integer
SequenceIndexer: TypeAlias = slice | list[int] | np.ndarray
PositionalIndexer: TypeAlias = ScalarIndexer | SequenceIndexer
PositionalIndexerTuple: TypeAlias = tuple[PositionalIndexer, PositionalIndexer]
# Either the 1D form or the two-element tuple form.
PositionalIndexer2D: TypeAlias = PositionalIndexer | PositionalIndexerTuple
# Integer indices as a sequence of int, a sequence of np.integer, or an
# integer ndarray.
TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer]
# Shared by functions such as drop and astype
IgnoreRaise: TypeAlias = Literal["ignore", "raise"]
# Windowing rank methods
WindowingRankType: TypeAlias = Literal["average", "min", "max"]
# read_csv engines
CSVEngine: TypeAlias = Literal["c", "python", "pyarrow", "python-fwf"]
# read_json engines
JSONEngine: TypeAlias = Literal["ujson", "pyarrow"]
# read_xml parsers
XMLParsers: TypeAlias = Literal["lxml", "etree"]
# read_html flavors
HTMLFlavors: TypeAlias = Literal["lxml", "html5lib", "bs4"]
# Interval closed type
IntervalLeftRight: TypeAlias = Literal["left", "right"]
# The two one-sided values above plus "both" and "neither".
IntervalClosedType: TypeAlias = IntervalLeftRight | Literal["both", "neither"]
# datetime and NaTType
# A datetime or NaTType (string forward reference).
DatetimeNaTType: TypeAlias = Union[datetime, "NaTType"]
DateTimeErrorChoices: TypeAlias = Literal["raise", "coerce"]
# sort_index
SortKind: TypeAlias = Literal["quicksort", "mergesort", "heapsort", "stable"]
NaPosition: TypeAlias = Literal["first", "last"]
# Arguments for nsmallest and nlargest
NsmallestNlargestKeep: TypeAlias = Literal["first", "last", "all"]
# quantile interpolation
QuantileInterpolation: TypeAlias = Literal[
"linear", "lower", "higher", "midpoint", "nearest"
]
# plotting
PlottingOrientation: TypeAlias = Literal["horizontal", "vertical"]
# dropna
AnyAll: TypeAlias = Literal["any", "all"]
# merge
MergeHow: TypeAlias = Literal[
"left", "right", "inner", "outer", "cross", "left_anti", "right_anti"
]
# Each relationship appears twice: spelled out and in "x:y" shorthand.
MergeValidate: TypeAlias = Literal[
"one_to_one",
"1:1",
"one_to_many",
"1:m",
"many_to_one",
"m:1",
"many_to_many",
"m:m",
]
# join
# JoinHow is MergeHow without "cross", "left_anti" and "right_anti".
JoinHow: TypeAlias = Literal["left", "right", "inner", "outer"]
# Lists exactly the same values as MergeValidate.
JoinValidate: TypeAlias = Literal[
"one_to_one",
"1:1",
"one_to_many",
"1:m",
"many_to_one",
"m:1",
"many_to_many",
"m:m",
]
# reindex
# The FillnaOptions values plus "nearest".
ReindexMethod: TypeAlias = FillnaOptions | Literal["nearest"]
# A color given as a str or as a sequence of floats.
MatplotlibColor: TypeAlias = str | Sequence[float]
# A Timestamp (forward reference) or one of the listed keywords.
TimeGrouperOrigin: TypeAlias = Union[
"Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
]
# A keyword, a single bool, or a boolean ndarray.
TimeAmbiguous: TypeAlias = (
Literal["infer", "NaT", "raise"] | bool | npt.NDArray[np.bool_]
)
# Same definition as TimestampNonexistent earlier in this module.
TimeNonexistent: TypeAlias = (
Literal["shift_forward", "shift_backward", "NaT", "raise"] | timedelta
)
# "first", "last", or the literal False.
DropKeep: TypeAlias = Literal["first", "last", False]
# A named method, or a callable taking two ndarrays and returning a float.
CorrelationMethod: TypeAlias = (
Literal["pearson", "kendall", "spearman"]
| Callable[[np.ndarray, np.ndarray], float]
)
AlignJoin: TypeAlias = Literal["outer", "inner", "left", "right"]
DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"]
TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"]
# NOTE(review): presumably the values accepted by the `errors` argument when
# opening a file - confirm against the callers.
OpenFileErrors: TypeAlias = Literal[
"strict",
"ignore",
"replace",
"surrogateescape",
"xmlcharrefreplace",
"backslashreplace",
"namereplace",
]
# update
UpdateJoin: TypeAlias = Literal["left"]
# applymap
NaAction: TypeAlias = Literal["ignore"]
# from_dict
FromDictOrient: TypeAlias = Literal["columns", "index", "tight"]
# to_stata
ToStataByteorder: TypeAlias = Literal[">", "<", "little", "big"]
# ExcelWriter
ExcelWriterIfSheetExists: TypeAlias = Literal["error", "new", "replace", "overlay"]
# A bool, or the keyword "columns".
ExcelWriterMergeCells: TypeAlias = bool | Literal["columns"]
# Offsets
OffsetCalendar: TypeAlias = Union[np.busdaycalendar, "AbstractHolidayCalendar"]
# read_csv: usecols
# A non-str sequence of hashables, a range, an array-like, a predicate taking
# one hashable and returning bool, or None.
UsecolsArgType: TypeAlias = (
SequenceNotStr[Hashable] | range | AnyArrayLike | Callable[[HashableT], bool] | None
)
# maintain the sub-type of any hashable sequence
SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable])
# A hashable value or None.
SliceType: TypeAlias = Hashable | None
# Arrow PyCapsule Interface
# from https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#protocol-typehints
class ArrowArrayExportable(Protocol):
"""
An object with an ``__arrow_c_array__`` method.
This method indicates the object is an Arrow-compatible object implementing
the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Data Interface`_ in
Python), enabling zero-copy Arrow data interchange across libraries.
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
.. _Arrow C Data Interface: https://arrow.apache.org/docs/format/CDataInterface.html
"""
def __arrow_c_array__(
self, requested_schema: object | None = None
) -> tuple[object, object]: ...
class ArrowStreamExportable(Protocol):
"""
An object with an ``__arrow_c_stream__`` method.
This method indicates the object is an Arrow-compatible object implementing
the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Data Interface`_
for streams in Python), enabling zero-copy Arrow data interchange across
libraries.
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
.. _Arrow C Stream Interface: https://arrow.apache.org/docs/format/CStreamInterface.html
"""
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
# Only `type_t` (the builtin `type`, imported under that name at the top of
# this file) is exported by a star-import; every alias defined above has to be
# imported by name.
__all__ = ["type_t"]