Initial commit: 首次建仓,建立目录结构
This commit is contained in:
@ -0,0 +1,9 @@
|
||||
# ruff: noqa: TC004
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# import modules that have public classes/functions
|
||||
from pandas.io.formats import style
|
||||
|
||||
# and mark only those modules as public
|
||||
__all__ = ["style"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,157 @@
|
||||
# GH37967: Enable the use of CSS named colors, as defined in
|
||||
# matplotlib.colors.CSS4_COLORS, when exporting to Excel.
|
||||
# This data has been copied here, instead of being imported from matplotlib,
|
||||
# not to have ``to_excel`` methods require matplotlib.
|
||||
# source: matplotlib._color_data (3.3.3)
|
||||
from __future__ import annotations
|
||||
|
||||
CSS4_COLORS = {
|
||||
"aliceblue": "F0F8FF",
|
||||
"antiquewhite": "FAEBD7",
|
||||
"aqua": "00FFFF",
|
||||
"aquamarine": "7FFFD4",
|
||||
"azure": "F0FFFF",
|
||||
"beige": "F5F5DC",
|
||||
"bisque": "FFE4C4",
|
||||
"black": "000000",
|
||||
"blanchedalmond": "FFEBCD",
|
||||
"blue": "0000FF",
|
||||
"blueviolet": "8A2BE2",
|
||||
"brown": "A52A2A",
|
||||
"burlywood": "DEB887",
|
||||
"cadetblue": "5F9EA0",
|
||||
"chartreuse": "7FFF00",
|
||||
"chocolate": "D2691E",
|
||||
"coral": "FF7F50",
|
||||
"cornflowerblue": "6495ED",
|
||||
"cornsilk": "FFF8DC",
|
||||
"crimson": "DC143C",
|
||||
"cyan": "00FFFF",
|
||||
"darkblue": "00008B",
|
||||
"darkcyan": "008B8B",
|
||||
"darkgoldenrod": "B8860B",
|
||||
"darkgray": "A9A9A9",
|
||||
"darkgreen": "006400",
|
||||
"darkgrey": "A9A9A9",
|
||||
"darkkhaki": "BDB76B",
|
||||
"darkmagenta": "8B008B",
|
||||
"darkolivegreen": "556B2F",
|
||||
"darkorange": "FF8C00",
|
||||
"darkorchid": "9932CC",
|
||||
"darkred": "8B0000",
|
||||
"darksalmon": "E9967A",
|
||||
"darkseagreen": "8FBC8F",
|
||||
"darkslateblue": "483D8B",
|
||||
"darkslategray": "2F4F4F",
|
||||
"darkslategrey": "2F4F4F",
|
||||
"darkturquoise": "00CED1",
|
||||
"darkviolet": "9400D3",
|
||||
"deeppink": "FF1493",
|
||||
"deepskyblue": "00BFFF",
|
||||
"dimgray": "696969",
|
||||
"dimgrey": "696969",
|
||||
"dodgerblue": "1E90FF",
|
||||
"firebrick": "B22222",
|
||||
"floralwhite": "FFFAF0",
|
||||
"forestgreen": "228B22",
|
||||
"fuchsia": "FF00FF",
|
||||
"gainsboro": "DCDCDC",
|
||||
"ghostwhite": "F8F8FF",
|
||||
"gold": "FFD700",
|
||||
"goldenrod": "DAA520",
|
||||
"gray": "808080",
|
||||
"green": "008000",
|
||||
"greenyellow": "ADFF2F",
|
||||
"grey": "808080",
|
||||
"honeydew": "F0FFF0",
|
||||
"hotpink": "FF69B4",
|
||||
"indianred": "CD5C5C",
|
||||
"indigo": "4B0082",
|
||||
"ivory": "FFFFF0",
|
||||
"khaki": "F0E68C",
|
||||
"lavender": "E6E6FA",
|
||||
"lavenderblush": "FFF0F5",
|
||||
"lawngreen": "7CFC00",
|
||||
"lemonchiffon": "FFFACD",
|
||||
"lightblue": "ADD8E6",
|
||||
"lightcoral": "F08080",
|
||||
"lightcyan": "E0FFFF",
|
||||
"lightgoldenrodyellow": "FAFAD2",
|
||||
"lightgray": "D3D3D3",
|
||||
"lightgreen": "90EE90",
|
||||
"lightgrey": "D3D3D3",
|
||||
"lightpink": "FFB6C1",
|
||||
"lightsalmon": "FFA07A",
|
||||
"lightseagreen": "20B2AA",
|
||||
"lightskyblue": "87CEFA",
|
||||
"lightslategray": "778899",
|
||||
"lightslategrey": "778899",
|
||||
"lightsteelblue": "B0C4DE",
|
||||
"lightyellow": "FFFFE0",
|
||||
"lime": "00FF00",
|
||||
"limegreen": "32CD32",
|
||||
"linen": "FAF0E6",
|
||||
"magenta": "FF00FF",
|
||||
"maroon": "800000",
|
||||
"mediumaquamarine": "66CDAA",
|
||||
"mediumblue": "0000CD",
|
||||
"mediumorchid": "BA55D3",
|
||||
"mediumpurple": "9370DB",
|
||||
"mediumseagreen": "3CB371",
|
||||
"mediumslateblue": "7B68EE",
|
||||
"mediumspringgreen": "00FA9A",
|
||||
"mediumturquoise": "48D1CC",
|
||||
"mediumvioletred": "C71585",
|
||||
"midnightblue": "191970",
|
||||
"mintcream": "F5FFFA",
|
||||
"mistyrose": "FFE4E1",
|
||||
"moccasin": "FFE4B5",
|
||||
"navajowhite": "FFDEAD",
|
||||
"navy": "000080",
|
||||
"oldlace": "FDF5E6",
|
||||
"olive": "808000",
|
||||
"olivedrab": "6B8E23",
|
||||
"orange": "FFA500",
|
||||
"orangered": "FF4500",
|
||||
"orchid": "DA70D6",
|
||||
"palegoldenrod": "EEE8AA",
|
||||
"palegreen": "98FB98",
|
||||
"paleturquoise": "AFEEEE",
|
||||
"palevioletred": "DB7093",
|
||||
"papayawhip": "FFEFD5",
|
||||
"peachpuff": "FFDAB9",
|
||||
"peru": "CD853F",
|
||||
"pink": "FFC0CB",
|
||||
"plum": "DDA0DD",
|
||||
"powderblue": "B0E0E6",
|
||||
"purple": "800080",
|
||||
"rebeccapurple": "663399",
|
||||
"red": "FF0000",
|
||||
"rosybrown": "BC8F8F",
|
||||
"royalblue": "4169E1",
|
||||
"saddlebrown": "8B4513",
|
||||
"salmon": "FA8072",
|
||||
"sandybrown": "F4A460",
|
||||
"seagreen": "2E8B57",
|
||||
"seashell": "FFF5EE",
|
||||
"sienna": "A0522D",
|
||||
"silver": "C0C0C0",
|
||||
"skyblue": "87CEEB",
|
||||
"slateblue": "6A5ACD",
|
||||
"slategray": "708090",
|
||||
"slategrey": "708090",
|
||||
"snow": "FFFAFA",
|
||||
"springgreen": "00FF7F",
|
||||
"steelblue": "4682B4",
|
||||
"tan": "D2B48C",
|
||||
"teal": "008080",
|
||||
"thistle": "D8BFD8",
|
||||
"tomato": "FF6347",
|
||||
"turquoise": "40E0D0",
|
||||
"violet": "EE82EE",
|
||||
"wheat": "F5DEB3",
|
||||
"white": "FFFFFF",
|
||||
"whitesmoke": "F5F5F5",
|
||||
"yellow": "FFFF00",
|
||||
"yellowgreen": "9ACD32",
|
||||
}
|
||||
@ -0,0 +1,95 @@
|
||||
"""
|
||||
Internal module for console introspection
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from shutil import get_terminal_size
|
||||
|
||||
|
||||
def get_console_size() -> tuple[int | None, int | None]:
    """
    Return the console size to format output for, as ``(width, height)``.

    The ``display.width`` and ``display.max_rows`` options are used when they
    hold a truthy value. For an option that is unset (``None``) or otherwise
    falsy, a fallback is used instead:

    * interactive session in an IPython zmq frontend: the registered default
      of the corresponding option;
    * any other interactive session: the size reported by
      ``shutil.get_terminal_size``;
    * non-interactive session: ``None``.

    Returns
    -------
    tuple of (int or None, int or None)
        Width and height. An element is ``None`` only when its option is
        falsy and the session is not interactive; callers must handle that.
    """
    from pandas import get_option

    width_option = get_option("display.width")
    height_option = get_option("display.max_rows")

    # Fallbacks stay None for a non-interactive script, where the terminal
    # size should be disregarded.
    fallback_width = fallback_height = None
    if in_interactive_session():
        if not in_ipython_frontend():
            # pure terminal: the size can be detected
            fallback_width, fallback_height = get_terminal_size()
        else:
            # interactive non-shell frontend: the terminal size cannot be
            # detected, so mirror the defaults registered for the options
            from pandas._config.config import get_default_val

            fallback_width = get_default_val("display.width")
            fallback_height = get_default_val("display.max_rows")

    width = width_option if width_option else fallback_width
    height = height_option if height_option else fallback_height
    return width, height
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Detect our environment
|
||||
|
||||
|
||||
def in_interactive_session() -> bool:
    """
    Report whether the code is running in an interactive shell.

    Returns
    -------
    bool
        True when ``__IPYTHON__`` is defined and truthy, when the
        ``__main__`` module has no ``__file__`` attribute, or when the
        ``mode.sim_interactive`` option is enabled.
    """
    from pandas import get_option

    def _main_looks_interactive() -> bool:
        # A missing ``__main__`` module leaves only the option to decide.
        try:
            import __main__ as main_module
        except ModuleNotFoundError:
            return get_option("mode.sim_interactive")
        if not hasattr(main_module, "__file__"):
            return True
        return get_option("mode.sim_interactive")

    try:
        # error: Name '__IPYTHON__' is not defined
        return __IPYTHON__ or _main_looks_interactive()  # type: ignore[name-defined]
    except NameError:
        # ``__IPYTHON__`` is not defined: fall back to inspecting __main__
        return _main_looks_interactive()
|
||||
|
||||
|
||||
def in_ipython_frontend() -> bool:
    """
    Report whether the code is running inside an IPython zmq frontend.

    Returns
    -------
    bool
        True when ``get_ipython`` is defined and the lowercased type string
        of the object it returns contains ``"zmq"``; False otherwise.
    """
    try:
        # error: Name 'get_ipython' is not defined
        shell_type = str(type(get_ipython())).lower()  # type: ignore[name-defined]
    except NameError:
        # ``get_ipython`` is not defined, so this is not an IPython session
        return False
    return "zmq" in shell_type
|
||||
@ -0,0 +1,425 @@
|
||||
"""
|
||||
Utilities for interpreting CSS from Stylers for formatting non-HTML outputs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
import warnings
|
||||
|
||||
from pandas.errors import CSSWarning
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Callable,
|
||||
Generator,
|
||||
Iterable,
|
||||
Iterator,
|
||||
)
|
||||
|
||||
|
||||
def _side_expander(prop_fmt: str) -> Callable:
|
||||
"""
|
||||
Wrapper to expand shorthand property into top, right, bottom, left properties
|
||||
|
||||
Parameters
|
||||
----------
|
||||
side : str
|
||||
The border side to expand into properties
|
||||
|
||||
Returns
|
||||
-------
|
||||
function: Return to call when a 'border(-{side}): {value}' string is encountered
|
||||
"""
|
||||
|
||||
def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str]]:
|
||||
"""
|
||||
Expand shorthand property into side-specific property (top, right, bottom, left)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
prop (str): CSS property name
|
||||
value (str): String token for property
|
||||
|
||||
Yields
|
||||
------
|
||||
Tuple (str, str): Expanded property, value
|
||||
"""
|
||||
tokens = value.split()
|
||||
try:
|
||||
mapping = self.SIDE_SHORTHANDS[len(tokens)]
|
||||
except KeyError:
|
||||
warnings.warn(
|
||||
f'Could not expand "{prop}: {value}"',
|
||||
CSSWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
return
|
||||
for key, idx in zip(self.SIDES, mapping, strict=True):
|
||||
yield prop_fmt.format(key), tokens[idx]
|
||||
|
||||
return expand
|
||||
|
||||
|
||||
def _is_border_width_token(token: str, width_ratios: dict) -> bool:
    """
    Decide whether a lowercased ``border`` shorthand token is a width.

    A token is a width when it is exactly one of the keys of
    ``width_ratios`` (e.g. ``"thin"``), or when it is a number immediately
    followed by one of those keys (e.g. ``"1.5pt"``). A plain substring test
    is not enough: colour names such as ``"pink"`` or ``"mediumblue"``
    contain the unit ``"in"`` and the keyword ``"medium"``.
    """
    if token in width_ratios:
        return True
    # Everything before the first letter must be the number; the rest must
    # be a known unit.
    match = re.fullmatch(r"([^a-z%!]+)([a-z%!].*)", token)
    if match is None:
        return False
    number, unit = match.groups()
    if unit not in width_ratios:
        return False
    try:
        float(number)
    except ValueError:
        return False
    return True


def _border_expander(side: str = "") -> Callable:
    """
    Wrapper to expand 'border' property into border color, style, and width properties

    Parameters
    ----------
    side : str
        The border side to expand into properties; the empty string
        (default) targets the un-sided ``border`` shorthand.

    Returns
    -------
    function: Return to call when a 'border(-{side}): {value}' string is encountered
    """
    if side != "":
        side = f"-{side}"

    def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str]]:
        """
        Expand border into color, style, and width tuples

        Parameters
        ----------
        prop : str
            CSS property name passed to styler
        value : str
            Value passed to styler for property

        Yields
        ------
        Tuple (str, str): Expanded property, value
        """
        tokens = value.split()
        if len(tokens) == 0 or len(tokens) > 3:
            # Only a warning: whatever tokens were given are still
            # processed below.
            warnings.warn(
                f'Too many tokens provided to "{prop}" (expected 1-3)',
                CSSWarning,
                stacklevel=find_stack_level(),
            )

        # TODO: Can we use current color as initial value to comply with CSS standards?
        border_declarations = {
            f"border{side}-color": "black",
            f"border{side}-style": "none",
            f"border{side}-width": "medium",
        }
        for token in tokens:
            lowered = token.lower()
            if lowered in self.BORDER_STYLES:
                border_declarations[f"border{side}-style"] = token
            elif _is_border_width_token(lowered, self.BORDER_WIDTH_RATIOS):
                border_declarations[f"border{side}-width"] = token
            else:
                # anything that is neither a style nor a width is a color
                border_declarations[f"border{side}-color"] = token
            # TODO: Warn user if item entered more than once (e.g. "border: red green")

        # Per CSS, "border" will reset previous "border-*" definitions
        yield from self.atomize(border_declarations.items())

    return expand
|
||||
|
||||
|
||||
class CSSResolver:
    """
    A callable for parsing and resolving CSS to atomic properties.
    """

    # Each entry maps a unit to ``(target unit, multiplier)``; ``size_to_pt``
    # follows the chain until it reaches "pt".
    UNIT_RATIOS = {
        "pt": ("pt", 1),
        "em": ("em", 1),
        "rem": ("pt", 12),
        "ex": ("em", 0.5),
        # 'ch':
        "px": ("pt", 0.75),
        "pc": ("pt", 12),
        "in": ("pt", 72),
        "cm": ("in", 1 / 2.54),
        "mm": ("in", 1 / 25.4),
        "q": ("mm", 0.25),
        "!!default": ("em", 0),
    }

    FONT_SIZE_RATIOS = {
        **UNIT_RATIOS,
        "%": ("em", 0.01),
        "xx-small": ("rem", 0.5),
        "x-small": ("rem", 0.625),
        "small": ("rem", 0.8),
        "medium": ("rem", 1),
        "large": ("rem", 1.125),
        "x-large": ("rem", 1.5),
        "xx-large": ("rem", 2),
        "smaller": ("em", 1 / 1.2),
        "larger": ("em", 1.2),
        "!!default": ("em", 1),
    }

    MARGIN_RATIOS = {**UNIT_RATIOS, "none": ("pt", 0)}

    BORDER_WIDTH_RATIOS = {
        **UNIT_RATIOS,
        "none": ("pt", 0),
        "thick": ("px", 4),
        "medium": ("px", 2),
        "thin": ("px", 1),
        # Default: medium only if solid
    }

    BORDER_STYLES = [
        "none",
        "hidden",
        "dotted",
        "dashed",
        "solid",
        "double",
        "groove",
        "ridge",
        "inset",
        "outset",
        "mediumdashdot",
        "dashdotdot",
        "hair",
        "mediumdashdotdot",
        "dashdot",
        "slantdashdot",
        "mediumdashed",
    ]

    # number of shorthand tokens -> index of the token used for each of SIDES
    SIDE_SHORTHANDS = {
        1: [0, 0, 0, 0],
        2: [0, 1, 0, 1],
        3: [0, 1, 2, 1],
        4: [0, 1, 2, 3],
    }

    SIDES = ("top", "right", "bottom", "left")

    # shorthand property name -> expander generator function
    CSS_EXPANSIONS = {
        "border": _border_expander(""),
        **{f"border-{side}": _border_expander(side) for side in SIDES},
        **{
            f"border-{prop}": _side_expander(f"border-{{:s}}-{prop}")
            for prop in ("color", "style", "width")
        },
        "margin": _side_expander("margin-{:s}"),
        "padding": _side_expander("padding-{:s}"),
    }

    def __call__(
        self,
        declarations: str | Iterable[tuple[str, str]],
        inherited: dict[str, str] | None = None,
    ) -> dict[str, str]:
        """
        Resolve the given declarations to atomic properties.

        Parameters
        ----------
        declarations : str | Iterable[tuple[str, str]]
            A CSS string or set of CSS declaration tuples
            e.g. "font-weight: bold; background: blue" or
            {("font-weight", "bold"), ("background", "blue")}
        inherited : dict, optional
            Atomic properties indicating the inherited style context in which
            declarations is to be resolved. ``inherited`` should already
            be resolved, i.e. valid output of this method.

        Returns
        -------
        dict
            Atomic CSS 2.2 properties.

        Examples
        --------
        >>> resolve = CSSResolver()
        >>> inherited = {"font-family": "serif", "font-weight": "bold"}
        >>> out = resolve(
        ...     '''
        ...               border-color: BLUE RED;
        ...               font-size: 1em;
        ...               font-size: 2em;
        ...               font-weight: normal;
        ...               font-weight: inherit;
        ...               ''',
        ...     inherited,
        ... )
        >>> sorted(out.items())  # doctest: +NORMALIZE_WHITESPACE
        [('border-bottom-color', 'blue'),
         ('border-left-color', 'red'),
         ('border-right-color', 'red'),
         ('border-top-color', 'blue'),
         ('font-family', 'serif'),
         ('font-size', '24pt'),
         ('font-weight', 'bold')]
        """
        pairs = self.parse(declarations) if isinstance(declarations, str) else declarations
        # later declarations of the same property overwrite earlier ones
        props = dict(self.atomize(pairs))
        context = {} if inherited is None else inherited

        props = self._update_initial(props, context)
        props = self._update_font_size(props, context)
        return self._update_other_units(props)

    def _update_initial(
        self,
        props: dict[str, str],
        inherited: dict[str, str],
    ) -> dict[str, str]:
        """
        Merge inherited properties and resolve ``inherit`` / ``initial``.

        ``props`` is updated in place with every inherited property it does
        not already define; the returned dict is a new object.
        """
        # 1. resolve inherited, initial
        for name, inherited_value in inherited.items():
            props.setdefault(name, inherited_value)

        resolved: dict[str, str] = {}
        for name, raw in props.items():
            value = inherited.get(name, "initial") if raw == "inherit" else raw
            if value in ("initial", None):
                # we do not define a complete initial stylesheet
                continue
            resolved[name] = value
        return resolved

    def _update_font_size(
        self,
        props: dict[str, str],
        inherited: dict[str, str],
    ) -> dict[str, str]:
        """
        Convert a truthy ``font-size`` entry of ``props`` to points, in place.

        The inherited font size, if any, is the reference for ``em`` units.
        """
        # 2. resolve relative font size
        size = props.get("font-size")
        if size:
            props["font-size"] = self.size_to_pt(
                size,
                self._get_font_size(inherited),
                conversions=self.FONT_SIZE_RATIOS,
            )
        return props

    def _get_font_size(self, props: dict[str, str]) -> float | None:
        """Return the ``font-size`` of ``props`` in points, or None if unset."""
        size = props.get("font-size")
        if not size:
            return None
        return self._get_float_font_size_from_pt(size)

    def _get_float_font_size_from_pt(self, font_size_string: str) -> float:
        """Parse a size string ending in ``pt`` into its numeric value."""
        assert font_size_string.endswith("pt")
        return float(font_size_string.rstrip("pt"))

    def _update_other_units(self, props: dict[str, str]) -> dict[str, str]:
        """
        Convert border widths, margins and paddings of ``props`` to points.

        ``props`` is modified in place and returned.
        """
        font_size = self._get_font_size(props)
        # 3. TODO: resolve other font-relative units
        for side in self.SIDES:
            # TODO: support % for margin and padding
            targets = (
                (f"border-{side}-width", self.BORDER_WIDTH_RATIOS),
                (f"margin-{side}", self.MARGIN_RATIOS),
                (f"padding-{side}", self.MARGIN_RATIOS),
            )
            for name, table in targets:
                if name in props:
                    props[name] = self.size_to_pt(
                        props[name],
                        em_pt=font_size,
                        conversions=table,
                    )
        return props

    def size_to_pt(
        self, in_val: str, em_pt: float | None = None, conversions: dict = UNIT_RATIOS
    ) -> str:
        """
        Convert a CSS size such as ``"1.5em"`` or ``"large"`` to points.

        Parameters
        ----------
        in_val : str
            Size to convert: an optional number followed by a unit or keyword.
        em_pt : float, optional
            Size of one ``em`` in points. When None, ``em`` is treated as
            ``rem``.
        conversions : dict
            Mapping of unit to ``(target unit, multiplier)``.

        Returns
        -------
        str
            The size formatted as ``"<number>pt"``. When ``in_val`` cannot be
            handled, a ``CSSWarning`` is issued and the ``"!!default"`` entry
            of ``conversions`` is used instead.
        """

        def _error() -> str:
            warnings.warn(
                f"Unhandled size: {in_val!r}",
                CSSWarning,
                stacklevel=find_stack_level(),
            )
            return self.size_to_pt("1!!default", conversions=conversions)

        parsed = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val)
        if parsed is None:
            return _error()

        number, unit = parsed.groups()
        if number:
            try:
                magnitude = float(number)
            except ValueError:
                return _error()
        else:
            # hack for 'large' etc.
            magnitude = 1

        # Follow the conversion chain until the value is expressed in points.
        while unit != "pt":
            if unit == "em":
                if em_pt is not None:
                    magnitude *= em_pt
                    unit = "pt"
                else:
                    unit = "rem"
                continue

            if unit not in conversions:
                return _error()
            unit, factor = conversions[unit]
            magnitude *= factor

        magnitude = round(magnitude, 5)
        if int(magnitude) == magnitude:
            return f"{int(magnitude):d}pt"
        return f"{magnitude:f}pt"

    def atomize(self, declarations: Iterable) -> Generator[tuple[str, str]]:
        """
        Lowercase ``(prop, value)`` pairs and expand shorthand properties.

        Properties listed in ``CSS_EXPANSIONS`` are replaced by whatever their
        expander yields; all others are yielded unchanged (lowercased).
        """
        for name, raw_value in declarations:
            name = name.lower()
            raw_value = raw_value.lower()
            expander = self.CSS_EXPANSIONS.get(name)
            if expander is None:
                yield name, raw_value
            else:
                yield from expander(self, name, raw_value)

    def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
        """
        Generates (prop, value) pairs from declarations.

        In a future version may generate parsed tokens from tinycss/tinycss2

        Parameters
        ----------
        declarations_str : str
            Declarations separated by ``;``. A declaration without a colon
            triggers a ``CSSWarning`` and is skipped.
        """
        for declaration in declarations_str.split(";"):
            if not declaration.strip():
                continue
            name, colon, raw_value = declaration.partition(":")
            if not colon:
                warnings.warn(
                    f"Ill-formatted attribute: expected a colon in {declaration!r}",
                    CSSWarning,
                    stacklevel=find_stack_level(),
                )
                continue
            # TODO: don't lowercase case sensitive parts of values (strings)
            yield name.strip().lower(), raw_value.strip().lower()
|
||||
@ -0,0 +1,336 @@
|
||||
"""
|
||||
Module for formatting output data into CSV files.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Sequence,
|
||||
)
|
||||
import csv as csvlib
|
||||
import os
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import writers as libwriters
|
||||
from pandas._typing import SequenceNotStr
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDatetimeIndex,
|
||||
ABCIndex,
|
||||
ABCMultiIndex,
|
||||
ABCPeriodIndex,
|
||||
)
|
||||
from pandas.core.dtypes.missing import notna
|
||||
|
||||
from pandas.core.indexes.api import Index
|
||||
|
||||
from pandas.io.common import get_handle
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
CompressionOptions,
|
||||
FilePath,
|
||||
FloatFormatType,
|
||||
IndexLabel,
|
||||
StorageOptions,
|
||||
WriteBuffer,
|
||||
npt,
|
||||
)
|
||||
|
||||
from pandas.io.formats.format import DataFrameFormatter
|
||||
|
||||
|
||||
_DEFAULT_CHUNKSIZE_CELLS = 100_000
|
||||
|
||||
|
||||
class CSVFormatter:
|
||||
cols: npt.NDArray[np.object_]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
formatter: DataFrameFormatter,
|
||||
path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "",
|
||||
sep: str = ",",
|
||||
cols: Sequence[Hashable] | None = None,
|
||||
index_label: IndexLabel | None = None,
|
||||
mode: str = "w",
|
||||
encoding: str | None = None,
|
||||
errors: str = "strict",
|
||||
compression: CompressionOptions = "infer",
|
||||
quoting: int | None = None,
|
||||
lineterminator: str | None = "\n",
|
||||
chunksize: int | None = None,
|
||||
quotechar: str | None = '"',
|
||||
date_format: str | None = None,
|
||||
doublequote: bool = True,
|
||||
escapechar: str | None = None,
|
||||
storage_options: StorageOptions | None = None,
|
||||
) -> None:
|
||||
self.fmt = formatter
|
||||
|
||||
self.obj = self.fmt.frame
|
||||
|
||||
self.filepath_or_buffer = path_or_buf
|
||||
self.encoding = encoding
|
||||
self.compression: CompressionOptions = compression
|
||||
self.mode = mode
|
||||
self.storage_options = storage_options
|
||||
|
||||
self.sep = sep
|
||||
self.index_label = self._initialize_index_label(index_label)
|
||||
self.errors = errors
|
||||
self.quoting = quoting or csvlib.QUOTE_MINIMAL
|
||||
self.doublequote = doublequote
|
||||
self.escapechar = escapechar
|
||||
self.quotechar = self._initialize_quotechar(quotechar)
|
||||
self.lineterminator = lineterminator or os.linesep
|
||||
self.date_format = date_format
|
||||
self.cols = self._initialize_columns(cols)
|
||||
self.chunksize = self._initialize_chunksize(chunksize)
|
||||
|
||||
@property
|
||||
def na_rep(self) -> str:
|
||||
return self.fmt.na_rep
|
||||
|
||||
@property
|
||||
def float_format(self) -> FloatFormatType | None:
|
||||
return self.fmt.float_format
|
||||
|
||||
@property
|
||||
def decimal(self) -> str:
|
||||
return self.fmt.decimal
|
||||
|
||||
@property
|
||||
def header(self) -> bool | SequenceNotStr[str]:
|
||||
return self.fmt.header
|
||||
|
||||
@property
|
||||
def index(self) -> bool:
|
||||
return self.fmt.index
|
||||
|
||||
def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel:
|
||||
if index_label is not False:
|
||||
if index_label is None:
|
||||
return self._get_index_label_from_obj()
|
||||
elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndex)):
|
||||
# given a string for a DF with Index
|
||||
return [index_label]
|
||||
return index_label
|
||||
|
||||
def _get_index_label_from_obj(self) -> Sequence[Hashable]:
|
||||
if isinstance(self.obj.index, ABCMultiIndex):
|
||||
return self._get_index_label_multiindex()
|
||||
else:
|
||||
return self._get_index_label_flat()
|
||||
|
||||
def _get_index_label_multiindex(self) -> Sequence[Hashable]:
|
||||
return [name or "" for name in self.obj.index.names]
|
||||
|
||||
def _get_index_label_flat(self) -> Sequence[Hashable]:
|
||||
index_label = self.obj.index.name
|
||||
return [""] if index_label is None else [index_label]
|
||||
|
||||
def _initialize_quotechar(self, quotechar: str | None) -> str | None:
|
||||
if self.quoting != csvlib.QUOTE_NONE or self.escapechar is not None:
|
||||
# prevents crash in _csv
|
||||
return quotechar
|
||||
return None
|
||||
|
||||
@property
|
||||
def has_mi_columns(self) -> bool:
|
||||
return bool(isinstance(self.obj.columns, ABCMultiIndex))
|
||||
|
||||
def _initialize_columns(
|
||||
self, cols: Iterable[Hashable] | None
|
||||
) -> npt.NDArray[np.object_]:
|
||||
# validate mi options
|
||||
if self.has_mi_columns:
|
||||
if cols is not None:
|
||||
msg = "cannot specify cols with a MultiIndex on the columns"
|
||||
raise TypeError(msg)
|
||||
|
||||
if cols is not None:
|
||||
if isinstance(cols, ABCIndex):
|
||||
cols = cols._get_values_for_csv(**self._number_format)
|
||||
else:
|
||||
cols = list(cols)
|
||||
self.obj = self.obj.loc[:, cols]
|
||||
|
||||
# update columns to include possible multiplicity of dupes
|
||||
# and make sure cols is just a list of labels
|
||||
new_cols = self.obj.columns
|
||||
return new_cols._get_values_for_csv(**self._number_format)
|
||||
|
||||
def _initialize_chunksize(self, chunksize: int | None) -> int:
|
||||
if chunksize is None:
|
||||
return (_DEFAULT_CHUNKSIZE_CELLS // (len(self.cols) or 1)) or 1
|
||||
return int(chunksize)
|
||||
|
||||
@property
|
||||
def _number_format(self) -> dict[str, Any]:
|
||||
"""Dictionary used for storing number formatting settings."""
|
||||
return {
|
||||
"na_rep": self.na_rep,
|
||||
"float_format": self.float_format,
|
||||
"date_format": self.date_format,
|
||||
"quoting": self.quoting,
|
||||
"decimal": self.decimal,
|
||||
}
|
||||
|
||||
@cache_readonly
|
||||
def data_index(self) -> Index:
|
||||
data_index = self.obj.index
|
||||
if (
|
||||
isinstance(data_index, (ABCDatetimeIndex, ABCPeriodIndex))
|
||||
and self.date_format is not None
|
||||
):
|
||||
data_index = Index(
|
||||
[x.strftime(self.date_format) if notna(x) else "" for x in data_index]
|
||||
)
|
||||
elif isinstance(data_index, ABCMultiIndex):
|
||||
data_index = data_index.remove_unused_levels()
|
||||
return data_index
|
||||
|
||||
@property
|
||||
def nlevels(self) -> int:
|
||||
if self.index:
|
||||
return getattr(self.data_index, "nlevels", 1)
|
||||
else:
|
||||
return 0
|
||||
|
||||
@property
|
||||
def _has_aliases(self) -> bool:
|
||||
return isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
|
||||
|
||||
@property
|
||||
def _need_to_save_header(self) -> bool:
|
||||
return bool(self._has_aliases or self.header)
|
||||
|
||||
@property
|
||||
def write_cols(self) -> SequenceNotStr[Hashable]:
|
||||
if self._has_aliases:
|
||||
assert not isinstance(self.header, bool)
|
||||
if len(self.header) != len(self.cols):
|
||||
raise ValueError(
|
||||
f"Writing {len(self.cols)} cols but got {len(self.header)} aliases"
|
||||
)
|
||||
return self.header
|
||||
else:
|
||||
# self.cols is an ndarray derived from Index._get_values_for_csv,
|
||||
# so its entries are strings, i.e. hashable
|
||||
return cast(SequenceNotStr[Hashable], self.cols)
|
||||
|
||||
@property
|
||||
def encoded_labels(self) -> list[Hashable]:
|
||||
encoded_labels: list[Hashable] = []
|
||||
|
||||
if self.index and self.index_label:
|
||||
assert isinstance(self.index_label, Sequence)
|
||||
encoded_labels = list(self.index_label)
|
||||
|
||||
if not self.has_mi_columns or self._has_aliases:
|
||||
encoded_labels += list(self.write_cols)
|
||||
|
||||
return encoded_labels
|
||||
|
||||
def save(self) -> None:
|
||||
"""
|
||||
Create the writer & save.
|
||||
"""
|
||||
# apply compression and byte/text conversion
|
||||
with get_handle(
|
||||
self.filepath_or_buffer,
|
||||
self.mode,
|
||||
encoding=self.encoding,
|
||||
errors=self.errors,
|
||||
compression=self.compression,
|
||||
storage_options=self.storage_options,
|
||||
) as handles:
|
||||
# Note: self.encoding is irrelevant here
|
||||
# error: Argument "quoting" to "writer" has incompatible type "int";
|
||||
# expected "Literal[0, 1, 2, 3]"
|
||||
self.writer = csvlib.writer(
|
||||
handles.handle,
|
||||
lineterminator=self.lineterminator,
|
||||
delimiter=self.sep,
|
||||
quoting=self.quoting, # type: ignore[arg-type]
|
||||
doublequote=self.doublequote,
|
||||
escapechar=self.escapechar,
|
||||
quotechar=self.quotechar,
|
||||
)
|
||||
|
||||
self._save()
|
||||
|
||||
def _save(self) -> None:
|
||||
if self._need_to_save_header:
|
||||
self._save_header()
|
||||
self._save_body()
|
||||
|
||||
def _save_header(self) -> None:
|
||||
if not self.has_mi_columns or self._has_aliases:
|
||||
self.writer.writerow(self.encoded_labels)
|
||||
else:
|
||||
for row in self._generate_multiindex_header_rows():
|
||||
self.writer.writerow(row)
|
||||
|
||||
def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]:
|
||||
columns = self.obj.columns
|
||||
for i in range(columns.nlevels):
|
||||
# we need at least 1 index column to write our col names
|
||||
col_line = []
|
||||
if self.index:
|
||||
# name is the first column
|
||||
col_line.append(columns.names[i])
|
||||
|
||||
if isinstance(self.index_label, list) and len(self.index_label) > 1:
|
||||
col_line.extend([""] * (len(self.index_label) - 1))
|
||||
|
||||
col_line.extend(columns._get_level_values(i))
|
||||
yield col_line
|
||||
|
||||
# Write out the index line if it's not empty.
|
||||
# Otherwise, we will print out an extraneous
|
||||
# blank line between the mi and the data rows.
|
||||
if self.encoded_labels and set(self.encoded_labels) != {""}:
|
||||
yield self.encoded_labels + [""] * len(columns)
|
||||
|
||||
def _save_body(self) -> None:
|
||||
nrows = len(self.data_index)
|
||||
chunks = (nrows // self.chunksize) + 1
|
||||
for i in range(chunks):
|
||||
start_i = i * self.chunksize
|
||||
end_i = min(start_i + self.chunksize, nrows)
|
||||
if start_i >= end_i:
|
||||
break
|
||||
self._save_chunk(start_i, end_i)
|
||||
|
||||
def _save_chunk(self, start_i: int, end_i: int) -> None:
|
||||
# create the data for a chunk
|
||||
slicer = slice(start_i, end_i)
|
||||
df = self.obj.iloc[slicer]
|
||||
|
||||
res = df._get_values_for_csv(**self._number_format)
|
||||
data = list(res._iter_column_arrays())
|
||||
|
||||
ix = (
|
||||
self.data_index[slicer]._get_values_for_csv(**self._number_format)
|
||||
if self.nlevels != 0
|
||||
else np.empty(end_i - start_i)
|
||||
)
|
||||
libwriters.write_csv_rows(
|
||||
data,
|
||||
ix,
|
||||
self.nlevels,
|
||||
self.cols,
|
||||
self.writer,
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,657 @@
|
||||
"""
|
||||
Module for formatting output data in HTML.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from textwrap import dedent
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Final,
|
||||
cast,
|
||||
)
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas._libs import lib
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
option_context,
|
||||
)
|
||||
|
||||
from pandas.io.common import is_url
|
||||
from pandas.io.formats.format import (
|
||||
DataFrameFormatter,
|
||||
get_level_lengths,
|
||||
)
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Mapping,
|
||||
)
|
||||
|
||||
|
||||
class HTMLFormatter:
    """
    Internal class for formatting output data in html.
    This class is intended for shared functionality between
    DataFrame.to_html() and DataFrame._repr_html_().
    Any logic in common with other output formatting methods
    should ideally be inherited from classes in format.py
    and this class responsible for only producing html markup.
    """

    # Number of spaces added per nesting level of the generated markup.
    indent_delta: Final = 2

    def __init__(
        self,
        formatter: DataFrameFormatter,
        classes: str | list[str] | tuple[str, ...] | None = None,
        border: int | bool | None = None,
        table_id: str | None = None,
        render_links: bool = False,
    ) -> None:
        """
        Parameters
        ----------
        formatter : DataFrameFormatter
            Source of the frame, the truncated frame and display settings.
        classes : str, list or tuple, optional
            Extra CSS classes for the ``<table>`` tag.
        border : int or bool, optional
            ``None`` or ``True`` use the ``display.html.border`` option;
            any other falsy value omits the ``border`` attribute.
        table_id : str, optional
            Value for the ``id`` attribute of the ``<table>`` tag.
        render_links : bool, default False
            Wrap cell values recognised by ``is_url`` in ``<a>`` tags.
        """
        self.fmt = formatter
        self.classes = classes

        self.frame = self.fmt.frame
        self.columns = self.fmt.tr_frame.columns
        self.elements: list[str] = []
        self.bold_rows = self.fmt.bold_rows
        self.escape = self.fmt.escape
        self.show_dimensions = self.fmt.show_dimensions
        if border is None or border is True:
            border = cast(int, get_option("display.html.border"))
        elif not border:
            border = None

        self.border = border
        self.table_id = table_id
        self.render_links = render_links

        # Maps a column label to its min-width; ints are treated as pixels.
        self.col_space = {}
        is_multi_index = isinstance(self.columns, MultiIndex)
        for column, value in self.fmt.col_space.items():
            col_space_value = f"{value}px" if isinstance(value, int) else value
            self.col_space[column] = col_space_value
            # GH 53885: Handling case where column is index
            # Flatten the data in the multi index and add in the map
            if is_multi_index and isinstance(column, tuple):
                for column_index in column:
                    self.col_space[str(column_index)] = col_space_value

    def to_string(self) -> str:
        """Render the table and return the lines joined by newlines."""
        lines = self.render()
        if any(isinstance(x, str) for x in lines):
            lines = [str(x) for x in lines]
        return "\n".join(lines)

    def render(self) -> list[str]:
        """Write the table (and the dimensions line if enabled); return the lines."""
        self._write_table()

        if self.should_show_dimensions:
            by = chr(215)  # ×  # noqa: RUF003
            self.write(
                f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
            )

        return self.elements

    @property
    def should_show_dimensions(self) -> bool:
        """Forwarded from the underlying formatter."""
        return self.fmt.should_show_dimensions

    @property
    def show_row_idx_names(self) -> bool:
        """Forwarded from the underlying formatter."""
        return self.fmt.show_row_idx_names

    @property
    def show_col_idx_names(self) -> bool:
        """Forwarded from the underlying formatter."""
        return self.fmt.show_col_idx_names

    @property
    def row_levels(self) -> int:
        """Number of leading cells in each row that belong to the row index."""
        if self.fmt.index:
            # showing (row) index
            return self.frame.index.nlevels
        elif self.show_col_idx_names:
            # see gh-22579
            # Column misalignment also occurs for
            # a standard index when the columns index is named.
            # If the row index is not displayed a column of
            # blank cells need to be included before the DataFrame values.
            return 1
        # not showing (row) index
        return 0

    def _get_columns_formatted_values(self) -> Iterable:
        """Return the labels written into a flat (non-MultiIndex) column header."""
        return self.columns

    @property
    def is_truncated(self) -> bool:
        """Forwarded from the underlying formatter."""
        return self.fmt.is_truncated

    @property
    def ncols(self) -> int:
        """Number of columns in the truncated frame."""
        return len(self.fmt.tr_frame.columns)

    def write(self, s: Any, indent: int = 0) -> None:
        """Append ``s`` (passed through ``pprint_thing``) indented by ``indent`` spaces."""
        rs = pprint_thing(s)
        self.elements.append(" " * indent + rs)

    def write_th(
        self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None
    ) -> None:
        """
        Method for writing a formatted <th> cell.

        If col_space is set on the formatter then that is used for
        the value of min-width.

        Parameters
        ----------
        s : object
            The data to be written inside the cell.
        header : bool, default False
            Set to True if the <th> is for use inside <thead>.  This will
            cause min-width to be set if there is one.
        indent : int, default 0
            The indentation level of the cell.
        tags : str, default None
            Tags to include in the cell.

        Returns
        -------
        A written <th> cell.
        """
        col_space = self.col_space.get(s, None)

        if header and col_space is not None:
            style = f'style="min-width: {col_space};"'
            # Attributes must be whitespace-separated, so only prepend the
            # existing tags (plus a space) when there are any.
            tags = f"{tags} {style}" if tags else style

        self._write_cell(s, kind="th", indent=indent, tags=tags)

    def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None:
        """Write ``s`` as a ``<td>`` cell."""
        self._write_cell(s, kind="td", indent=indent, tags=tags)

    def _write_cell(
        self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None
    ) -> None:
        """
        Write one table cell.

        Parameters
        ----------
        s : object
            Cell content; converted to text with ``pprint_thing``.
        kind : str, default "td"
            Tag name of the cell.
        indent : int, default 0
            Number of leading spaces.
        tags : str, optional
            Attribute text placed inside the opening tag.
        """
        if tags is not None:
            start_tag = f"<{kind} {tags}>"
        else:
            start_tag = f"<{kind}>"

        if self.escape:
            # escape & first to prevent double escaping of &
            esc = {"&": r"&amp;", "<": r"&lt;", ">": r"&gt;"}
        else:
            esc = {}

        rs = pprint_thing(s, escape_chars=esc).strip()
        # replace spaces betweens strings with non-breaking spaces
        rs = rs.replace("  ", "&nbsp;&nbsp;")

        if self.render_links and is_url(rs):
            # NOTE(review): the href uses the unescaped text and is not
            # attribute-escaped - confirm this is acceptable for callers.
            rs_unescaped = pprint_thing(s, escape_chars={}).strip()
            start_tag += f'<a href="{rs_unescaped}" target="_blank">'
            end_a = "</a>"
        else:
            end_a = ""

        self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)

    def write_tr(
        self,
        line: Iterable,
        indent: int = 0,
        indent_delta: int = 0,
        header: bool = False,
        align: str | None = None,
        tags: dict[int, str] | None = None,
        nindex_levels: int = 0,
    ) -> None:
        """
        Write one ``<tr>`` row.

        Parameters
        ----------
        line : Iterable
            Cell values, in order.
        indent : int, default 0
            Indentation of the ``<tr>`` tag itself.
        indent_delta : int, default 0
            Extra indentation applied to the cells.
        header : bool, default False
            Write every cell as ``<th>``.
        align : str, optional
            Value for a ``text-align`` style on the row.
        tags : dict of int to str, optional
            Attribute text per cell position.
        nindex_levels : int, default 0
            With ``bold_rows`` set, this many leading cells become ``<th>``.
        """
        if tags is None:
            tags = {}

        if align is None:
            self.write("<tr>", indent)
        else:
            self.write(f'<tr style="text-align: {align};">', indent)
        indent += indent_delta

        for i, s in enumerate(line):
            val_tag = tags.get(i, None)
            if header or (self.bold_rows and i < nindex_levels):
                self.write_th(s, indent=indent, header=header, tags=val_tag)
            else:
                self.write_td(s, indent, tags=val_tag)

        indent -= indent_delta
        self.write("</tr>", indent)

    def _write_table(self, indent: int = 0) -> None:
        """
        Write the ``<table>`` element with its header and body.

        Raises
        ------
        TypeError
            If ``self.classes`` is not a string, list or tuple.
        """
        _classes = ["dataframe"]  # Default class.
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            _classes.append("tex2jax_ignore")
            _classes.append("mathjax_ignore")
        if self.classes is not None:
            if isinstance(self.classes, str):
                self.classes = self.classes.split()
            if not isinstance(self.classes, (list, tuple)):
                raise TypeError(
                    "classes must be a string, list, "
                    f"or tuple, not {type(self.classes)}"
                )
            _classes.extend(self.classes)

        if self.table_id is None:
            id_section = ""
        else:
            id_section = f' id="{self.table_id}"'

        if self.border is None:
            border_attr = ""
        else:
            border_attr = f' border="{self.border}"'

        self.write(
            f'<table{border_attr} class="{" ".join(_classes)}"{id_section}>',
            indent,
        )

        if self.fmt.header or self.show_row_idx_names:
            self._write_header(indent + self.indent_delta)

        self._write_body(indent + self.indent_delta)

        self.write("</table>", indent)

    def _write_col_header(self, indent: int) -> None:
        """
        Write the header row(s) holding the column labels.

        MultiIndex columns produce one row per level, with ``colspan``
        attributes on cells that cover several columns; a flat index
        produces a single row. With horizontal truncation a "..." cell is
        inserted at ``self.fmt.tr_col_num``.
        """
        row: list[Hashable]
        is_truncated_horizontally = self.fmt.is_truncated_horizontally
        if isinstance(self.columns, MultiIndex):
            template = 'colspan="{span:d}" halign="left"'

            sentinel: lib.NoDefault | bool
            if self.fmt.sparsify:
                # GH3547
                sentinel = lib.no_default
            else:
                sentinel = False
            levels = self.columns._format_multi(sparsify=sentinel, include_names=False)
            level_lengths = get_level_lengths(levels, sentinel)
            inner_lvl = len(level_lengths) - 1
            for lnum, (records, values) in enumerate(
                zip(level_lengths, levels, strict=True)
            ):
                if is_truncated_horizontally:
                    # modify the header lines
                    ins_col = self.fmt.tr_col_num
                    if self.fmt.sparsify:
                        recs_new = {}
                        # Increment tags after ... col.
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                recs_new[tag + 1] = span
                            elif tag + span > ins_col:
                                recs_new[tag] = span + 1
                                if lnum == inner_lvl:
                                    values = (
                                        *values[:ins_col],
                                        "...",
                                        *values[ins_col:],
                                    )
                                else:
                                    # sparse col headers do not receive a ...
                                    values = (
                                        *values[:ins_col],
                                        values[ins_col - 1],
                                        *values[ins_col:],
                                    )
                            else:
                                recs_new[tag] = span
                            # if ins_col lies between tags, all col headers
                            # get ...
                            if tag + span == ins_col:
                                recs_new[ins_col] = 1
                                values = (*values[:ins_col], "...", *values[ins_col:])
                        records = recs_new
                        if lnum == inner_lvl:
                            records[ins_col] = 1
                    else:
                        recs_new = {}
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                recs_new[tag + 1] = span
                            else:
                                recs_new[tag] = span
                        recs_new[ins_col] = 1
                        records = recs_new
                        values = [*values[:ins_col], "...", *values[ins_col:]]

                # see gh-22579
                # Column Offset Bug with to_html(index=False) with
                # MultiIndex Columns and Index.
                # Initially fill row with blank cells before column names.
                # TODO: Refactor to remove code duplication with code
                # block below for standard columns index.
                row = [""] * (self.row_levels - 1)
                if self.fmt.index or self.show_col_idx_names:
                    # see gh-22747
                    # If to_html(index_names=False) do not show columns
                    # index names.
                    # TODO: Refactor to use _get_column_name_list from
                    # DataFrameFormatter class and create a
                    # _get_formatted_column_labels function for code
                    # parity with DataFrameFormatter class.
                    if self.fmt.show_index_names:
                        name = self.columns.names[lnum]
                        row.append(pprint_thing(name or ""))
                    else:
                        row.append("")

                tags = {}
                j = len(row)
                for i, v in enumerate(values):
                    if i in records:
                        if records[i] > 1:
                            tags[j] = template.format(span=records[i])
                    else:
                        # position covered by an earlier cell's colspan
                        continue
                    j += 1
                    row.append(v)
                self.write_tr(row, indent, self.indent_delta, tags=tags, header=True)
        else:
            # see gh-22579
            # Column misalignment also occurs for
            # a standard index when the columns index is named.
            # Initially fill row with blank cells before column names.
            # TODO: Refactor to remove code duplication with code block
            # above for columns MultiIndex.
            row = [""] * (self.row_levels - 1)
            if self.fmt.index or self.show_col_idx_names:
                # see gh-22747
                # If to_html(index_names=False) do not show columns
                # index names.
                # TODO: Refactor to use _get_column_name_list from
                # DataFrameFormatter class.
                if self.fmt.show_index_names:
                    row.append(self.columns.name or "")
                else:
                    row.append("")
            row.extend(self._get_columns_formatted_values())
            align = self.fmt.justify

            if is_truncated_horizontally:
                ins_col = self.row_levels + self.fmt.tr_col_num
                row.insert(ins_col, "...")

            self.write_tr(row, indent, self.indent_delta, header=True, align=align)

    def _write_row_header(self, indent: int) -> None:
        """Write the header row holding the index names, padded with blank cells."""
        is_truncated_horizontally = self.fmt.is_truncated_horizontally
        row = [x if x is not None else "" for x in self.frame.index.names] + [""] * (
            self.ncols + (1 if is_truncated_horizontally else 0)
        )
        self.write_tr(row, indent, self.indent_delta, header=True)

    def _write_header(self, indent: int) -> None:
        """Write ``<thead>`` with the column header and/or index-name row."""
        self.write("<thead>", indent)

        if self.fmt.header:
            self._write_col_header(indent + self.indent_delta)

        if self.show_row_idx_names:
            self._write_row_header(indent + self.indent_delta)

        self.write("</thead>", indent)

    def _get_formatted_values(self) -> dict[int, list[str]]:
        """Format each column with ``display.max_colwidth`` set to ``None``."""
        with option_context("display.max_colwidth", None):
            fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)}
        return fmt_values

    def _write_body(self, indent: int) -> None:
        """Write ``<tbody>``, choosing the hierarchical or regular row writer."""
        self.write("<tbody>", indent)
        fmt_values = self._get_formatted_values()

        # write values
        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
            self._write_hierarchical_rows(fmt_values, indent + self.indent_delta)
        else:
            self._write_regular_rows(fmt_values, indent + self.indent_delta)

        self.write("</tbody>", indent)

    def _write_regular_rows(
        self, fmt_values: Mapping[int, list[str]], indent: int
    ) -> None:
        """
        Write the body rows for a frame without a displayed MultiIndex.

        Parameters
        ----------
        fmt_values : Mapping of int to list of str
            Formatted cell text per column position.
        indent : int
            Indentation of each ``<tr>``.
        """
        is_truncated_horizontally = self.fmt.is_truncated_horizontally
        is_truncated_vertically = self.fmt.is_truncated_vertically

        nrows = len(self.fmt.tr_frame)

        if self.fmt.index:
            fmt = self.fmt._get_formatter("__index__")
            if fmt is not None:
                index_values = self.fmt.tr_frame.index.map(fmt)
            else:
                # only reached with non-Multi index
                index_values = self.fmt.tr_frame.index._format_flat(include_name=False)

        row: list[str] = []
        for i in range(nrows):
            if is_truncated_vertically and i == (self.fmt.tr_row_num):
                # separator row as wide as the previously written row
                str_sep_row = ["..."] * len(row)
                self.write_tr(
                    str_sep_row,
                    indent,
                    self.indent_delta,
                    tags=None,
                    nindex_levels=self.row_levels,
                )

            row = []
            if self.fmt.index:
                row.append(index_values[i])
            # see gh-22579
            # Column misalignment also occurs for
            # a standard index when the columns index is named.
            # Add blank cell before data cells.
            elif self.show_col_idx_names:
                row.append("")
            row.extend(fmt_values[j][i] for j in range(self.ncols))

            if is_truncated_horizontally:
                dot_col_ix = self.fmt.tr_col_num + self.row_levels
                row.insert(dot_col_ix, "...")
            self.write_tr(
                row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels
            )

    def _write_hierarchical_rows(
        self, fmt_values: Mapping[int, list[str]], indent: int
    ) -> None:
        """
        Write the body rows for a frame with a displayed MultiIndex.

        With ``sparsify`` enabled, repeated outer labels are merged via
        ``rowspan``. With vertical truncation a "..." row is inserted at
        ``self.fmt.tr_row_num``; in the sparsified case this also inserts
        "..." into the lists held by ``fmt_values``.

        Parameters
        ----------
        fmt_values : Mapping of int to list of str
            Formatted cell text per column position.
        indent : int
            Indentation of each ``<tr>``.
        """
        template = 'rowspan="{span}" valign="top"'

        is_truncated_horizontally = self.fmt.is_truncated_horizontally
        is_truncated_vertically = self.fmt.is_truncated_vertically
        frame = self.fmt.tr_frame
        nrows = len(frame)

        assert isinstance(frame.index, MultiIndex)
        # One tuple of formatted labels per row; computed once and shared
        # by both branches below.
        idx_values = frame.index._format_multi(sparsify=False, include_names=False)
        idx_values = list(zip(*idx_values, strict=True))

        if self.fmt.sparsify:
            # GH3547
            sentinel = lib.no_default
            levels = frame.index._format_multi(sparsify=sentinel, include_names=False)

            level_lengths = get_level_lengths(levels, sentinel)
            inner_lvl = len(level_lengths) - 1
            if is_truncated_vertically:
                # Insert ... row and adjust idx_values and
                # level_lengths to take this into account.
                ins_row = self.fmt.tr_row_num
                inserted = False
                for lnum, records in enumerate(level_lengths):
                    rec_new = {}
                    for tag, span in list(records.items()):
                        if tag >= ins_row:
                            rec_new[tag + 1] = span
                        elif tag + span > ins_row:
                            rec_new[tag] = span + 1

                            # GH 14882 - Make sure insertion done once
                            if not inserted:
                                dot_row = list(idx_values[ins_row - 1])
                                dot_row[-1] = "..."
                                idx_values.insert(ins_row, tuple(dot_row))
                                inserted = True
                            else:
                                dot_row = list(idx_values[ins_row])
                                dot_row[inner_lvl - lnum] = "..."
                                idx_values[ins_row] = tuple(dot_row)
                        else:
                            rec_new[tag] = span
                        # If ins_row lies between tags, all cols idx cols
                        # receive ...
                        if tag + span == ins_row:
                            rec_new[ins_row] = 1
                            if lnum == 0:
                                idx_values.insert(
                                    ins_row, tuple(["..."] * len(level_lengths))
                                )

                            # GH 14882 - Place ... in correct level
                            elif inserted:
                                dot_row = list(idx_values[ins_row])
                                dot_row[inner_lvl - lnum] = "..."
                                idx_values[ins_row] = tuple(dot_row)
                    level_lengths[lnum] = rec_new

                level_lengths[inner_lvl][ins_row] = 1
                for ix_col in fmt_values:
                    fmt_values[ix_col].insert(ins_row, "...")
                nrows += 1

            for i in range(nrows):
                row = []
                tags = {}

                sparse_offset = 0
                j = 0
                for records, v in zip(level_lengths, idx_values[i], strict=True):
                    if i in records:
                        if records[i] > 1:
                            tags[j] = template.format(span=records[i])
                    else:
                        # label merged into a rowspan cell of an earlier row
                        sparse_offset += 1
                        continue

                    j += 1
                    row.append(v)

                row.extend(fmt_values[j][i] for j in range(self.ncols))
                if is_truncated_horizontally:
                    row.insert(
                        self.row_levels - sparse_offset + self.fmt.tr_col_num, "..."
                    )
                self.write_tr(
                    row,
                    indent,
                    self.indent_delta,
                    tags=tags,
                    nindex_levels=len(levels) - sparse_offset,
                )
        else:
            row = []
            for i in range(len(frame)):
                if is_truncated_vertically and i == (self.fmt.tr_row_num):
                    str_sep_row = ["..."] * len(row)
                    self.write_tr(
                        str_sep_row,
                        indent,
                        self.indent_delta,
                        tags=None,
                        nindex_levels=self.row_levels,
                    )

                # idx_values is loop-invariant here, so the value computed
                # above is reused instead of reformatting the index per row.
                row = []
                row.extend(idx_values[i])
                row.extend(fmt_values[j][i] for j in range(self.ncols))
                if is_truncated_horizontally:
                    row.insert(self.row_levels + self.fmt.tr_col_num, "...")
                self.write_tr(
                    row,
                    indent,
                    self.indent_delta,
                    tags=None,
                    nindex_levels=frame.index.nlevels,
                )
|
||||
|
||||
|
||||
class NotebookFormatter(HTMLFormatter):
    """
    Internal class for formatting output data in html for display in Jupyter
    Notebooks. This class is intended for functionality specific to
    DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
    """

    def _get_formatted_values(self) -> dict[int, list[str]]:
        """Format each column through ``self.fmt.format_col``."""
        return {i: self.fmt.format_col(i) for i in range(self.ncols)}

    def _get_columns_formatted_values(self) -> list[str]:
        """Return the flat column labels formatted as strings."""
        # only reached with non-Multi Index
        return self.columns._format_flat(include_name=False)

    def write_style(self) -> None:
        """Write the ``<style scoped>`` block that precedes the table."""
        # We use the "scoped" attribute here so that the desired
        # style properties for the data frame are not then applied
        # throughout the entire notebook.
        # The leading whitespace inside these literals is significant:
        # ``dedent`` below strips the common prefix and keeps the rest as
        # the nesting of the emitted CSS.
        template_first = """\
            <style scoped>"""
        template_last = """\
            </style>"""
        template_select = """\
                .dataframe %s {
                    %s: %s;
                }"""
        element_props = [
            ("tbody tr th:only-of-type", "vertical-align", "middle"),
            ("tbody tr th", "vertical-align", "top"),
        ]
        if isinstance(self.columns, MultiIndex):
            element_props.append(("thead tr th", "text-align", "left"))
            if self.show_row_idx_names:
                element_props.append(
                    ("thead tr:last-of-type th", "text-align", "right")
                )
        else:
            element_props.append(("thead th", "text-align", "right"))
        template_mid = "\n\n".join(template_select % t for t in element_props)
        template = dedent(f"{template_first}\n{template_mid}\n{template_last}")
        self.write(template)

    def render(self) -> list[str]:
        """Write ``<div>``, the style block, the table and ``</div>``; return the lines."""
        self.write("<div>")
        self.write_style()
        super().render()
        self.write("</div>")
        return self.elements
|
||||
@ -0,0 +1,943 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import (
|
||||
ABC,
|
||||
abstractmethod,
|
||||
)
|
||||
import sys
|
||||
from textwrap import dedent
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas.io.formats import format as fmt
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Iterable,
|
||||
Iterator,
|
||||
Mapping,
|
||||
Sequence,
|
||||
)
|
||||
|
||||
from pandas._typing import (
|
||||
Dtype,
|
||||
WriteBuffer,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
|
||||
# Text fragments used to fill in the shared ``info`` docstring for Series.
# Each fragment is dedented here, so the indentation inside the literals is
# relative to the first line of the fragment.

# Description of the ``show_counts`` parameter.
show_counts_sub = dedent(
    """\
    show_counts : bool, optional
        Whether to show the non-null counts. By default, this is shown
        only if the DataFrame is smaller than
        ``pandas.options.display.max_info_rows`` and
        ``pandas.options.display.max_info_columns``. A value of True always
        shows the counts, and False never shows the counts."""
)

# "Examples" section for ``Series.info``.
series_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> s = pd.Series(text_values, index=int_values)
    >>> s.info()
    <class 'pandas.Series'>
    Index: 5 entries, 1 to 5
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    5 non-null      object
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Prints a summary excluding information about its values:

    >>> s.info(verbose=False)
    <class 'pandas.Series'>
    Index: 5 entries, 1 to 5
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Pipe output of Series.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> s.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, specially
    useful for big Series and fine-tune memory optimization:

    >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
    >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
    >>> s.info()
    <class 'pandas.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 7.6+ MB

    >>> s.info(memory_usage='deep')
    <class 'pandas.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 55.3 MB"""
)

# "See Also" entries for ``Series.info``.
series_see_also_sub = dedent(
    """\
    Series.describe: Generate descriptive statistics of Series.
    Series.memory_usage: Memory usage of Series."""
)

# Description of the ``max_cols`` parameter.
series_max_cols_sub = dedent(
    """\
    max_cols : int, optional
        Unused, exists only for compatibility with DataFrame.info."""
)


# Substitution values for the Series variant of the ``info`` docstring.
series_sub_kwargs = {
    "klass": "Series",
    "type_sub": "",
    "max_cols_sub": series_max_cols_sub,
    "show_counts_sub": show_counts_sub,
    "examples_sub": series_examples_sub,
    "see_also_sub": series_see_also_sub,
    "version_added_sub": "\n.. versionadded:: 1.4.0\n",
}
|
||||
|
||||
|
||||
def _put_str(s: str | Dtype, space: int) -> str:
|
||||
"""
|
||||
Make string of specified length, padding to the right if necessary.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
s : Union[str, Dtype]
|
||||
String to be formatted.
|
||||
space : int
|
||||
Length to force string to be of.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
String coerced to given length.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> pd.io.formats.info._put_str("panda", 6)
|
||||
'panda '
|
||||
>>> pd.io.formats.info._put_str("panda", 4)
|
||||
'pand'
|
||||
"""
|
||||
return str(s)[:space].ljust(space)
|
||||
|
||||
|
||||
def _sizeof_fmt(num: float, size_qualifier: str) -> str:
|
||||
"""
|
||||
Return size in human readable format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
num : int
|
||||
Size in bytes.
|
||||
size_qualifier : str
|
||||
Either empty, or '+' (if lower bound).
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Size in human readable format.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> _sizeof_fmt(23028, "")
|
||||
'22.5 KB'
|
||||
|
||||
>>> _sizeof_fmt(23028, "+")
|
||||
'22.5+ KB'
|
||||
"""
|
||||
for x in ["bytes", "KB", "MB", "GB", "TB"]:
|
||||
if num < 1024.0:
|
||||
return f"{num:3.1f}{size_qualifier} {x}"
|
||||
num /= 1024.0
|
||||
return f"{num:3.1f}{size_qualifier} PB"
|
||||
|
||||
|
||||
def _initialize_memory_usage(
|
||||
memory_usage: bool | str | None = None,
|
||||
) -> bool | str:
|
||||
"""Get memory usage based on inputs and display options."""
|
||||
if memory_usage is None:
|
||||
memory_usage = get_option("display.memory_usage")
|
||||
return memory_usage
|
||||
|
||||
|
||||
class _BaseInfo(ABC):
|
||||
"""
|
||||
Base class for DataFrameInfo and SeriesInfo.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DataFrame or Series
|
||||
Either dataframe or series.
|
||||
memory_usage : bool or str, optional
|
||||
If "deep", introspect the data deeply by interrogating object dtypes
|
||||
for system-level memory consumption, and include it in the returned
|
||||
values.
|
||||
"""
|
||||
|
||||
data: DataFrame | Series
|
||||
memory_usage: bool | str
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def dtypes(self) -> Iterable[Dtype]:
|
||||
"""
|
||||
Dtypes.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dtypes : sequence
|
||||
Dtype of each of the DataFrame's columns (or one series column).
|
||||
"""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def dtype_counts(self) -> Mapping[str, int]:
|
||||
"""Mapping dtype - number of counts."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def non_null_counts(self) -> list[int] | Series:
|
||||
"""Sequence of non-null counts for all columns or column (if series)."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def memory_usage_bytes(self) -> int:
|
||||
"""
|
||||
Memory usage in bytes.
|
||||
|
||||
Returns
|
||||
-------
|
||||
memory_usage_bytes : int
|
||||
Object's total memory usage in bytes.
|
||||
"""
|
||||
|
||||
@property
|
||||
def memory_usage_string(self) -> str:
|
||||
"""Memory usage in a form of human readable string."""
|
||||
return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n"
|
||||
|
||||
@property
|
||||
def size_qualifier(self) -> str:
|
||||
size_qualifier = ""
|
||||
if self.memory_usage:
|
||||
if self.memory_usage != "deep":
|
||||
# size_qualifier is just a best effort; not guaranteed to catch
|
||||
# all cases (e.g., it misses categorical data even with object
|
||||
# categories)
|
||||
if (
|
||||
"object" in self.dtype_counts
|
||||
or self.data.index._is_memory_usage_qualified
|
||||
):
|
||||
size_qualifier = "+"
|
||||
return size_qualifier
|
||||
|
||||
@abstractmethod
|
||||
def render(
|
||||
self,
|
||||
*,
|
||||
buf: WriteBuffer[str] | None,
|
||||
max_cols: int | None,
|
||||
verbose: bool | None,
|
||||
show_counts: bool | None,
|
||||
) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class DataFrameInfo(_BaseInfo):
    """
    Class storing dataframe-specific info.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        """Store the frame and resolve the memory-usage setting."""
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts as computed by ``_get_dataframe_dtype_counts``."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        column_ids = self.ids
        return len(column_ids)

    @property
    def non_null_counts(self) -> Series:
        """Sequence of non-null counts for all columns or column (if series)."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Sum of ``memory_usage`` over index and columns; deep only when set to "deep"."""
        per_item = self.data.memory_usage(
            index=True, deep=self.memory_usage == "deep"
        )
        return per_item.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a ``_DataFrameInfoPrinter`` for this object and write it to ``buf``."""
        _DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)
|
||||
|
||||
|
||||
class SeriesInfo(_BaseInfo):
    """
    Class storing series-specific info.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        """Store the series and resolve the memory-usage setting."""
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a ``_SeriesInfoPrinter`` for this object and write it to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given.
        """
        if max_cols is not None:
            raise ValueError(
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
        _SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def non_null_counts(self) -> list[int]:
        """Single-element list holding the series' non-null count."""
        count = self.data.count()
        return [count]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        dtype = self.data.dtypes
        return [dtype]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts of the series wrapped in a one-column DataFrame."""
        # imported locally, as in the original block
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        return self.data.memory_usage(index=True, deep=self.memory_usage == "deep")
|
||||
|
||||
|
||||
class _InfoPrinterAbstract:
|
||||
"""
|
||||
Class for printing dataframe or series info.
|
||||
"""
|
||||
|
||||
def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
|
||||
"""Save dataframe info into buffer."""
|
||||
table_builder = self._create_table_builder()
|
||||
lines = table_builder.get_lines()
|
||||
if buf is None: # pragma: no cover
|
||||
buf = sys.stdout
|
||||
fmt.buffer_put_lines(buf, lines)
|
||||
|
||||
@abstractmethod
|
||||
def _create_table_builder(self) -> _TableBuilderAbstract:
|
||||
"""Create instance of table builder."""
|
||||
|
||||
|
||||
class _DataFrameInfoPrinter(_InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols must be resolved first: the show_counts default reads it.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """Value of the ``display.max_info_rows`` option."""
        return get_option("display.max_info_rows")

    @property
    def exceeds_info_cols(self) -> bool:
        """True when the column count is greater than ``max_cols``."""
        too_many = self.col_count > self.max_cols
        return bool(too_many)

    @property
    def exceeds_info_rows(self) -> bool:
        """True when the number of rows is greater than ``max_rows``."""
        too_many = len(self.data) > self.max_rows
        return bool(too_many)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Fall back to ``display.max_info_columns`` when ``max_cols`` is None."""
        if max_cols is not None:
            return max_cols
        return get_option("display.max_info_columns")

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Default to showing counts only when neither size limit is exceeded."""
        if show_counts is not None:
            return show_counts
        return bool(not (self.exceeds_info_cols or self.exceeds_info_rows))

    def _create_table_builder(self) -> _DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose:
            use_verbose = True
        elif self.verbose is False:  # specifically set to False, not necessarily None
            use_verbose = False
        else:
            # verbosity not decided by the caller: depend on the column limit
            use_verbose = not self.exceeds_info_cols

        if not use_verbose:
            return _DataFrameTableBuilderNonVerbose(info=self.info)
        return _DataFrameTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )
|
||||
|
||||
|
||||
class _SeriesInfoPrinter(_InfoPrinterAbstract):
    """Class for printing series info.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> _SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.
        """
        # An unset (None) verbosity is treated the same as verbose.
        wants_verbose = self.verbose or self.verbose is None
        if not wants_verbose:
            return _SeriesTableBuilderNonVerbose(info=self.info)
        return _SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, or True when it is None."""
        return True if show_counts is None else show_counts
|
||||
|
||||
|
||||
class _TableBuilderAbstract(ABC):
|
||||
"""
|
||||
Abstract builder for info table.
|
||||
"""
|
||||
|
||||
_lines: list[str]
|
||||
info: _BaseInfo
|
||||
|
||||
@abstractmethod
|
||||
def get_lines(self) -> list[str]:
|
||||
"""Product in a form of list of lines (strings)."""
|
||||
|
||||
@property
|
||||
def data(self) -> DataFrame | Series:
|
||||
return self.info.data
|
||||
|
||||
@property
|
||||
def dtypes(self) -> Iterable[Dtype]:
|
||||
"""Dtypes of each of the DataFrame's columns."""
|
||||
return self.info.dtypes
|
||||
|
||||
@property
|
||||
def dtype_counts(self) -> Mapping[str, int]:
|
||||
"""Mapping dtype - number of counts."""
|
||||
return self.info.dtype_counts
|
||||
|
||||
@property
|
||||
def display_memory_usage(self) -> bool:
|
||||
"""Whether to display memory usage."""
|
||||
return bool(self.info.memory_usage)
|
||||
|
||||
@property
|
||||
def memory_usage_string(self) -> str:
|
||||
"""Memory usage string with proper size qualifier."""
|
||||
return self.info.memory_usage_string
|
||||
|
||||
@property
|
||||
def non_null_counts(self) -> list[int] | Series:
|
||||
return self.info.non_null_counts
|
||||
|
||||
def add_object_type_line(self) -> None:
|
||||
"""Add line with string representation of dataframe to the table."""
|
||||
self._lines.append(str(type(self.data)))
|
||||
|
||||
def add_index_range_line(self) -> None:
|
||||
"""Add line with range of indices to the table."""
|
||||
self._lines.append(self.data.index._summary())
|
||||
|
||||
def add_dtypes_line(self) -> None:
|
||||
"""Add summary line with dtypes present in dataframe."""
|
||||
collected_dtypes = [
|
||||
f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items())
|
||||
]
|
||||
self._lines.append(f"dtypes: {', '.join(collected_dtypes)}")
|
||||
|
||||
|
||||
class _DataFrameTableBuilder(_TableBuilderAbstract):
    """
    Abstract builder for dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Rebuild the list of lines and return it."""
        self._lines = []
        # A frame without columns gets the short "empty" layout.
        fill = (
            self._fill_empty_info
            if self.col_count == 0
            else self._fill_non_empty_info
        )
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")
|
||||
|
||||
|
||||
class _DataFrameTableBuilderNonVerbose(_DataFrameTableBuilder):
    """
    Dataframe info table builder for non-verbose output.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Append the summary of the columns index, labelled ``Columns``."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)
|
||||
|
||||
|
||||
class _TableBuilderVerboseMixin(_TableBuilderAbstract):
    """
    Mixin for verbose info output.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(
                self.header_column_widths, body_widths, strict=False
            )
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so that each entry is one column of cells.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows, strict=True))
        return [max(len(cell) for cell in column) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Generator function yielding rows content.

        Each element represents a row comprising a sequence of strings.
        """
        if not self.with_counts:
            return self._gen_rows_without_counts()
        return self._gen_rows_with_counts()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Append the header titles, each padded to its gross column width."""
        cells = [
            _put_str(title, width)
            for title, width in zip(
                self.headers, self.gross_column_widths, strict=True
            )
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Append a dashed line; each dash run is as long as its header title."""
        cells = [
            _put_str("-" * title_width, width)
            for title_width, width in zip(
                self.header_column_widths, self.gross_column_widths, strict=True
            )
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Append one padded line per row of ``strrows``."""
        for row in self.strrows:
            cells = [
                _put_str(cell, width)
                for cell, width in zip(row, self.gross_column_widths, strict=True)
            ]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from (pprint_thing(dtype) for dtype in self.dtypes)
|
||||
|
||||
|
||||
class _DataFrameTableBuilderVerbose(_DataFrameTableBuilder, _TableBuilderVerboseMixin):
    """
    Dataframe info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialised once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if not self.with_counts:
            return [" # ", "Column", "Dtype"]
        return [" # ", "Column", "Non-Null Count", "Dtype"]

    def add_columns_summary_line(self) -> None:
        """Append the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_dtypes(),
            strict=True,
        )
        yield from rows

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_non_null_counts(),
            self._gen_dtypes(),
            strict=True,
        )
        yield from rows

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {position}" for position, _ in enumerate(self.ids))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from (pprint_thing(label) for label in self.ids)
|
||||
|
||||
|
||||
class _SeriesTableBuilder(_TableBuilderAbstract):
    """
    Abstract builder for series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    def get_lines(self) -> list[str]:
        """Rebuild the list of lines and return it."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
|
||||
|
||||
|
||||
class _SeriesTableBuilderNonVerbose(_SeriesTableBuilder):
    """
    Series info table builder for non-verbose output.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()
|
||||
|
||||
|
||||
class _SeriesTableBuilderVerbose(_SeriesTableBuilder, _TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialised once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_series_name_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Append the line stating the name of the series."""
        series_name = self.data.name
        self._lines.append(f"Series name: {series_name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if not self.with_counts:
            return ["Dtype"]
        return ["Non-Null Count", "Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        for dtype in self._gen_dtypes():
            yield [dtype]

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        rows = zip(self._gen_non_null_counts(), self._gen_dtypes(), strict=True)
        yield from rows
|
||||
|
||||
|
||||
def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
|
||||
"""
|
||||
Create mapping between datatypes and their number of occurrences.
|
||||
"""
|
||||
# groupby dtype.name to collect e.g. Categorical columns
|
||||
return df.dtypes.value_counts().groupby(lambda x: x.name).sum()
|
||||
@ -0,0 +1,587 @@
|
||||
"""
|
||||
Printing tools.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Callable,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Sequence,
|
||||
)
|
||||
import sys
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
TypeAlias,
|
||||
TypeVar,
|
||||
)
|
||||
from unicodedata import east_asian_width
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas.core.dtypes.inference import is_sequence
|
||||
|
||||
from pandas.io.formats.console import get_console_size
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import ListLike
|
||||
EscapeChars: TypeAlias = Mapping[str, str] | Iterable[str]
|
||||
_KT = TypeVar("_KT")
|
||||
_VT = TypeVar("_VT")
|
||||
|
||||
|
||||
def adjoin(space: int, *lists: list[str], **kwargs: Any) -> str:
    """
    Glues together two sets of strings using the amount of space requested.
    The idea is to prettify.

    Parameters
    ----------
    space : int
        number of spaces for padding
    lists : str
        list of str which being joined
    strlen : callable
        function used to calculate the length of each str. Needed for unicode
        handling.
    justfunc : callable
        function used to justify str. Needed for unicode handling.
    """
    strlen = kwargs.pop("strlen", len)
    justfunc = kwargs.pop("justfunc", _adj_justify)

    # Every column but the last is widened by ``space``.
    widths = [max(map(strlen, column)) + space for column in lists[:-1]]
    # not the last one
    widths.append(max(map(len, lists[-1])))
    tallest = max(map(len, lists))

    padded_columns = []
    for width, column in zip(widths, lists, strict=True):
        justified = justfunc(column, width, mode="left")
        # Shorter columns are topped up with blank cells so all are equally tall.
        filler = [" " * width] * (tallest - len(column))
        padded_columns.append(filler + justified)

    rows = zip(*padded_columns, strict=True)
    return "\n".join("".join(cells) for cells in rows)
|
||||
|
||||
|
||||
def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
|
||||
"""
|
||||
Perform ljust, center, rjust against string or list-like
|
||||
"""
|
||||
if mode == "left":
|
||||
return [x.ljust(max_len) for x in texts]
|
||||
elif mode == "center":
|
||||
return [x.center(max_len) for x in texts]
|
||||
else:
|
||||
return [x.rjust(max_len) for x in texts]
|
||||
|
||||
|
||||
# Unicode consolidation
|
||||
# ---------------------
|
||||
#
|
||||
# pprinting utility functions for generating Unicode text or
|
||||
# bytes(3.x)/str(2.x) representations of objects.
|
||||
# Try to use these as much as possible rather than rolling your own.
|
||||
#
|
||||
# When to use
|
||||
# -----------
|
||||
#
|
||||
# 1) If you're writing code internal to pandas (no I/O directly involved),
|
||||
# use pprint_thing().
|
||||
#
|
||||
# It will always return unicode text which can be handled by other
|
||||
# parts of the package without breakage.
|
||||
#
|
||||
# 2) if you need to write something out to file, use
|
||||
# pprint_thing_encoded(encoding).
|
||||
#
|
||||
# If no encoding is specified, it defaults to utf-8. Since encoding pure
|
||||
# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
|
||||
# working with straight ascii.
|
||||
|
||||
|
||||
def _pprint_seq(
|
||||
seq: ListLike, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any
|
||||
) -> str:
|
||||
"""
|
||||
internal. pprinter for iterables. you should probably use pprint_thing()
|
||||
rather than calling this directly.
|
||||
|
||||
bounds length of printed sequence, depending on options
|
||||
"""
|
||||
if isinstance(seq, set):
|
||||
fmt = "{{{body}}}"
|
||||
elif isinstance(seq, frozenset):
|
||||
fmt = "frozenset({{{body}}})"
|
||||
else:
|
||||
fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
|
||||
|
||||
if max_seq_items is False:
|
||||
max_items = None
|
||||
else:
|
||||
max_items = max_seq_items or get_option("max_seq_items") or len(seq)
|
||||
|
||||
s = iter(seq)
|
||||
# handle sets, no slicing
|
||||
r = []
|
||||
max_items_reached = False
|
||||
for i, item in enumerate(s):
|
||||
if (max_items is not None) and (i >= max_items):
|
||||
max_items_reached = True
|
||||
break
|
||||
r.append(pprint_thing(item, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))
|
||||
body = ", ".join(r)
|
||||
|
||||
if max_items_reached:
|
||||
body += ", ..."
|
||||
elif isinstance(seq, tuple) and len(seq) == 1:
|
||||
body += ","
|
||||
|
||||
return fmt.format(body=body)
|
||||
|
||||
|
||||
def _pprint_dict(
|
||||
seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any
|
||||
) -> str:
|
||||
"""
|
||||
internal. pprinter for iterables. you should probably use pprint_thing()
|
||||
rather than calling this directly.
|
||||
"""
|
||||
fmt = "{{{things}}}"
|
||||
pairs = []
|
||||
|
||||
pfmt = "{key}: {val}"
|
||||
|
||||
if max_seq_items is False:
|
||||
nitems = len(seq)
|
||||
else:
|
||||
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
|
||||
|
||||
for k, v in list(seq.items())[:nitems]:
|
||||
pairs.append(
|
||||
pfmt.format(
|
||||
key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||||
val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||||
)
|
||||
)
|
||||
|
||||
if nitems < len(seq):
|
||||
return fmt.format(things=", ".join(pairs) + ", ...")
|
||||
else:
|
||||
return fmt.format(things=", ".join(pairs))
|
||||
|
||||
|
||||
def pprint_thing(
    thing: object,
    _nest_lvl: int = 0,
    escape_chars: EscapeChars | None = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: int | None = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list[str] or Mapping[str, str], optional
        Characters to escape. If a Mapping is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    quote_strings : bool, default False
        Whether a ``str`` input is wrapped in single quotes
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def as_escaped_string(
        thing: Any, escape_chars: EscapeChars | None = escape_chars
    ) -> str:
        replacements = {"\t": r"\t", "\n": r"\n", "\r": r"\r", "'": r"\'"}
        if isinstance(escape_chars, Mapping):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars  # type: ignore[assignment]
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or ()

        text = str(thing)
        for char in escape_chars:
            text = text.replace(char, replacements[char])
        return text

    # Iterators are never consumed; fall back to their plain str().
    if hasattr(thing, "__next__"):
        return str(thing)

    if isinstance(thing, Mapping) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        return _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )

    if is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        return _pprint_seq(
            # error: Argument 1 to "_pprint_seq" has incompatible type "object";
            # expected "ExtensionArray | ndarray[Any, Any] | Index | Series |
            # SequenceNotStr[Any] | range"
            thing,  # type: ignore[arg-type]
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )

    escaped = as_escaped_string(thing)
    if isinstance(thing, str) and quote_strings:
        return f"'{escaped}'"
    return escaped
|
||||
|
||||
|
||||
def pprint_thing_encoded(
    object: object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """Return ``pprint_thing(object)`` encoded with ``encoding`` and ``errors``."""
    # get unicode representation of object
    text = pprint_thing(object)
    encoded = text.encode(encoding, errors)
    return encoded
|
||||
|
||||
|
||||
def enable_data_resource_formatter(enable: bool) -> None:
    """
    Enable or disable the ``application/vnd.dataresource+json`` IPython formatter.

    Does nothing when IPython has not been imported or no IPython shell is
    active. When enabling, the formatter is registered first if it is missing.
    """
    if "IPython" not in sys.modules:
        # definitely not in IPython
        return
    from IPython import get_ipython

    # error: Call to untyped function "get_ipython" in typed context
    shell = get_ipython()  # type: ignore[no-untyped-call]
    if shell is None:
        # still not in IPython
        return

    formatters = shell.display_formatter.formatters
    mimetype = "application/vnd.dataresource+json"

    if not enable:
        # disable tableschema mime-type, if it was ever registered
        if mimetype in formatters:
            formatters[mimetype].enabled = False
        return

    if mimetype not in formatters:
        # define tableschema formatter
        from IPython.core.formatters import BaseFormatter
        from traitlets import ObjectName

        class TableSchemaFormatter(BaseFormatter):
            print_method = ObjectName("_repr_data_resource_")
            _return_type = (dict,)

        # register it:
        formatters[mimetype] = TableSchemaFormatter()
    # enable it if it's been disabled:
    formatters[mimetype].enabled = True
|
||||
|
||||
|
||||
def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
    """
    Call ``pprint_thing`` with strings quoted and tab, carriage return and
    newline escaped.
    """
    options = {
        "escape_chars": ("\t", "\r", "\n"),
        "quote_strings": True,
        "max_seq_items": max_seq_items,
    }
    return pprint_thing(thing, **options)
|
||||
|
||||
|
||||
def format_object_summary(
    obj: ListLike,
    formatter: Callable,
    is_justify: bool = True,
    name: str | None = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : bool
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when a line of values gets wider
        than the display width.

    Returns
    -------
    summary string
    """
    # Width budget: console size first, then the option, then 80 columns.
    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    # space1 / space2 are the newline-plus-indent prefixes used for
    # continuation lines (space2 is one column deeper than space1).
    if indent_for_name:
        name_len = len(name)
        space1 = f"\n{(' ' * (name_len + 1))}"
        space2 = f"\n{(' ' * (name_len + 2))}"
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    # A falsy option value means "no limit", i.e. show all n items.
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> tuple[str, str]:
        # Flush ``line`` into ``s`` and start a new line when appending
        # ``value`` would reach the display width.
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: list[str]) -> int:
        # Widest entry as measured by ``adj``; 0 for an empty list.
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:
        if max_seq_items == 1:
            # If max_seq_items=1 show only last element
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif n > max_seq_items:
            # NOTE: ``n`` is rebound here to the number of items shown on
            # each side of the "..." (at most 10).
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            max_items = 1
            # Find the largest item count whose rendering of the first tail
            # value fits in max_space; fall back to 1 if none does.
            for num_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=num_items)
                if len(pprinted_seq) < max_space:
                    max_items = num_items
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        summary = ""
        line = space2

        for head_value in head:
            word = head_value + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for tail_item in tail[:-1]:
            word = tail_item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space
        summary = "[" + summary[len(space2) :]

    return summary
|
||||
|
||||
|
||||
def _justify(
|
||||
head: list[Sequence[str]], tail: list[Sequence[str]]
|
||||
) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
|
||||
"""
|
||||
Justify items in head and tail, so they are right-aligned when stacked.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
head : list-like of list-likes of strings
|
||||
tail : list-like of list-likes of strings
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple of list of tuples of strings
|
||||
Same as head and tail, but items are right aligned when stacked
|
||||
vertically.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> _justify([["a", "b"]], [["abc", "abcd"]])
|
||||
([(' a', ' b')], [('abc', 'abcd')])
|
||||
"""
|
||||
combined = head + tail
|
||||
|
||||
# For each position for the sequences in ``combined``,
|
||||
# find the length of the largest string.
|
||||
max_length = [0] * len(combined[0])
|
||||
for inner_seq in combined:
|
||||
length = [len(item) for item in inner_seq]
|
||||
max_length = [max(x, y) for x, y in zip(max_length, length, strict=True)]
|
||||
|
||||
# justify each item in each list-like in head and tail using max_length
|
||||
head_tuples = [
|
||||
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length, strict=True))
|
||||
for seq in head
|
||||
]
|
||||
tail_tuples = [
|
||||
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length, strict=True))
|
||||
for seq in tail
|
||||
]
|
||||
return head_tuples, tail_tuples
|
||||
|
||||
|
||||
class PrettyDict(dict[_KT, _VT]):
    """Dict subclass whose ``repr`` is produced by ``pprint_thing``."""

    def __repr__(self) -> str:
        # Delegate the whole rendering to the module's pretty-printer.
        rendered = pprint_thing(self)
        return rendered
|
||||
|
||||
|
||||
class _TextAdjustment:
|
||||
def __init__(self) -> None:
|
||||
self.encoding = get_option("display.encoding")
|
||||
|
||||
def len(self, text: str) -> int:
|
||||
return len(text)
|
||||
|
||||
def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
|
||||
"""
|
||||
Perform ljust, center, rjust against string or list-like
|
||||
"""
|
||||
if mode == "left":
|
||||
return [x.ljust(max_len) for x in texts]
|
||||
elif mode == "center":
|
||||
return [x.center(max_len) for x in texts]
|
||||
else:
|
||||
return [x.rjust(max_len) for x in texts]
|
||||
|
||||
def adjoin(self, space: int, *lists: Any, **kwargs: Any) -> str:
|
||||
return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
|
||||
|
||||
|
||||
class _EastAsianTextAdjustment(_TextAdjustment):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
if get_option("display.unicode.ambiguous_as_wide"):
|
||||
self.ambiguous_width = 2
|
||||
else:
|
||||
self.ambiguous_width = 1
|
||||
|
||||
# Definition of East Asian Width
|
||||
# https://unicode.org/reports/tr11/
|
||||
# Ambiguous width can be changed by option
|
||||
self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
|
||||
|
||||
def len(self, text: str) -> int:
|
||||
"""
|
||||
Calculate display width considering unicode East Asian Width
|
||||
"""
|
||||
if not isinstance(text, str):
|
||||
return len(text)
|
||||
|
||||
return sum(
|
||||
self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text
|
||||
)
|
||||
|
||||
def justify(
|
||||
self, texts: Iterable[str], max_len: int, mode: str = "right"
|
||||
) -> list[str]:
|
||||
# re-calculate padding space per str considering East Asian Width
|
||||
def _get_pad(t: str) -> int:
|
||||
return max_len - self.len(t) + len(t)
|
||||
|
||||
if mode == "left":
|
||||
return [x.ljust(_get_pad(x)) for x in texts]
|
||||
elif mode == "center":
|
||||
return [x.center(_get_pad(x)) for x in texts]
|
||||
else:
|
||||
return [x.rjust(_get_pad(x)) for x in texts]
|
||||
|
||||
|
||||
def get_adjustment() -> _TextAdjustment:
|
||||
use_east_asian_width = get_option("display.unicode.east_asian_width")
|
||||
if use_east_asian_width:
|
||||
return _EastAsianTextAdjustment()
|
||||
else:
|
||||
return _TextAdjustment()
|
||||
@ -0,0 +1,207 @@
|
||||
"""
|
||||
Module for formatting output data in console (to string).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from shutil import get_terminal_size
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from pandas.io.formats.format import DataFrameFormatter
|
||||
|
||||
|
||||
class StringFormatter:
    """Formatter for string representation of a dataframe."""

    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
        self.fmt = fmt
        self.adj = fmt.adj
        self.frame = fmt.frame
        self.line_width = line_width

    def to_string(self) -> str:
        """Return the rendered frame, followed by dimensions info if enabled."""
        rendered = self._get_string_representation()
        if not self.fmt.should_show_dimensions:
            return rendered
        return f"{rendered}{self.fmt.dimensions_info}"

    def _get_strcols(self) -> list[list[str]]:
        """Fetch the string columns, adding dot separators when truncated."""
        columns = self.fmt.get_strcols()
        if not self.fmt.is_truncated:
            return columns
        return self._insert_dot_separators(columns)

    def _get_string_representation(self) -> str:
        """Select the rendering path: empty, unwrapped, wrapped or fitted."""
        if self.fmt.frame.empty:
            return self._empty_info_line

        columns = self._get_strcols()

        # no need to wrap around just print the whole frame
        if self.line_width is None:
            return self.adj.adjoin(1, *columns)

        if self._need_to_wrap_around:
            return self._join_multiline(columns)

        return self._fit_strcols_to_terminal_width(columns)

    @property
    def _empty_info_line(self) -> str:
        """Three-line summary used in place of a table for an empty frame."""
        frame = self.frame
        return (
            f"Empty {type(frame).__name__}\n"
            f"Columns: {pprint_thing(frame.columns)}\n"
            f"Index: {pprint_thing(frame.index)}"
        )

    @property
    def _need_to_wrap_around(self) -> bool:
        """True when ``fmt.max_cols`` is None or positive."""
        max_cols = self.fmt.max_cols
        return bool(max_cols is None or max_cols > 0)

    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
        """Insert the horizontal and/or vertical truncation markers."""
        formatted_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
        n_index_lines = len(formatted_index)

        if self.fmt.is_truncated_horizontally:
            strcols = self._insert_dot_separator_horizontal(strcols, n_index_lines)

        if self.fmt.is_truncated_vertically:
            strcols = self._insert_dot_separator_vertical(strcols, n_index_lines)

        return strcols

    @property
    def _adjusted_tr_col_num(self) -> int:
        """``fmt.tr_col_num`` shifted by one when the index column is shown."""
        offset = 1 if self.fmt.index else 0
        return self.fmt.tr_col_num + offset

    def _insert_dot_separator_horizontal(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert (in place) a column of dots at the truncation position."""
        dot_column = [" ..."] * index_length
        strcols.insert(self._adjusted_tr_col_num, dot_column)
        return strcols

    def _insert_dot_separator_vertical(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert (in place) a dots entry into every column at the cut row."""
        header_rows = index_length - len(self.fmt.tr_frame)
        cut_row = self.fmt.tr_row_num

        for position, column in enumerate(strcols):
            width = self.adj.len(column[cut_row])

            # Only a horizontally truncated frame has a dedicated dot column.
            is_dot_col = (
                self.fmt.is_truncated_horizontally
                and position == self._adjusted_tr_col_num
            )

            dots = "..." if width > 3 or is_dot_col else ".."

            if position == 0 and self.fmt.index:
                dot_mode = "left"
            else:
                dot_mode = "right"
                if is_dot_col:
                    width = 4

            dot_str = self.adj.justify([dots], width, mode=dot_mode)[0]
            column.insert(cut_row + header_rows, dot_str)
        return strcols

    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
        """Split the columns into bins fitting ``line_width`` and join them."""
        lwidth = self.line_width
        adjoin_width = 1
        strcols = list(strcols_input)

        if self.fmt.index:
            # The index column is repeated in every bin, so reserve its width.
            idx = strcols.pop(0)
            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width

        col_widths = []
        for col in strcols:
            if len(col) > 0:
                col_widths.append(np.array([self.adj.len(x) for x in col]).max())
            else:
                col_widths.append(0)

        assert lwidth is not None
        col_bins = _binify(col_widths, lwidth)
        nbins = len(col_bins)

        chunks = []
        start = 0
        for bin_number, end in enumerate(col_bins):
            row = strcols[start:end]
            if self.fmt.index:
                row.insert(0, idx)
            if nbins > 1:
                nrows = len(row[-1])
                is_continued = end <= len(strcols) and bin_number < nbins - 1
                if is_continued:
                    # Backslash on the first line marks a continued block.
                    row.append([" \\"] + [" "] * (nrows - 1))
                else:
                    row.append([" "] * nrows)
            chunks.append(self.adj.adjoin(adjoin_width, *row))
            start = end
        return "\n\n".join(chunks)

    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
        """Drop middle columns until the text fits the terminal, then render."""
        from pandas import Series

        lines = self.adj.adjoin(1, *strcols).split("\n")
        max_len = Series(lines).str.len().max()
        # plus truncate dot col
        width, _ = get_terminal_size()
        dif = max_len - width
        # '+ 1' to avoid too wide repr (GH PR #17023)
        adj_dif = dif + 1
        col_lens = Series([Series(ele).str.len().max() for ele in strcols])
        n_cols = len(col_lens)
        while adj_dif > 0 and n_cols > 1:
            mid = round(n_cols / 2)
            mid_ix = col_lens.index[mid]
            # adjoin adds one
            adj_dif -= col_lens[mid_ix] + 1
            col_lens = col_lens.drop(mid_ix)
            n_cols = len(col_lens)

        # subtract index column
        # GH-21180. Ensure that we print at least two.
        self.fmt.max_cols_fitted = max(n_cols - self.fmt.index, 2)

        # Call again _truncate to cut frame appropriately
        # and then generate string representation
        self.fmt.truncate()
        strcols = self._get_strcols()
        return self.adj.adjoin(1, *strcols)
|
||||
|
||||
|
||||
def _binify(cols: list[int], line_width: int) -> list[int]:
|
||||
adjoin_width = 1
|
||||
bins = []
|
||||
curr_width = 0
|
||||
i_last_column = len(cols) - 1
|
||||
for i, w in enumerate(cols):
|
||||
w_adjoined = w + adjoin_width
|
||||
curr_width += w_adjoined
|
||||
if i_last_column == i:
|
||||
wrap = curr_width + 1 > line_width and i > 0
|
||||
else:
|
||||
wrap = curr_width + 2 > line_width and i > 0
|
||||
if wrap:
|
||||
bins.append(i)
|
||||
curr_width = w_adjoined
|
||||
|
||||
bins.append(len(cols))
|
||||
return bins
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,16 @@
|
||||
{# Update the html_style/table_structure.html documentation too #}
{# Full HTML document when doctype_html is set, otherwise only style + table. #}{% if doctype_html %}
<!DOCTYPE html>
<html>
<head>
<meta charset="{{encoding}}">
{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
</head>
<body>
{% include html_table_tpl %}
</body>
</html>
{% else %}
{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
{% include html_table_tpl %}
{% endif %}
|
||||
@ -0,0 +1,26 @@
|
||||
{%- block before_style -%}{%- endblock before_style -%}
{% block style %}
<style type="text/css">
{# Table-level rules: one CSS rule per entry of table_styles. #}{% block table_styles %}
{% for rule in table_styles %}
#T_{{uuid}} {{rule.selector}} {
{% for prop, value in rule.props %}
{{prop}}: {{value}};
{% endfor %}
}
{% endfor %}
{% endblock table_styles %}
{% block before_cellstyle %}{% endblock before_cellstyle %}
{# Cell-level rules for data cells, index cells and column headers. #}{% block cellstyle %}
{% for group in [cellstyle, cellstyle_index, cellstyle_columns] %}
{% for rule in group %}
{% for selector in rule.selectors %}{% if not loop.first %}, {% endif %}#T_{{uuid}}_{{selector}}{% endfor %} {
{% for prop, value in rule.props %}
{{prop}}: {{value}};
{% endfor %}
}
{% endfor %}
{% endfor %}
{% endblock cellstyle %}
</style>
{% endblock style %}
|
||||
@ -0,0 +1,63 @@
|
||||
{% block before_table %}{% endblock before_table %}
{% block table %}
{# With exclude_styles the table carries no id, cell ids or classes. #}{% if exclude_styles %}
<table>
{% else %}
<table id="T_{{uuid}}"{% if table_attributes %} {{table_attributes}}{% endif %}>
{% endif %}
{% block caption %}
{% if caption and caption is string %}
<caption>{{caption}}</caption>
{% elif caption and caption is sequence %}
<caption>{{caption[0]}}</caption>
{% endif %}
{% endblock caption %}
{% block thead %}
<thead>
{% block before_head_rows %}{% endblock %}
{% for r in head %}
{# The row variable r is visible to overriding templates via "scoped". #}{% block head_tr scoped %}
<tr>
{% if exclude_styles %}
{% for cell in r %}
{% if cell.is_visible != False %}
<{{cell.type}} {{cell.attributes}}>{{cell.display_value}}</{{cell.type}}>
{% endif %}
{% endfor %}
{% else %}
{% for cell in r %}
{% if cell.is_visible != False %}
<{{cell.type}} {%- if cell.id is defined %} id="T_{{uuid}}_{{cell.id}}" {%- endif %} class="{{cell.class}}" {{cell.attributes}}>{{cell.display_value}}</{{cell.type}}>
{% endif %}
{% endfor %}
{% endif %}
</tr>
{% endblock head_tr %}
{% endfor %}
{% block after_head_rows %}{% endblock %}
</thead>
{% endblock thead %}
{% block tbody %}
<tbody>
{% block before_rows %}{% endblock before_rows %}
{% for r in body %}
{% block tr scoped %}
<tr>
{% if exclude_styles %}
{% for cell in r %}{% if cell.is_visible != False %}
<{{cell.type}} {{cell.attributes}}>{{cell.display_value}}</{{cell.type}}>
{% endif %}{% endfor %}
{% else %}
{% for cell in r %}{% if cell.is_visible != False %}
<{{cell.type}} {%- if cell.id is defined %} id="T_{{uuid}}_{{cell.id}}" {%- endif %} class="{{cell.class}}" {{cell.attributes}}>{{cell.display_value}}</{{cell.type}}>
{% endif %}{% endfor %}
{% endif %}
</tr>
{% endblock tr %}
{% endfor %}
{% block after_rows %}{% endblock after_rows %}
</tbody>
{% endblock tbody %}
</table>
{% endblock table %}
{% block after_table %}{% endblock after_table %}
|
||||
@ -0,0 +1,5 @@
|
||||
{# Dispatch to the longtable template only for environment "longtable". #}{% include ("latex_longtable.tpl" if environment == "longtable" else "latex_table.tpl") %}
|
||||
@ -0,0 +1,82 @@
|
||||
{# longtable layout: first head, repeated head, foot, last foot, then body rows. #}\begin{longtable}
{%- set position = parse_table(table_styles, 'position') %}
{%- if position is not none %}
[{{position}}]
{%- endif %}
{%- set column_format = parse_table(table_styles, 'column_format') %}
{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}

{% for style in table_styles %}
{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format', 'label'] %}
\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
{% endif %}
{% endfor %}
{% if caption and caption is string %}
\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}
{%- set label = parse_table(table_styles, 'label') %}
{%- if label is not none %}
\label{{label}}
{%- endif %} \\
{% elif caption and caption is sequence %}
\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}
{%- set label = parse_table(table_styles, 'label') %}
{%- if label is not none %}
\label{{label}}
{%- endif %} \\
{% else %}
{%- set label = parse_table(table_styles, 'label') %}
{%- if label is not none %}
\label{{label}} \\
{% endif %}
{% endif %}
{% set toprule = parse_table(table_styles, 'toprule') %}
{% if toprule is not none %}
\{{toprule}}
{% endif %}
{% for row in head %}
{# convert_css is forwarded so header styles are handled as in latex_table.tpl. #}{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\
{% endfor %}
{% set midrule = parse_table(table_styles, 'midrule') %}
{% if midrule is not none %}
\{{midrule}}
{% endif %}
\endfirsthead
{% if caption and caption is string %}
\caption[]{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} \\
{% elif caption and caption is sequence %}
\caption[]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} \\
{% endif %}
{% if toprule is not none %}
\{{toprule}}
{% endif %}
{% for row in head %}
{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\
{% endfor %}
{% if midrule is not none %}
\{{midrule}}
{% endif %}
\endhead
{% if midrule is not none %}
\{{midrule}}
{% endif %}
\multicolumn{% raw %}{{% endraw %}{{body[0]|length}}{% raw %}}{% endraw %}{r}{Continued on next page} \\
{% if midrule is not none %}
\{{midrule}}
{% endif %}
\endfoot
{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
{% if bottomrule is not none %}
\{{bottomrule}}
{% endif %}
\endlastfoot
{% for row in body %}
{% for c in row %}{% if not loop.first %} & {% endif %}
{%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
{%- endfor %} \\
{% if clines and clines[loop.index] | length > 0 %}
{%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}

{% endif %}
{% endfor %}
\end{longtable}
{% raw %}{% endraw %}
|
||||
@ -0,0 +1,57 @@
|
||||
{# The tabular is wrapped in a float only when an environment or wrap-requiring style is given. #}{% if environment or parse_wrap(table_styles, caption) %}
\begin{% raw %}{{% endraw %}{{environment or "table"}}{% raw %}}{% endraw %}
{%- set position = parse_table(table_styles, 'position') %}
{%- if position is not none %}
[{{position}}]
{%- endif %}

{% set position_float = parse_table(table_styles, 'position_float') %}
{% if position_float is not none %}
\{{position_float}}
{% endif %}
{% if caption and caption is string %}
\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}

{% elif caption and caption is sequence %}
\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}

{% endif %}
{% for style in table_styles %}
{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format'] %}
\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
{% endif %}
{% endfor %}
{% endif %}
\begin{tabular}
{%- set column_format = parse_table(table_styles, 'column_format') %}
{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}

{% set toprule = parse_table(table_styles, 'toprule') %}
{% if toprule is not none %}
\{{toprule}}
{% endif %}
{% for row in head %}
{% for cell in row %}{%- if not loop.first %} & {% endif %}{{parse_header(cell, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\
{% endfor %}
{% set midrule = parse_table(table_styles, 'midrule') %}
{% if midrule is not none %}
\{{midrule}}
{% endif %}
{% for row in body %}
{% for cell in row %}{% if not loop.first %} & {% endif %}
{%- if cell.type == 'th' %}{{parse_header(cell, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(cell.cellstyle, cell.display_value, convert_css)}}{% endif %}
{%- endfor %} \\
{% if clines and clines[loop.index] | length > 0 %}
{%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}

{% endif %}
{% endfor %}
{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
{% if bottomrule is not none %}
\{{bottomrule}}
{% endif %}
\end{tabular}
{% if environment or parse_wrap(table_styles, caption) %}
\end{% raw %}{{% endraw %}{{environment or "table"}}{% raw %}}{% endraw %}

{% endif %}
|
||||
@ -0,0 +1,12 @@
|
||||
{# Visible cells of each row are joined with the delimiter; head rows first, then body. #}{% for row in head %}
{% for cell in row %}{% if cell["is_visible"] %}
{{ cell["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
{% endif %}{% endfor %}

{% endfor %}
{% for row in body %}
{% for cell in row %}{% if cell["is_visible"] %}
{{ cell["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
{% endif %}{% endfor %}

{% endfor %}
|
||||
@ -0,0 +1,12 @@
|
||||
{# One bracketed cell per entry; hidden cells are emitted as empty brackets. #}#table(
columns: {{ head[0] | length }},
{% for row in head %}
{% for cell in row %}[{% if cell["is_visible"] %}{{ cell["display_value"] }}{% endif %}],{% if not loop.last %} {% endif %}{% endfor %}

{% endfor %}

{% for row in body %}
{% for cell in row %}[{% if cell["is_visible"] %}{{ cell["display_value"] }}{% endif %}],{% if not loop.last %} {% endif %}{% endfor %}

{% endfor %}
)
|
||||
@ -0,0 +1,566 @@
|
||||
"""
|
||||
:mod:`pandas.io.formats.xml` is a module for formatting data in XML.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import io
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
final,
|
||||
)
|
||||
|
||||
from pandas.errors import AbstractMethodError
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
from pandas.core.dtypes.missing import isna
|
||||
|
||||
from pandas.io.common import get_handle
|
||||
from pandas.io.xml import get_data_from_filepath
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
CompressionOptions,
|
||||
FilePath,
|
||||
ReadBuffer,
|
||||
StorageOptions,
|
||||
WriteBuffer,
|
||||
)
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
class _BaseXMLFormatter:
|
||||
"""
|
||||
Subclass for formatting data in XML.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path_or_buffer : str or file-like
|
||||
This can be either a string of raw XML, a valid URL,
|
||||
file or file-like object.
|
||||
|
||||
index : bool
|
||||
Whether to include index in xml document.
|
||||
|
||||
row_name : str
|
||||
Name for root of xml document. Default is 'data'.
|
||||
|
||||
root_name : str
|
||||
Name for row elements of xml document. Default is 'row'.
|
||||
|
||||
na_rep : str
|
||||
Missing data representation.
|
||||
|
||||
attrs_cols : list
|
||||
List of columns to write as attributes in row element.
|
||||
|
||||
elem_cols : list
|
||||
List of columns to write as children in row element.
|
||||
|
||||
namespaces : dict
|
||||
The namespaces to define in XML document as dicts with key
|
||||
being namespace and value the URI.
|
||||
|
||||
prefix : str
|
||||
The prefix for each element in XML document including root.
|
||||
|
||||
encoding : str
|
||||
Encoding of xml object or document.
|
||||
|
||||
xml_declaration : bool
|
||||
Whether to include xml declaration at top line item in xml.
|
||||
|
||||
pretty_print : bool
|
||||
Whether to write xml document with line breaks and indentation.
|
||||
|
||||
stylesheet : str or file-like
|
||||
A URL, file, file-like object, or a raw string containing XSLT.
|
||||
|
||||
compression : str or dict, default 'infer'
|
||||
For on-the-fly compression of the output data. If 'infer' and 'path_or_buffer'
|
||||
is path-like, then detect compression from the following extensions: '.gz',
|
||||
'.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
|
||||
(otherwise no compression).
|
||||
Set to ``None`` for no compression.
|
||||
Can also be a dict with key ``'method'`` set
|
||||
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``}
|
||||
and other key-value pairs are forwarded to
|
||||
``zipfile.ZipFile``, ``gzip.GzipFile``,
|
||||
``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
|
||||
``tarfile.TarFile``, respectively.
|
||||
As an example, the following could be passed for faster compression and to
|
||||
create a reproducible gzip archive:
|
||||
``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
|
||||
|
||||
storage_options : dict, optional
|
||||
Extra options that make sense for a particular storage connection, e.g.
|
||||
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
|
||||
are forwarded to ``urllib.request.Request`` as header options. For other
|
||||
URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
|
||||
forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
|
||||
details, and for more examples on storage options refer `here
|
||||
<https://pandas.pydata.org/docs/user_guide/io.html?
|
||||
highlight=storage_options#reading-writing-remote-files>`_.
|
||||
|
||||
See also
|
||||
--------
|
||||
pandas.io.formats.xml.EtreeXMLFormatter
|
||||
pandas.io.formats.xml.LxmlXMLFormatter
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
frame: DataFrame,
|
||||
path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
|
||||
index: bool = True,
|
||||
root_name: str | None = "data",
|
||||
row_name: str | None = "row",
|
||||
na_rep: str | None = None,
|
||||
attr_cols: list[str] | None = None,
|
||||
elem_cols: list[str] | None = None,
|
||||
namespaces: dict[str | None, str] | None = None,
|
||||
prefix: str | None = None,
|
||||
encoding: str = "utf-8",
|
||||
xml_declaration: bool | None = True,
|
||||
pretty_print: bool | None = True,
|
||||
stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None,
|
||||
compression: CompressionOptions = "infer",
|
||||
storage_options: StorageOptions | None = None,
|
||||
) -> None:
|
||||
self.frame = frame
|
||||
self.path_or_buffer = path_or_buffer
|
||||
self.index = index
|
||||
self.root_name = root_name
|
||||
self.row_name = row_name
|
||||
self.na_rep = na_rep
|
||||
self.attr_cols = attr_cols
|
||||
self.elem_cols = elem_cols
|
||||
self.namespaces = namespaces
|
||||
self.prefix = prefix
|
||||
self.encoding = encoding
|
||||
self.xml_declaration = xml_declaration
|
||||
self.pretty_print = pretty_print
|
||||
self.stylesheet = stylesheet
|
||||
self.compression: CompressionOptions = compression
|
||||
self.storage_options = storage_options
|
||||
|
||||
self.orig_cols = self.frame.columns.tolist()
|
||||
self.frame_dicts = self._process_dataframe()
|
||||
|
||||
self._validate_columns()
|
||||
self._validate_encoding()
|
||||
self.prefix_uri = self._get_prefix_uri()
|
||||
self._handle_indexes()
|
||||
|
||||
def _build_tree(self) -> bytes:
|
||||
"""
|
||||
Build tree from data.
|
||||
|
||||
This method initializes the root and builds attributes and elements
|
||||
with optional namespaces.
|
||||
"""
|
||||
raise AbstractMethodError(self)
|
||||
|
||||
@final
|
||||
def _validate_columns(self) -> None:
|
||||
"""
|
||||
Validate elems_cols and attrs_cols.
|
||||
|
||||
This method will check if columns is list-like.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* If value is not a list and less then length of nodes.
|
||||
"""
|
||||
if self.attr_cols and not is_list_like(self.attr_cols):
|
||||
raise TypeError(
|
||||
f"{type(self.attr_cols).__name__} is not a valid type for attr_cols"
|
||||
)
|
||||
|
||||
if self.elem_cols and not is_list_like(self.elem_cols):
|
||||
raise TypeError(
|
||||
f"{type(self.elem_cols).__name__} is not a valid type for elem_cols"
|
||||
)
|
||||
|
||||
@final
|
||||
def _validate_encoding(self) -> None:
|
||||
"""
|
||||
Validate encoding.
|
||||
|
||||
This method will check if encoding is among listed under codecs.
|
||||
|
||||
Raises
|
||||
------
|
||||
LookupError
|
||||
* If encoding is not available in codecs.
|
||||
"""
|
||||
|
||||
codecs.lookup(self.encoding)
|
||||
|
||||
@final
|
||||
def _process_dataframe(self) -> dict[int | str, dict[str, Any]]:
|
||||
"""
|
||||
Adjust Data Frame to fit xml output.
|
||||
|
||||
This method will adjust underlying data frame for xml output,
|
||||
including optionally replacing missing values and including indexes.
|
||||
"""
|
||||
|
||||
df = self.frame
|
||||
|
||||
if self.index:
|
||||
df = df.reset_index()
|
||||
|
||||
if self.na_rep is not None:
|
||||
df = df.fillna(self.na_rep)
|
||||
|
||||
return df.to_dict(orient="index")
|
||||
|
||||
@final
|
||||
def _handle_indexes(self) -> None:
|
||||
"""
|
||||
Handle indexes.
|
||||
|
||||
This method will add indexes into attr_cols or elem_cols.
|
||||
"""
|
||||
|
||||
if not self.index:
|
||||
return
|
||||
|
||||
first_key = next(iter(self.frame_dicts))
|
||||
indexes: list[str] = [
|
||||
x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
|
||||
]
|
||||
|
||||
if self.attr_cols:
|
||||
self.attr_cols = indexes + self.attr_cols
|
||||
|
||||
if self.elem_cols:
|
||||
self.elem_cols = indexes + self.elem_cols
|
||||
|
||||
def _get_prefix_uri(self) -> str:
|
||||
"""
|
||||
Get uri of namespace prefix.
|
||||
|
||||
This method retrieves corresponding URI to prefix in namespaces.
|
||||
|
||||
Raises
|
||||
------
|
||||
KeyError
|
||||
*If prefix is not included in namespace dict.
|
||||
"""
|
||||
|
||||
raise AbstractMethodError(self)
|
||||
|
||||
@final
|
||||
def _other_namespaces(self) -> dict:
|
||||
"""
|
||||
Define other namespaces.
|
||||
|
||||
This method will build dictionary of namespaces attributes
|
||||
for root element, conditionally with optional namespaces and
|
||||
prefix.
|
||||
"""
|
||||
|
||||
nmsp_dict: dict[str, str] = {}
|
||||
if self.namespaces:
|
||||
nmsp_dict = {
|
||||
f"xmlns{p if p == '' else f':{p}'}": n
|
||||
for p, n in self.namespaces.items()
|
||||
if n != self.prefix_uri[1:-1]
|
||||
}
|
||||
|
||||
return nmsp_dict
|
||||
|
||||
@final
|
||||
def _build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any:
|
||||
"""
|
||||
Create attributes of row.
|
||||
|
||||
This method adds attributes using attr_cols to row element and
|
||||
works with tuples for multindex or hierarchical columns.
|
||||
"""
|
||||
|
||||
if not self.attr_cols:
|
||||
return elem_row
|
||||
|
||||
for col in self.attr_cols:
|
||||
attr_name = self._get_flat_col_name(col)
|
||||
try:
|
||||
if not isna(d[col]):
|
||||
elem_row.attrib[attr_name] = str(d[col])
|
||||
except KeyError as err:
|
||||
raise KeyError(f"no valid column, {col}") from err
|
||||
return elem_row
|
||||
|
||||
@final
|
||||
def _get_flat_col_name(self, col: str | tuple) -> str:
|
||||
flat_col = col
|
||||
if isinstance(col, tuple):
|
||||
flat_col = (
|
||||
"".join([str(c) for c in col]).strip()
|
||||
if "" in col
|
||||
else "_".join([str(c) for c in col]).strip()
|
||||
)
|
||||
return f"{self.prefix_uri}{flat_col}"
|
||||
|
||||
@cache_readonly
|
||||
def _sub_element_cls(self):
|
||||
raise AbstractMethodError(self)
|
||||
|
||||
@final
|
||||
def _build_elems(self, d: dict[str, Any], elem_row: Any) -> None:
|
||||
"""
|
||||
Create child elements of row.
|
||||
|
||||
This method adds child elements using elem_cols to row element and
|
||||
works with tuples for multindex or hierarchical columns.
|
||||
"""
|
||||
sub_element_cls = self._sub_element_cls
|
||||
|
||||
if not self.elem_cols:
|
||||
return
|
||||
|
||||
for col in self.elem_cols:
|
||||
elem_name = self._get_flat_col_name(col)
|
||||
try:
|
||||
val = None if isna(d[col]) or d[col] == "" else str(d[col])
|
||||
sub_element_cls(elem_row, elem_name).text = val
|
||||
except KeyError as err:
|
||||
raise KeyError(f"no valid column, {col}") from err
|
||||
|
||||
@final
def write_output(self) -> str | None:
    """
    Build the XML document and either return it or write it out.

    Returns
    -------
    str or None
        When ``path_or_buffer`` is None, the document decoded with
        ``encoding`` and stripped of trailing whitespace. Otherwise the
        raw bytes are written to ``path_or_buffer`` and None is returned.
    """
    document = self._build_tree()
    target = self.path_or_buffer

    if target is None:
        return document.decode(self.encoding).rstrip()

    # The document is already encoded, so the handle is opened in binary mode.
    with get_handle(
        target,
        "wb",
        compression=self.compression,
        storage_options=self.storage_options,
        is_text=False,
    ) as handles:
        handles.handle.write(document)

    return None
|
||||
|
||||
|
||||
class EtreeXMLFormatter(_BaseXMLFormatter):
|
||||
"""
|
||||
Class for formatting data in xml using Python standard library
|
||||
modules: `xml.etree.ElementTree` and `xml.dom.minidom`.
|
||||
"""
|
||||
|
||||
def _build_tree(self) -> bytes:
|
||||
from xml.etree.ElementTree import (
|
||||
Element,
|
||||
SubElement,
|
||||
tostring,
|
||||
)
|
||||
|
||||
self.root = Element(
|
||||
f"{self.prefix_uri}{self.root_name}", attrib=self._other_namespaces()
|
||||
)
|
||||
|
||||
for d in self.frame_dicts.values():
|
||||
elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
|
||||
|
||||
if not self.attr_cols and not self.elem_cols:
|
||||
self.elem_cols = list(d.keys())
|
||||
self._build_elems(d, elem_row)
|
||||
|
||||
else:
|
||||
elem_row = self._build_attribs(d, elem_row)
|
||||
self._build_elems(d, elem_row)
|
||||
|
||||
self.out_xml = tostring(
|
||||
self.root,
|
||||
method="xml",
|
||||
encoding=self.encoding,
|
||||
xml_declaration=self.xml_declaration,
|
||||
)
|
||||
|
||||
if self.pretty_print:
|
||||
self.out_xml = self._prettify_tree()
|
||||
|
||||
if self.stylesheet is not None:
|
||||
raise ValueError(
|
||||
"To use stylesheet, you need lxml installed and selected as parser."
|
||||
)
|
||||
|
||||
return self.out_xml
|
||||
|
||||
def _get_prefix_uri(self) -> str:
|
||||
from xml.etree.ElementTree import register_namespace
|
||||
|
||||
uri = ""
|
||||
if self.namespaces:
|
||||
for p, n in self.namespaces.items():
|
||||
if isinstance(p, str) and isinstance(n, str):
|
||||
register_namespace(p, n)
|
||||
if self.prefix:
|
||||
try:
|
||||
uri = f"{{{self.namespaces[self.prefix]}}}"
|
||||
except KeyError as err:
|
||||
raise KeyError(
|
||||
f"{self.prefix} is not included in namespaces"
|
||||
) from err
|
||||
elif "" in self.namespaces:
|
||||
uri = f"{{{self.namespaces['']}}}"
|
||||
else:
|
||||
uri = ""
|
||||
|
||||
return uri
|
||||
|
||||
@cache_readonly
|
||||
def _sub_element_cls(self):
|
||||
from xml.etree.ElementTree import SubElement
|
||||
|
||||
return SubElement
|
||||
|
||||
def _prettify_tree(self) -> bytes:
|
||||
"""
|
||||
Output tree for pretty print format.
|
||||
|
||||
This method will pretty print xml with line breaks and indentation.
|
||||
"""
|
||||
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
dom = parseString(self.out_xml)
|
||||
|
||||
return dom.toprettyxml(indent=" ", encoding=self.encoding)
|
||||
|
||||
|
||||
class LxmlXMLFormatter(_BaseXMLFormatter):
|
||||
"""
|
||||
Class for formatting data in xml using Python standard library
|
||||
modules: `xml.etree.ElementTree` and `xml.dom.minidom`.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
self._convert_empty_str_key()
|
||||
|
||||
def _build_tree(self) -> bytes:
|
||||
"""
|
||||
Build tree from data.
|
||||
|
||||
This method initializes the root and builds attributes and elements
|
||||
with optional namespaces.
|
||||
"""
|
||||
from lxml.etree import (
|
||||
Element,
|
||||
SubElement,
|
||||
tostring,
|
||||
)
|
||||
|
||||
self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces)
|
||||
|
||||
for d in self.frame_dicts.values():
|
||||
elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
|
||||
|
||||
if not self.attr_cols and not self.elem_cols:
|
||||
self.elem_cols = list(d.keys())
|
||||
self._build_elems(d, elem_row)
|
||||
|
||||
else:
|
||||
elem_row = self._build_attribs(d, elem_row)
|
||||
self._build_elems(d, elem_row)
|
||||
|
||||
self.out_xml = tostring(
|
||||
self.root,
|
||||
pretty_print=self.pretty_print,
|
||||
method="xml",
|
||||
encoding=self.encoding,
|
||||
xml_declaration=self.xml_declaration,
|
||||
)
|
||||
|
||||
if self.stylesheet is not None:
|
||||
self.out_xml = self._transform_doc()
|
||||
|
||||
return self.out_xml
|
||||
|
||||
def _convert_empty_str_key(self) -> None:
|
||||
"""
|
||||
Replace zero-length string in `namespaces`.
|
||||
|
||||
This method will replace '' with None to align to `lxml`
|
||||
requirement that empty string prefixes are not allowed.
|
||||
"""
|
||||
|
||||
if self.namespaces and "" in self.namespaces.keys():
|
||||
self.namespaces[None] = self.namespaces.pop("", "default")
|
||||
|
||||
def _get_prefix_uri(self) -> str:
|
||||
uri = ""
|
||||
if self.namespaces:
|
||||
if self.prefix:
|
||||
try:
|
||||
uri = f"{{{self.namespaces[self.prefix]}}}"
|
||||
except KeyError as err:
|
||||
raise KeyError(
|
||||
f"{self.prefix} is not included in namespaces"
|
||||
) from err
|
||||
elif "" in self.namespaces:
|
||||
uri = f"{{{self.namespaces['']}}}"
|
||||
else:
|
||||
uri = ""
|
||||
|
||||
return uri
|
||||
|
||||
@cache_readonly
|
||||
def _sub_element_cls(self):
|
||||
from lxml.etree import SubElement
|
||||
|
||||
return SubElement
|
||||
|
||||
def _transform_doc(self) -> bytes:
|
||||
"""
|
||||
Parse stylesheet from file or buffer and run it.
|
||||
|
||||
This method will parse stylesheet object into tree for parsing
|
||||
conditionally by its specific object type, then transforms
|
||||
original tree with XSLT script.
|
||||
"""
|
||||
from lxml.etree import (
|
||||
XSLT,
|
||||
XMLParser,
|
||||
fromstring,
|
||||
parse,
|
||||
)
|
||||
|
||||
style_doc = self.stylesheet
|
||||
assert style_doc is not None # is ensured by caller
|
||||
|
||||
handle_data = get_data_from_filepath(
|
||||
filepath_or_buffer=style_doc,
|
||||
encoding=self.encoding,
|
||||
compression=self.compression,
|
||||
storage_options=self.storage_options,
|
||||
)
|
||||
|
||||
with handle_data as xml_data:
|
||||
curr_parser = XMLParser(encoding=self.encoding)
|
||||
|
||||
if isinstance(xml_data, io.StringIO):
|
||||
xsl_doc = fromstring(
|
||||
xml_data.getvalue().encode(self.encoding), parser=curr_parser
|
||||
)
|
||||
else:
|
||||
xsl_doc = parse(xml_data, parser=curr_parser)
|
||||
|
||||
transformer = XSLT(xsl_doc)
|
||||
new_doc = transformer(self.root)
|
||||
|
||||
return bytes(new_doc)
|
||||
Reference in New Issue
Block a user