Initial commit: 首次建仓,建立目录结构
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,197 @@
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import (
|
||||
is_ci_environment,
|
||||
is_platform_arm,
|
||||
is_platform_mac,
|
||||
is_platform_windows,
|
||||
)
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas.io.common as icom
|
||||
from pandas.io.parsers import read_csv
|
||||
|
||||
|
||||
@pytest.fixture
def compression_to_extension():
    """Return ``icom.extension_to_compression`` with keys and values swapped.

    The result maps each compression name to a file extension. If several
    extensions share one compression name, the last one iterated wins.
    """
    inverted = {}
    for extension, compression in icom.extension_to_compression.items():
        inverted[compression] = extension
    return inverted
|
||||
|
||||
|
||||
@pytest.fixture
def tips_file(datapath):
    """Return the path of ``tips.csv`` as resolved by the ``datapath`` fixture."""
    path_parts = ("io", "data", "csv", "tips.csv")
    return datapath(*path_parts)
|
||||
|
||||
|
||||
@pytest.fixture
def jsonl_file(datapath):
    """Return the path of ``items.jsonl`` as resolved by the ``datapath`` fixture."""
    path_parts = ("io", "parser", "data", "items.jsonl")
    return datapath(*path_parts)
|
||||
|
||||
|
||||
@pytest.fixture
def salaries_table(datapath):
    """Return the tab-separated ``salaries.csv`` dataset parsed into a DataFrame."""
    salaries_path = datapath("io", "parser", "data", "salaries.csv")
    return read_csv(salaries_path, sep="\t")
|
||||
|
||||
|
||||
@pytest.fixture
def feather_file(datapath):
    """Return the path of the ``feather-0_3_1.feather`` test file."""
    path_parts = ("io", "data", "feather", "feather-0_3_1.feather")
    return datapath(*path_parts)
|
||||
|
||||
|
||||
@pytest.fixture
def xml_file(datapath):
    """Return the path of the ``books.xml`` test file."""
    path_parts = ("io", "data", "xml", "books.xml")
    return datapath(*path_parts)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def aws_credentials(monkeysession):
    """Mocked AWS Credentials for moto.

    Sets dummy values for the AWS credential environment variables for the
    whole test session through the ``monkeysession`` fixture.
    """
    monkeysession.setenv("AWS_ACCESS_KEY_ID", "testing")
    monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "testing")
    monkeysession.setenv("AWS_SECURITY_TOKEN", "testing")
    # The session-token variable is AWS_SESSION_TOKEN; the previous name
    # ("AWS_SESSION_AWS_SESSION_TOKEN") was a typo.
    monkeysession.setenv("AWS_SESSION_TOKEN", "testing")
    monkeysession.setenv("AWS_DEFAULT_REGION", "us-east-1")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def moto_server(aws_credentials):
    """Yield the endpoint URL of a moto server for the test session.

    On CI, for platforms other than macOS, ARM and Windows, a fixed localhost
    URL is yielded; otherwise a ``ThreadedMotoServer`` is started on a free
    port and stopped again at teardown.
    """
    # use service container for Linux on GitHub Actions
    uses_service_container = (
        is_ci_environment()
        and not is_platform_mac()
        and not is_platform_arm()
        and not is_platform_windows()
    )
    if uses_service_container:
        yield "http://localhost:5000"
        return

    moto_module = pytest.importorskip("moto.server")
    threaded_server = moto_module.ThreadedMotoServer(port=0)
    threaded_server.start()
    host, port = threaded_server.get_host_and_port()
    yield f"http://{host}:{port}"
    threaded_server.stop()
|
||||
|
||||
|
||||
@pytest.fixture
def moto_s3_resource(moto_server):
    """Return a boto3 S3 resource whose endpoint is the moto server URL.

    The test is skipped when ``boto3`` cannot be imported.
    """
    boto3 = pytest.importorskip("boto3")
    return boto3.resource("s3", endpoint_url=moto_server)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def s3so(moto_server):
    """Return options whose ``client_kwargs`` point at the moto endpoint URL."""
    client_kwargs = {"endpoint_url": moto_server}
    return {"client_kwargs": client_kwargs}
|
||||
|
||||
|
||||
@pytest.fixture
def s3_bucket_public(moto_s3_resource):
    """
    Yield a freshly created, uniquely named bucket with the "public-read" ACL.
    """
    public_bucket = moto_s3_resource.Bucket(f"pandas-test-{uuid.uuid4()}")
    public_bucket.create(ACL="public-read")
    yield public_bucket
    # Teardown: remove every object, then the bucket itself.
    public_bucket.objects.delete()
    public_bucket.delete()
|
||||
|
||||
|
||||
@pytest.fixture
def s3_bucket_private(moto_s3_resource):
    """
    Yield a freshly created, uniquely named bucket with the "private" ACL.
    """
    private_bucket = moto_s3_resource.Bucket(f"cant_get_it-{uuid.uuid4()}")
    private_bucket.create(ACL="private")
    yield private_bucket
    # Teardown: remove every object, then the bucket itself.
    private_bucket.objects.delete()
    private_bucket.delete()
|
||||
|
||||
|
||||
def _upload_s3_test_files(bucket, tips_file, jsonl_file, feather_file, xml_file):
    """Upload the shared test datasets to ``bucket`` and return the bucket.

    Each local file is opened in binary mode and stored under these keys:

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather
    - books.xml
    """
    test_s3_files = [
        ("tips#1.csv", tips_file),
        ("tips.csv", tips_file),
        ("tips.csv.gz", tips_file + ".gz"),
        ("tips.csv.bz2", tips_file + ".bz2"),
        ("items.jsonl", jsonl_file),
        ("simple_dataset.feather", feather_file),
        ("books.xml", xml_file),
    ]
    for s3_key, file_name in test_s3_files:
        with open(file_name, "rb") as f:
            bucket.put_object(Key=s3_key, Body=f)
    return bucket


@pytest.fixture
def s3_bucket_public_with_data(
    s3_bucket_public, tips_file, jsonl_file, feather_file, xml_file
):
    """
    Return the public bucket after loading the following datasets into it.

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather
    - books.xml
    """
    return _upload_s3_test_files(
        s3_bucket_public, tips_file, jsonl_file, feather_file, xml_file
    )


@pytest.fixture
def s3_bucket_private_with_data(
    s3_bucket_private, tips_file, jsonl_file, feather_file, xml_file
):
    """
    Return the private bucket after loading the following datasets into it.

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather
    - books.xml
    """
    return _upload_s3_test_files(
        s3_bucket_private, tips_file, jsonl_file, feather_file, xml_file
    )
|
||||
|
||||
|
||||
# (file extension, compression name) pairs used to parametrize the
# ``compression_format`` and ``compression_ext`` fixtures; ``None`` means no
# compression. The zstd entries carry the ``td.skip_if_no("zstandard")`` mark.
# NOTE: ``compression_format`` drops the first entry via ``[1:]``, so the
# order of this list matters.
_compression_formats_params = [
    (".no_compress", None),
    ("", None),
    (".gz", "gzip"),
    (".GZ", "gzip"),
    (".bz2", "bz2"),
    (".BZ2", "bz2"),
    (".zip", "zip"),
    (".ZIP", "zip"),
    (".xz", "xz"),
    (".XZ", "xz"),
    pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
    pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
]
|
||||
|
||||
|
||||
@pytest.fixture(params=_compression_formats_params[1:])
def compression_format(request):
    """Return one ``(extension, compression)`` pair, excluding the first entry."""
    extension_and_compression = request.param
    return extension_and_compression
|
||||
|
||||
|
||||
@pytest.fixture(params=_compression_formats_params)
def compression_ext(request):
    """Return only the file extension of each parametrized pair."""
    extension_and_compression = request.param
    return extension_and_compression[0]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,72 @@
|
||||
import functools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
# Skip every test in this module when the optional ``odf`` package is missing.
pytest.importorskip("odf")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
    """Patch ``pd.read_excel`` to use the odf engine and chdir to the excel data.

    Applied automatically to every test in this module, so tests can call
    ``pd.read_excel`` with bare file names.
    """
    read_excel_with_odf = functools.partial(pd.read_excel, engine="odf")
    monkeypatch.setattr(pd, "read_excel", read_excel_with_odf)
    excel_data_dir = datapath("io", "data", "excel")
    monkeypatch.chdir(excel_data_dir)
|
||||
|
||||
|
||||
def test_read_invalid_types_raises():
    """Reading a cell with an unknown value type raises ``ValueError``."""
    # the invalid_value_type.ods required manually editing
    # of the included content.xml file
    expected_msg = "Unrecognized type awesome_new_type"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("invalid_value_type.ods")
|
||||
|
||||
|
||||
def test_read_writer_table():
    """Tables can also be read from a text OpenDocument file (.odt)."""
    result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)

    row_labels = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
    column_labels = ["Column 1", "Unnamed: 2", "Column 3"]
    rows = [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]]
    expected = pd.DataFrame(rows, index=row_labels, columns=column_labels)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_newlines_between_xml_elements_table():
    """Compare the frame read from ``test_newlines.ods`` with the expected data."""
    # GH#45598
    result = pd.read_excel("test_newlines.ods")

    rows = [[1.0, 4.0, 7], [np.nan, np.nan, 8], [3.0, 6.0, 9]]
    column_labels = ["Column 1", "Column 2", "Column 3"]
    expected = pd.DataFrame(rows, columns=column_labels)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_unempty_cells():
    """Compare the frame read from ``test_unempty_cells.ods`` with expected data."""
    result = pd.read_excel("test_unempty_cells.ods")

    values = [1, np.nan, 3, np.nan, 5]
    expected = pd.DataFrame(values, columns=["Column 1"])

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_cell_annotation():
    """Compare the frame read from ``test_cell_annotation.ods`` with expected data."""
    result = pd.read_excel("test_cell_annotation.ods")

    values = ["test", np.nan, "test 3"]
    expected = pd.DataFrame(values, columns=["Column 1"])

    tm.assert_frame_equal(result, expected)
|
||||
@ -0,0 +1,106 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
import re
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
|
||||
# Skip every test in this module when ``odf`` is missing; otherwise keep the
# imported module for use in the tests below.
odf = pytest.importorskip("odf")
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """Return the file extension used by the tests in this module."""
    extension = ".ods"
    return extension
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_excel(ext, tmp_path):
    """Create an empty, uniquely named file under ``tmp_path``; return its path."""
    file_name = f"{uuid.uuid4()}{ext}"
    excel_path = tmp_path / file_name
    excel_path.touch()
    return str(excel_path)
|
||||
|
||||
|
||||
def test_write_append_mode_raises(tmp_excel):
    """Opening an odf writer with ``mode="a"`` raises ``ValueError``."""
    expected_msg = "Append mode is not supported with odf!"

    with pytest.raises(ValueError, match=expected_msg):
        ExcelWriter(tmp_excel, engine="odf", mode="a")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(tmp_excel, engine_kwargs):
    """``engine_kwargs=None`` works; any keyword argument raises ``TypeError``."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    if engine_kwargs is None:
        with ExcelWriter(tmp_excel, engine="odf", engine_kwargs=engine_kwargs) as _:
            pass
        return

    expected_msg = re.escape(
        "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
    )
    with pytest.raises(TypeError, match=expected_msg):
        ExcelWriter(tmp_excel, engine="odf", engine_kwargs=engine_kwargs)
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(tmp_excel):
    """``writer.sheets`` reflects a table added directly to ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with ExcelWriter(tmp_excel) as writer:
        assert writer.sheets == {}

        new_table = odf.table.Table(name="test_name")
        writer.book.spreadsheet.addElement(new_table)

        expected_sheets = {"test_name": new_table}
        assert writer.sheets == expected_sheets
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["value", "cell_value_type", "cell_value_attribute", "cell_value"],
    argvalues=[
        (True, "boolean", "boolean-value", "true"),
        ("test string", "string", "string-value", "test string"),
        (1, "float", "value", "1"),
        (1.5, "float", "value", "1.5"),
        (
            datetime(2010, 10, 10, 10, 10, 10),
            "date",
            "date-value",
            "2010-10-10T10:10:10",
        ),
        (date(2010, 10, 10), "date", "date-value", "2010-10-10"),
    ],
)
def test_cell_value_type(
    tmp_excel, value, cell_value_type, cell_value_attribute, cell_value
):
    """Write one value and check the ODS attributes of the resulting cell."""
    # GH#54994 ODS: cell attributes should follow specification
    # http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
    from odf.namespaces import OFFICENS
    from odf.table import (
        TableCell,
        TableRow,
    )

    cell_qname = TableCell().qname

    frame = pd.DataFrame([[value]])
    frame.to_excel(tmp_excel, header=False, index=False)

    with pd.ExcelFile(tmp_excel) as wb:
        first_sheet = wb._reader.get_sheet_by_index(0)
        first_row = first_sheet.getElementsByType(TableRow)[0]

        # Keep only the child nodes that are table cells.
        row_cells = []
        for node in first_row.childNodes:
            if hasattr(node, "qname") and node.qname == cell_qname:
                row_cells.append(node)

        written_cell = row_cells[0]
        type_key = (OFFICENS, "value-type")
        value_key = (OFFICENS, cell_value_attribute)
        assert written_cell.attributes.get(type_key) == cell_value_type
        assert written_cell.attributes.get(value_key) == cell_value
|
||||
@ -0,0 +1,431 @@
|
||||
import contextlib
|
||||
from pathlib import Path
|
||||
import re
|
||||
import uuid
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import (
|
||||
ExcelWriter,
|
||||
_OpenpyxlWriter,
|
||||
)
|
||||
from pandas.io.excel._openpyxl import OpenpyxlReader
|
||||
|
||||
# Skip every test in this module when ``openpyxl`` is missing; otherwise keep
# the imported module for use in the tests below.
openpyxl = pytest.importorskip("openpyxl")
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """Return the file extension used by the tests in this module."""
    extension = ".xlsx"
    return extension
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_excel(ext, tmp_path):
    """Create an empty, uniquely named file under ``tmp_path``; return its path."""
    file_name = f"{uuid.uuid4()}{ext}"
    excel_path = tmp_path / file_name
    excel_path.touch()
    return str(excel_path)
|
||||
|
||||
|
||||
def test_to_excel_styleconverter():
    """``_convert_to_style_kwargs`` yields the equivalent openpyxl style objects."""
    from openpyxl import styles

    hstyle = {
        "font": {"color": "00FF0000", "bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
        "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
        "number_format": {"format_code": "0.00"},
        "protection": {"locked": True, "hidden": False},
    }
    kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)

    thin_side = styles.Side(style=styles.borders.BORDER_THIN)
    expected_by_key = {
        "font": styles.Font(bold=True, color=styles.Color("00FF0000")),
        "border": styles.Border(
            top=thin_side, right=thin_side, bottom=thin_side, left=thin_side
        ),
        "alignment": styles.Alignment(horizontal="center", vertical="top"),
        "fill": styles.PatternFill(
            patternType="solid", fgColor=styles.Color(rgb="006666FF", tint=0.3)
        ),
        "number_format": "0.00",
        "protection": styles.Protection(locked=True, hidden=False),
    }

    # Checked in the same order as the keys are listed above.
    for key, expected_style in expected_by_key.items():
        assert kw[key] == expected_style
|
||||
|
||||
|
||||
def test_write_cells_merge_styled(tmp_excel):
    """Cells covered by a later merged cell take the merged cell's font."""
    from pandas.io.formats.excel import ExcelCell

    sheet_name = "merge_styled"

    red_font = {"font": {"color": "00FF0000"}}
    green_font = {"font": {"color": "0000FF00"}}
    initial_cells = [
        ExcelCell(col=1, row=0, val=42, style=red_font),
        ExcelCell(col=0, row=1, val=99, style=green_font),
    ]

    sty_merged = {"font": {"color": "000000FF", "bold": True}}
    expected_font = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)["font"]
    merged_cell = ExcelCell(
        col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
    )
    merge_cells = [merged_cell]

    with _OpenpyxlWriter(tmp_excel) as writer:
        writer._write_cells(initial_cells, sheet_name=sheet_name)
        writer._write_cells(merge_cells, sheet_name=sheet_name)

        worksheet = writer.sheets[sheet_name]
    font_b1 = worksheet["B1"].font
    font_a2 = worksheet["A2"].font
    assert font_b1 == expected_font
    assert font_a2 == expected_font
|
||||
|
||||
|
||||
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(tmp_excel, iso_dates):
    """``engine_kwargs`` given to ``ExcelWriter`` show up on ``writer.book``."""
    # GH 42286 GH 43445
    writer_options = {"iso_dates": iso_dates}
    empty_frame = DataFrame()
    with ExcelWriter(
        tmp_excel, engine="openpyxl", engine_kwargs=writer_options
    ) as writer:
        assert writer.book.iso_dates == iso_dates
        # ExcelWriter won't allow us to close without writing something
        empty_frame.to_excel(writer)
|
||||
|
||||
|
||||
def test_engine_kwargs_append_invalid(tmp_excel):
    """An unknown engine kwarg in append mode raises ``TypeError``."""
    # GH 43445
    # test whether an invalid engine kwargs actually raises
    DataFrame(["hello", "world"]).to_excel(tmp_excel)

    expected_msg = re.escape(
        "load_workbook() got an unexpected keyword argument 'apple_banana'"
    )
    bad_kwargs = {"apple_banana": "fruit"}
    with pytest.raises(TypeError, match=expected_msg):
        with ExcelWriter(
            tmp_excel,
            engine="openpyxl",
            mode="a",
            engine_kwargs=bad_kwargs,
        ) as writer:
            # ExcelWriter needs us to write something to close properly
            DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(tmp_excel, data_only, expected):
    """The ``data_only`` engine kwarg is honoured when appending and reading."""
    # GH 43445
    # tests whether the data_only engine_kwarg actually works well for
    # openpyxl's load_workbook
    DataFrame(["=1+1"]).to_excel(tmp_excel)

    load_options = {"data_only": data_only}
    with ExcelWriter(
        tmp_excel, engine="openpyxl", mode="a", engine_kwargs=load_options
    ) as writer:
        assert writer.sheets["Sheet1"]["B2"].value == expected
        # ExcelWriter needs us to write something to close properly
        DataFrame().to_excel(writer, sheet_name="Sheet2")

    # ensure that data_only also works for reading
    # and that formulas/values roundtrip
    reread = pd.read_excel(
        tmp_excel,
        sheet_name="Sheet1",
        engine="openpyxl",
        engine_kwargs={"data_only": data_only},
    )
    assert reread.iloc[0, 1] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
@pytest.mark.parametrize("kwarg_value", [True, False])
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
    """Reader ``engine_kwargs`` end up as attributes of the loaded workbook."""
    # GH 55027
    # test that `read_only` and `data_only` can be passed to
    # `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
    filename = datapath("io", "data", "excel", "test1" + ext)
    reader_options = {kwarg_name: kwarg_value}
    with contextlib.closing(
        OpenpyxlReader(filename, engine_kwargs=reader_options)
    ) as reader:
        loaded_value = getattr(reader.book, kwarg_name)
        assert loaded_value == kwarg_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(tmp_excel, mode, expected):
    """Check which sheets exist after writing "baz" in write vs. append mode."""
    # Seed the file with two sheets whose A1 cell holds the sheet's own name.
    seed_book = openpyxl.Workbook()
    seed_book.worksheets[0].title = "foo"
    seed_book.worksheets[0]["A1"].value = "foo"
    seed_book.create_sheet("bar")
    seed_book.worksheets[1]["A1"].value = "bar"
    seed_book.save(tmp_excel)

    frame = DataFrame([1], columns=["baz"])
    with ExcelWriter(tmp_excel, engine="openpyxl", mode=mode) as writer:
        frame.to_excel(writer, sheet_name="baz", index=False)

    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as reloaded:
        sheet_titles = [sheet.title for sheet in reloaded.worksheets]
        assert sheet_titles == expected

        for position, expected_a1 in enumerate(expected):
            assert reloaded.worksheets[position]["A1"].value == expected_a1
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "if_sheet_exists,num_sheets,expected",
    [
        ("new", 2, ["apple", "banana"]),
        ("replace", 1, ["pear"]),
        ("overlay", 1, ["pear", "banana"]),
    ],
)
def test_if_sheet_exists_append_modes(tmp_excel, if_sheet_exists, num_sheets, expected):
    """Append to an existing sheet name under each ``if_sheet_exists`` mode."""
    # GH 40230
    original = DataFrame({"fruit": ["apple", "banana"]})
    appended = DataFrame({"fruit": ["pear"]})

    original.to_excel(tmp_excel, engine="openpyxl", sheet_name="foo", index=False)
    with ExcelWriter(
        tmp_excel, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
    ) as writer:
        appended.to_excel(writer, sheet_name="foo", index=False)

    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        assert len(wb.sheetnames) == num_sheets
        assert wb.sheetnames[0] == "foo"

        foo_frame = pd.read_excel(wb, "foo", engine="openpyxl")
        assert list(foo_frame["fruit"]) == expected

        # With two sheets, the second one must hold exactly the appended frame.
        if len(wb.sheetnames) == 2:
            second_frame = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
            tm.assert_frame_equal(second_frame, appended)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "startrow, startcol, greeting, goodbye",
    [
        (0, 0, ["poop", "world"], ["goodbye", "people"]),
        (0, 1, ["hello", "world"], ["poop", "people"]),
        (1, 0, ["hello", "poop"], ["goodbye", "people"]),
        (1, 1, ["hello", "world"], ["goodbye", "poop"]),
    ],
)
def test_append_overlay_startrow_startcol(
    tmp_excel, startrow, startcol, greeting, goodbye
):
    """Overlay a single value onto an existing sheet at a given row/column."""
    base = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
    overlay = DataFrame(["poop"])

    base.to_excel(tmp_excel, engine="openpyxl", sheet_name="poo", index=False)
    with ExcelWriter(
        tmp_excel, engine="openpyxl", mode="a", if_sheet_exists="overlay"
    ) as writer:
        # use startrow+1 because we don't have a header
        first_data_row = startrow + 1
        overlay.to_excel(
            writer,
            index=False,
            header=False,
            startrow=first_data_row,
            startcol=startcol,
            sheet_name="poo",
        )

    expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
    result = pd.read_excel(tmp_excel, sheet_name="poo", engine="openpyxl")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "if_sheet_exists,msg",
    [
        (
            "invalid",
            "'invalid' is not valid for if_sheet_exists. Valid options "
            "are 'error', 'new', 'replace' and 'overlay'.",
        ),
        (
            "error",
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
        (
            None,
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
    ],
)
def test_if_sheet_exists_raises(tmp_excel, if_sheet_exists, msg):
    """Invalid or "error" ``if_sheet_exists`` values raise ``ValueError``."""
    # GH 40230
    frame = DataFrame({"fruit": ["pear"]})
    frame.to_excel(tmp_excel, sheet_name="foo", engine="openpyxl")

    escaped_msg = re.escape(msg)
    with pytest.raises(ValueError, match=escaped_msg):
        with ExcelWriter(
            tmp_excel, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
        ) as writer:
            frame.to_excel(writer, sheet_name="foo")
|
||||
|
||||
|
||||
def test_to_excel_with_openpyxl_engine(tmp_excel):
    """Writing a styled frame with the openpyxl engine does not raise."""
    # GH 29854
    column_a = DataFrame({"A": np.linspace(1, 10, 10)})
    column_b = DataFrame({"B": np.linspace(1, 20, 10)})
    combined = pd.concat([column_a, column_b], axis=1)

    colored = combined.style.map(
        lambda val: f"color: {'red' if val < 0 else 'black'}"
    )
    styled = colored.highlight_max()

    styled.to_excel(tmp_excel, engine="openpyxl")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
    """Reading an open openpyxl workbook matches reading the file by path."""
    # GH 39528
    filename = datapath("io", "data", "excel", "test1" + ext)
    workbook = openpyxl.load_workbook(filename, read_only=read_only)
    with contextlib.closing(workbook) as wb:
        from_workbook = pd.read_excel(wb, engine="openpyxl")
    from_path = pd.read_excel(filename)
    tm.assert_frame_equal(from_workbook, from_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "header, expected_data",
    [
        (
            0,
            {
                "Title": [np.nan, "A", 1, 2, 3],
                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
            },
        ),
        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
    ],
)
@pytest.mark.parametrize(
    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
    datapath, ext, header, expected_data, filename, read_only
):
    """Files with missing or wrong dimension information still read fully."""
    # GH 38956, 39001 - no/incorrect dimension information
    path = datapath("io", "data", "excel", f"{filename}{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook) as wb:
            result = pd.read_excel(wb, engine="openpyxl", header=header)
    else:
        result = pd.read_excel(path, header=header)
    tm.assert_frame_equal(result, DataFrame(expected_data))
|
||||
|
||||
|
||||
def test_append_mode_file(tmp_excel):
    """Appending to a workbook rewrites the zip file instead of concatenating."""
    # GH 39576
    empty_frame = DataFrame()
    empty_frame.to_excel(tmp_excel, engine="openpyxl")

    with ExcelWriter(
        tmp_excel, mode="a", engine="openpyxl", if_sheet_exists="new"
    ) as writer:
        empty_frame.to_excel(writer)

    # make sure that zip files are not concatenated by making sure that
    # "docProps/app.xml" only occurs twice in the file
    raw_bytes = Path(tmp_excel).read_bytes()
    occurrences = raw_bytes.count(b"docProps/app.xml")
    assert occurrences == 2
|
||||
|
||||
|
||||
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only):
    """Compare the frame read from ``empty_trailing_rows`` with expected data."""
    # GH 39181
    path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook) as wb:
            result = pd.read_excel(wb, engine="openpyxl")
    else:
        result = pd.read_excel(path)

    expected_data = {
        "Title": [np.nan, "A", 1, 2, 3],
        "Unnamed: 1": [np.nan, "B", 4, 5, 6],
        "Unnamed: 2": [np.nan, "C", 7, 8, 9],
    }
    tm.assert_frame_equal(result, DataFrame(expected_data))
|
||||
|
||||
|
||||
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
    """An empty file containing a data-less row reads as an empty frame."""
    # GH 39547 - empty excel file with a row that has no data
    path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
    if read_only is not None:
        workbook = openpyxl.load_workbook(path, read_only=read_only)
        with contextlib.closing(workbook) as wb:
            result = pd.read_excel(wb, engine="openpyxl")
    else:
        result = pd.read_excel(path)
    tm.assert_frame_equal(result, DataFrame())
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(tmp_excel):
    """``writer.sheets`` reflects a sheet created directly on ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with ExcelWriter(tmp_excel, engine="openpyxl") as writer:
        assert writer.sheets == {}

        created = writer.book.create_sheet("test_name", 0)

        expected_sheets = {"test_name": created}
        assert writer.sheets == expected_sheets
|
||||
|
||||
|
||||
def test_ints_spelled_with_decimals(datapath, ext):
    """Compare ``ints_spelled_with_decimals`` with an integer-valued frame."""
    # GH 46988 - openpyxl returns this sheet with floats
    file_name = f"ints_spelled_with_decimals{ext}"
    path = datapath("io", "data", "excel", file_name)
    expected = DataFrame(range(2, 12), columns=[1])
    result = pd.read_excel(path)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_multiindex_header_no_index_names(datapath, ext):
    """Read a three-level header and three-level index without index names."""
    # GH#47487
    path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
    result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])

    expected_columns = pd.MultiIndex.from_tuples(
        [("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
    )
    expected_index = pd.MultiIndex.from_tuples(
        [("A", "AA", "AAA"), ("A", "BB", "BBB")]
    )
    rows = [[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]]
    expected = DataFrame(rows, columns=expected_columns, index=expected_index)
    tm.assert_frame_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,356 @@
|
||||
import contextlib
|
||||
import uuid
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Timestamp,
|
||||
period_range,
|
||||
read_excel,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
from pandas.io.formats.excel import ExcelFormatter
|
||||
|
||||
# Skip every test in this module when the optional ``jinja2`` package is missing.
pytest.importorskip("jinja2")
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
# could compute styles and render to excel without jinja2, since there is no
# 'template' file, but this needs the import error to be delayed until render
# time.
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_excel(tmp_path):
    """Create an empty, uniquely named ``.xlsx`` file; return its path as str."""
    file_name = f"{uuid.uuid4()}.xlsx"
    excel_path = tmp_path / file_name
    excel_path.touch()
    return str(excel_path)
|
||||
|
||||
|
||||
def assert_equal_cell_styles(cell1, cell2):
    """Assert that two cells carry the same style attributes.

    The alignment, border, fill, font and protection objects are compared
    through their ``__dict__``; ``number_format`` is compared directly.
    Raises ``AssertionError`` on the first mismatch.
    """
    # TODO: should find a better way to check equality
    for attribute in ("alignment", "border", "fill", "font"):
        first_style = getattr(cell1, attribute)
        second_style = getattr(cell2, attribute)
        assert first_style.__dict__ == second_style.__dict__
    assert cell1.number_format == cell2.number_format
    assert cell1.protection.__dict__ == cell2.protection.__dict__
|
||||
|
||||
|
||||
def test_styler_default_values(tmp_excel):
    """Check the default style of cell (1, 1) in a frame written by openpyxl."""
    # GH 54154
    openpyxl = pytest.importorskip("openpyxl")
    records = [{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}]
    frame = DataFrame(records)

    with ExcelWriter(tmp_excel, engine="openpyxl") as writer:
        frame.to_excel(writer, sheet_name="custom")

    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        top_left = wb["custom"].cell(1, 1)

        # Check font, spacing, indentation
        assert top_left.font.bold is False
        assert top_left.alignment.horizontal is None
        assert top_left.alignment.vertical is None

        # Check border
        assert top_left.border.bottom.color is None
        assert top_left.border.top.color is None
        assert top_left.border.left.color is None
        assert top_left.border.right.color is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
def test_styler_to_excel_unstyled(engine, tmp_excel):
    """An unstyled ``Styler.to_excel`` matches ``DataFrame.to_excel`` cell by cell."""
    # compare DataFrame.to_excel and Styler.to_excel when no styles applied
    pytest.importorskip(engine)
    values = np.random.default_rng(2).standard_normal((2, 2))
    frame = DataFrame(values)
    with ExcelWriter(tmp_excel, engine=engine) as writer:
        frame.to_excel(writer, sheet_name="dataframe")
        frame.style.to_excel(writer, sheet_name="unstyled")

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        column_pairs = zip(
            wb["dataframe"].columns,
            wb["unstyled"].columns,
            strict=True,
        )
        for plain_column, styled_column in column_pairs:
            assert len(plain_column) == len(styled_column)
            for plain_cell, styled_cell in zip(
                plain_column, styled_column, strict=True
            ):
                assert plain_cell.value == styled_cell.value
                assert_equal_cell_styles(plain_cell, styled_cell)
|
||||
|
||||
|
||||
# Parametrization table of ``(css, attrs, expected)`` triples.
#   css      - CSS declaration applied through the Styler.
#   attrs    - attribute path walked on the loaded cell via successive ``getattr``.
#   expected - value expected at the end of that path; a dict keyed by engine name
#              ("xlsxwriter" / "openpyxl") when the two writers differ.
shared_style_params = [
    (
        "background-color: #111222",
        ["fill", "fgColor", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    (
        "color: #111222",
        ["font", "color", "value"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("font-family: Arial;", ["font", "name"], "arial"),
    ("font-weight: bold;", ["font", "b"], True),
    ("font-style: italic;", ["font", "i"], True),
    ("text-decoration: underline;", ["font", "u"], "single"),
    ("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
    ("text-align: left;", ["alignment", "horizontal"], "left"),
    (
        "vertical-align: bottom;",
        ["alignment", "vertical"],
        {"xlsxwriter": None, "openpyxl": "bottom"},  # xlsxwriter Fails
    ),
    ("vertical-align: middle;", ["alignment", "vertical"], "center"),
    # Border widths
    ("border-left: 2pt solid red", ["border", "left", "style"], "medium"),
    ("border-left: 1pt dotted red", ["border", "left", "style"], "dotted"),
    ("border-left: 2pt dotted red", ["border", "left", "style"], "mediumDashDotDot"),
    ("border-left: 1pt dashed red", ["border", "left", "style"], "dashed"),
    ("border-left: 2pt dashed red", ["border", "left", "style"], "mediumDashed"),
    ("border-left: 1pt solid red", ["border", "left", "style"], "thin"),
    ("border-left: 3pt solid red", ["border", "left", "style"], "thick"),
    # Border expansion
    (
        "border-left: 2pt solid #111222",
        ["border", "left", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("border: 1pt solid red", ["border", "top", "style"], "thin"),
    (
        "border: 1pt solid #111222",
        ["border", "top", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("border: 1pt solid red", ["border", "right", "style"], "thin"),
    (
        "border: 1pt solid #111222",
        ["border", "right", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("border: 1pt solid red", ["border", "bottom", "style"], "thin"),
    (
        "border: 1pt solid #111222",
        ["border", "bottom", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("border: 1pt solid red", ["border", "left", "style"], "thin"),
    (
        "border: 1pt solid #111222",
        ["border", "left", "color", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    # Border styles
    (
        "border-left-style: hair; border-left-color: black",
        ["border", "left", "style"],
        "hair",
    ),
]
|
||||
|
||||
|
||||
def test_styler_custom_style(tmp_excel):
    """A background colour set via ``Styler.map`` reaches data cells, not the header."""
    # GH 54154
    css_style = "background-color: #111222"
    openpyxl = pytest.importorskip("openpyxl")
    frame = DataFrame([{"A": 1, "B": 2}, {"A": 1, "B": 2}])

    with ExcelWriter(tmp_excel, engine="openpyxl") as writer:
        styled = frame.style.map(lambda x: css_style)
        styled.to_excel(writer, sheet_name="custom", index=False)

    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        sheet = wb["custom"]
        header_cell = sheet.cell(1, 1)

        # Check font, spacing, indentation
        assert header_cell.font.bold is False
        assert header_cell.alignment.horizontal is None
        assert header_cell.alignment.vertical is None

        # Check border
        for side in ("bottom", "top", "left", "right"):
            assert getattr(header_cell.border, side).color is None

        # Check background color
        for column in (1, 2):
            for row in (2, 3):
                assert sheet.cell(row, column).fill.fgColor.index == "00111222"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected, tmp_excel):
    """A CSS rule mapped onto data cells shows up as the expected Excel cell style."""
    pytest.importorskip(engine)
    frame = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styled = frame.style.map(lambda x: css)

    with ExcelWriter(tmp_excel, engine=engine) as writer:
        frame.to_excel(writer, sheet_name="dataframe")
        styled.to_excel(writer, sheet_name="styled")

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        # test unstyled data cell does not have expected styles
        # test styled cell has expected styles
        plain_value = wb["dataframe"].cell(2, 2)
        styled_value = wb["styled"].cell(2, 2)

    # Walk the attribute path; the unstyled side falls back to None when missing.
    for name in attrs:
        plain_value, styled_value = (
            getattr(plain_value, name, None),
            getattr(styled_value, name),
        )

    # A dict expectation is keyed by writer engine.
    wanted = expected[engine] if isinstance(expected, dict) else expected
    assert plain_value is None or plain_value != wanted
    assert styled_value == wanted
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected, tmp_excel):
    """A CSS rule mapped onto index/column labels shows up on the header cells."""
    pytest.importorskip(engine)
    frame = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))

    styled = frame.style
    for axis in (0, 1):
        styled.map_index(lambda x: css, axis=axis)

    # Baseline styler carrying a CSS declaration that maps to no Excel style.
    baseline = frame.style
    baseline.map(lambda x: "null: css;")
    for axis in (0, 1):
        baseline.map_index(lambda x: "null: css;", axis=axis)

    with ExcelWriter(tmp_excel, engine=engine) as writer:
        baseline.to_excel(writer, sheet_name="null_styled")
        styled.to_excel(writer, sheet_name="styled")

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        # test null styled index cells does not have expected styles
        # test styled cell has expected styles
        null_sheet, styled_sheet = wb["null_styled"], wb["styled"]
        ui_cell, si_cell = null_sheet.cell(2, 1), styled_sheet.cell(2, 1)
        uc_cell, sc_cell = null_sheet.cell(1, 2), styled_sheet.cell(1, 2)

    for name in attrs:
        ui_cell, si_cell = getattr(ui_cell, name, None), getattr(si_cell, name)
        uc_cell, sc_cell = getattr(uc_cell, name, None), getattr(sc_cell, name)

    # A dict expectation is keyed by writer engine.
    wanted = expected[engine] if isinstance(expected, dict) else expected
    assert ui_cell is None or ui_cell != wanted
    assert si_cell == wanted
    assert uc_cell is None or uc_cell != wanted
    assert sc_cell == wanted
|
||||
|
||||
|
||||
# Border style names used to parametrize the border-style test below.
# From https://openpyxl.readthedocs.io/en/stable/api/openpyxl.styles.borders.html
# Note: Leaving behavior of "width"-type styles undefined; user should use border-width
# instead
# (the width-type names "thin", "medium" and "thick" are therefore commented out)
excel_border_styles = [
    # "thin",
    "dashed",
    "mediumDashDot",
    "dashDotDot",
    "hair",
    "dotted",
    "mediumDashDotDot",
    # "medium",
    "double",
    "dashDot",
    "slantDashDot",
    # "thick",
    "mediumDashed",
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
@pytest.mark.parametrize("border_style", excel_border_styles)
def test_styler_to_excel_border_style(engine, border_style, tmp_excel):
    """A named CSS border style is written as the matching Excel left-border style.

    The unstyled sheet must not carry the style; the styled sheet must.
    """
    css = f"border-left: {border_style} black thin"
    attrs = ["border", "left", "style"]
    expected = border_style

    pytest.importorskip(engine)
    df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styler = df.style.map(lambda x: css)

    with ExcelWriter(tmp_excel, engine=engine) as writer:
        df.to_excel(writer, sheet_name="dataframe")
        styler.to_excel(writer, sheet_name="styled")

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        # test unstyled data cell does not have expected styles
        # test styled cell has expected styles
        u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)

    # Walk the attribute path; the unstyled side falls back to None when missing.
    for attr in attrs:
        u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)

    # ``expected`` is always the plain ``border_style`` string in this test, so the
    # per-engine dict lookup used by the shared-parameter tests is not needed.
    assert u_cell is None or u_cell != expected
    assert s_cell == expected
|
||||
|
||||
|
||||
def test_styler_custom_converter(tmp_excel):
    """A ``style_converter`` passed to ``ExcelFormatter`` decides the written style."""
    openpyxl = pytest.importorskip("openpyxl")

    def custom_converter(css):
        # Ignore the incoming CSS and always return the same font colour.
        fixed_style = {"font": {"color": {"rgb": "111222"}}}
        return fixed_style

    frame = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
    styled = frame.style.map(lambda x: "color: #888999")
    with ExcelWriter(tmp_excel, engine="openpyxl") as writer:
        formatter = ExcelFormatter(styled, style_converter=custom_converter)
        formatter.write(writer, sheet_name="custom")

    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        data_cell = wb["custom"].cell(2, 2)
        assert data_cell.font.color.value == "00111222"
|
||||
|
||||
|
||||
@pytest.mark.single_cpu
@td.skip_if_not_us_locale
def test_styler_to_s3(s3_bucket_public, s3so):
    """Round-trip a Styler through ``to_excel`` / ``read_excel`` on an ``s3://`` URI."""
    # GH#46381
    bucket = s3_bucket_public.name
    key = f"{uuid.uuid4()}.xlsx"
    uri = f"s3://{bucket}/{key}"

    df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
    df.style.set_sticky(axis="index").to_excel(uri, storage_options=s3so)

    result = read_excel(uri, index_col=0, storage_options=s3so)
    tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
def test_format_hierarchical_rows_periodindex(merge_cells):
    """Period labels in a MultiIndex are emitted as ``Timestamp`` cell values."""
    # GH#60099
    dates = period_range(start="2006-10-06", end="2006-10-07", freq="D")
    row_index = MultiIndex.from_arrays(
        [dates, ["X", "Y"]],
        names=["date", "category"],
    )
    df = DataFrame({"A": [1, 2]}, index=row_index)
    formatter = ExcelFormatter(df, merge_cells=merge_cells)

    for cell in formatter._format_hierarchical_rows():
        # Only first-column cells below row 0 are checked.
        if cell.row == 0 or cell.col != 0:
            continue
        assert isinstance(cell.val, Timestamp), (
            "Period should be converted to Timestamp"
        )
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.excel import ExcelFile
|
||||
from pandas.io.excel._base import inspect_excel_format
|
||||
|
||||
xlrd = pytest.importorskip("xlrd")
|
||||
|
||||
|
||||
@pytest.fixture
def read_ext_xlrd():
    """
    File extension readable with the xlrd engine.

    Like read_ext, minus .ods and .xlsb, and minus .xlsx/.xlsm for xlrd>2.
    """
    extension = ".xls"
    return extension
|
||||
|
||||
|
||||
def test_read_xlrd_book(read_ext_xlrd, datapath):
    """Reading an open xlrd book via ``ExcelFile`` matches reading the book directly."""
    engine = "xlrd"
    sheet_name = "Sheet1"
    workbook_path = datapath("io", "data", "excel", "test1.xls")
    with xlrd.open_workbook(workbook_path) as book:
        with ExcelFile(book, engine=engine) as excel_file:
            result = pd.read_excel(excel_file, sheet_name=sheet_name, index_col=0)

        expected = pd.read_excel(
            book,
            sheet_name=sheet_name,
            engine=engine,
            index_col=0,
        )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_xlsx_fails(datapath):
    """Reading an ``.xlsx`` file with ``engine="xlrd"`` raises ``XLRDError``."""
    # GH 29375
    from xlrd.biffh import XLRDError

    xlsx_path = datapath("io", "data", "excel", "test1.xlsx")
    expected_message = "Excel xlsx file; not supported"
    with pytest.raises(XLRDError, match=expected_message):
        pd.read_excel(xlsx_path, engine="xlrd")
|
||||
|
||||
|
||||
def test_nan_in_xls(datapath):
    """``test6.xls`` is read as an int64 column and a float column ending in NaN."""
    # GH 54564
    xls_path = datapath("io", "data", "excel", "test6.xls")

    first_column = np.r_[0, 2].astype("int64")
    second_column = np.r_[1, np.nan]
    expected = pd.DataFrame({0: first_column, 1: second_column})

    result = pd.read_excel(xls_path, header=None)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "file_header",
    [
        b"\x09\x00\x04\x00\x07\x00\x10\x00",
        b"\x09\x02\x06\x00\x00\x00\x10\x00",
        b"\x09\x04\x06\x00\x00\x00\x10\x00",
        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
    ],
)
def test_read_old_xls_files(file_header):
    """Each listed 8-byte header is detected as the ``"xls"`` format."""
    # GH 41226
    stream = io.BytesIO(file_header)
    detected = inspect_excel_format(stream)
    assert detected == "xls"
|
||||
@ -0,0 +1,86 @@
|
||||
import contextlib
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from pandas.io.excel import ExcelWriter
|
||||
|
||||
xlsxwriter = pytest.importorskip("xlsxwriter")
|
||||
|
||||
|
||||
@pytest.fixture
def ext():
    """File extension used for the workbooks written in this module."""
    extension = ".xlsx"
    return extension
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_excel(ext, tmp_path):
    """Create an empty, uniquely named file with extension ``ext``; return its path."""
    workbook_path = tmp_path.joinpath(f"{uuid.uuid4()}{ext}")
    workbook_path.touch()
    return str(workbook_path)
|
||||
|
||||
|
||||
def test_column_format(tmp_excel):
    """Write a frame, set a number format on column B, and read it back from B2."""
    # Test that column formats are applied to cells. Test for issue #9167.
    # Applicable to xlsxwriter only.
    # NOTE(review): no ``engine=`` is passed to ExcelWriter below; presumably the
    # default writer for .xlsx resolves to xlsxwriter here - confirm.
    openpyxl = pytest.importorskip("openpyxl")

    frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})

    with ExcelWriter(tmp_excel) as writer:
        frame.to_excel(writer)

        # Add a number format to col B and ensure it is applied to cells.
        num_format = "#,##0"
        write_workbook = writer.book
        write_worksheet = write_workbook.worksheets()[0]
        col_format = write_workbook.add_format({"num_format": num_format})
        write_worksheet.set_column("B:B", None, col_format)

    # The file is read back with openpyxl; each lookup has a fallback for a
    # different accessor spelling.
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as read_workbook:
        try:
            read_worksheet = read_workbook["Sheet1"]
        except TypeError:
            # compat
            read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")

    # Get the number format from the cell.
    try:
        cell = read_worksheet["B2"]
    except TypeError:
        # compat
        cell = read_worksheet.cell("B2")

    try:
        read_num_format = cell.number_format
    except AttributeError:
        read_num_format = cell.style.number_format._format_code

    assert read_num_format == num_format
|
||||
|
||||
|
||||
def test_write_append_mode_raises(tmp_excel):
    """Opening an xlsxwriter ``ExcelWriter`` with ``mode="a"`` raises ``ValueError``."""
    expected_error = pytest.raises(
        ValueError, match="Append mode is not supported with xlsxwriter!"
    )
    with expected_error:
        ExcelWriter(tmp_excel, engine="xlsxwriter", mode="a")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(tmp_excel, nan_inf_to_errors):
    """``engine_kwargs["options"]`` is reflected on the xlsxwriter workbook object."""
    # GH 42286
    workbook_options = {"nan_inf_to_errors": nan_inf_to_errors}
    writer_cm = ExcelWriter(
        tmp_excel,
        engine="xlsxwriter",
        engine_kwargs={"options": workbook_options},
    )
    with writer_cm as writer:
        assert writer.book.nan_inf_to_errors == nan_inf_to_errors
|
||||
|
||||
|
||||
def test_book_and_sheets_consistent(tmp_excel):
    """A worksheet added through ``writer.book`` appears in ``writer.sheets``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with ExcelWriter(tmp_excel, engine="xlsxwriter") as writer:
        assert writer.sheets == {}
        added_sheet = writer.book.add_worksheet("test_name")
        expected_sheets = {"test_name": added_sheet}
        assert writer.sheets == expected_sheets
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,360 @@
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
read_csv,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
|
||||
|
||||
def bar_grad(a=None, b=None, c=None, d=None):
    """Build the expected CSS property list for a bar cell.

    Always includes ``("width", "10em")``; when any argument is not ``None`` a
    ``background`` linear-gradient made of the truthy arguments is appended.
    """
    stops = (a, b, c, d)
    styles = [("width", "10em")]
    if any(stop is not None for stop in stops):
        gradient = ",".join(stop for stop in stops if stop)
        styles.append(("background", f"linear-gradient(90deg,{gradient})"))
    return styles
|
||||
|
||||
|
||||
def no_bar():
    """Expected styles for a cell without a bar: ``bar_grad`` with no arguments."""
    empty_cell_styles = bar_grad()
    return empty_cell_styles
|
||||
|
||||
|
||||
def bar_to(x, color="#d65f5f"):
    """Expected styles for a bar of ``color`` up to ``x`` percent, transparent after."""
    percent = f"{x:.1f}%"
    return bar_grad(f" {color} {percent}", f" transparent {percent}")
|
||||
|
||||
|
||||
def bar_from_to(x, y, color="#d65f5f"):
    """Expected styles for a bar of ``color`` between ``x`` and ``y`` percent."""
    start = f"{x:.1f}%"
    stop = f"{y:.1f}%"
    gradient_stops = (
        f" transparent {start}",
        f" {color} {start}",
        f" {color} {stop}",
        f" transparent {stop}",
    )
    return bar_grad(*gradient_stops)
|
||||
|
||||
|
||||
@pytest.fixture
def df_pos():
    """Single-column frame holding the positive values 1, 2, 3."""
    rows = [[1], [2], [3]]
    return DataFrame(rows)
|
||||
|
||||
|
||||
@pytest.fixture
def df_neg():
    """Single-column frame holding the negative values -1, -2, -3."""
    rows = [[-1], [-2], [-3]]
    return DataFrame(rows)
|
||||
|
||||
|
||||
@pytest.fixture
def df_mix():
    """Single-column frame holding the mixed-sign values -3, 1, 2."""
    rows = [[-3], [1], [2]]
    return DataFrame(rows)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(50), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
        ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
        ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
        ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
    ],
)
def test_align_positive_cases(df_pos, align, exp):
    """Each ``align`` option yields the listed bar styles for all-positive data."""
    # test different align cases for all positive values
    computed = df_pos.style.bar(align=align)._compute()
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert computed.ctx == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [bar_to(100), bar_to(50), no_bar()]),
        ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
        ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
        ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
        ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
    ],
)
def test_align_negative_cases(df_neg, align, exp):
    """Each ``align`` option yields the listed bar styles for all-negative data."""
    # test different align cases for all negative values
    computed = df_neg.style.bar(align=align)._compute()
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert computed.ctx == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(80), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
        ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
        ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
    ],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
    """Each ``align`` option yields the listed styles for mixed-sign data, +/- a NaN row."""
    # test different align cases for mixed positive and negative values
    # also test no impact of NaNs and no_bar
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    if nans:
        # Append an all-NaN row, which is expected to render without a bar.
        df_mix.loc[3, :] = np.nan
        expected[(3, 0)] = no_bar()
    computed = df_mix.style.bar(align=align)._compute()
    assert computed.ctx == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "align, exp",
    [
        (
            "left",
            {
                "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
                "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
                "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
            },
        ),
        (
            "mid",
            {
                "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
                "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
                "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
            },
        ),
        (
            "zero",
            {
                "index": [
                    [bar_from_to(50, 66.66), bar_from_to(50, 75)],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(50, 62.5), bar_from_to(50, 75)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
            },
        ),
        (
            2,
            {
                "index": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(25, 50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
            },
        ),
    ],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
    """Bar styles of a 2x2 positive frame match the table for every axis/align pair."""
    # test all axis combinations with positive values and different aligns
    data = DataFrame([[1, 2], [3, 4]])
    # The parametrized string "none" stands in for ``axis=None``.
    bar_axis = None if axis == "none" else axis
    result = data.style.bar(align=align, axis=bar_axis)._compute().ctx

    grid = exp[axis]
    expected = {(row, col): grid[row][col] for row in range(2) for col in range(2)}
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 1.5, 2.5),
        ("negative", -2.5, -1.5),
        ("mixed", -2.5, 1.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """``bar(vmin, vmax)`` gives the same styles as ``bar`` on pre-clipped data."""
    # test that clipping occurs if any vmin > data_values or vmax < data_values
    if align == "mid":  # mid acts as left or right in each case
        align = {"positive": "left", "negative": "right"}.get(values, align)
    frames = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}
    df = frames[values]
    if nullify == "vmin":
        vmin = None
    if nullify == "vmax":
        vmax = None

    # Build the reference frame by clipping manually; +/-999 stand in for "no bound".
    upper = vmax if vmax else 999
    clip_df = df.where(df <= upper, other=vmax)
    lower = vmin if vmin else -999
    clip_df = clip_df.where(clip_df >= lower, other=vmin)

    bounded = df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
    result = bounded._compute().ctx
    reference = clip_df.style.bar(align=align, color=["red", "green"])
    expected = reference._compute().ctx
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 0.5, 4.5),
        ("negative", -4.5, -0.5),
        ("mixed", -4.5, 4.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """``bar(vmin, vmax)`` styles are a subset of ``bar`` on data extended by the bounds."""
    # test that widening occurs if any vmax > data_values or vmin < data_values
    if align == "mid":  # mid acts as left or right in each case
        align = {"positive": "left", "negative": "right"}.get(values, align)
    frames = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}
    df = frames[values]
    if nullify == "vmin":
        vmin = None
    if nullify == "vmax":
        vmax = None

    # Reference frame: the original data plus two extra rows holding the bounds.
    expand_df = df.copy()
    expand_df.loc[3, :] = vmin
    expand_df.loc[4, :] = vmax

    bounded = df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
    result = bounded._compute().ctx
    reference = expand_df.style.bar(align=align, color=["red", "green"])
    expected = reference._compute().ctx
    assert result.items() <= expected.items()
|
||||
|
||||
|
||||
def test_numerics():
    """Cells of the string column receive no bar style entries."""
    # test data is pre-selected for numeric values
    data = DataFrame([[1, "a"], [2, "b"]])
    result = data.style.bar()._compute().ctx
    for string_cell in ((0, 1), (1, 1)):
        assert string_cell not in result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(100, "green")]),
        ("right", [bar_to(100, "red"), no_bar()]),
        ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
        ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
    ],
)
def test_colors_mixed(align, exp):
    """With two colours, the -1 and 3 cells get the listed red/green bar styles."""
    data = DataFrame([[-1], [3]])
    styled = data.style.bar(align=align, color=["red", "green"])
    result = styled._compute().ctx
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert result == expected
|
||||
|
||||
|
||||
def test_bar_align_height():
    """``height=50`` adds ``no-repeat center`` and a ``background-size`` entry."""
    # test when keyword height is used 'no-repeat center' and 'background-size' present
    data = DataFrame([[1], [2]])
    result = data.style.bar(align="left", height=50)._compute().ctx

    width = ("width", "10em")
    bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
    full_bar = [width, ("background", bg_s), ("background-size", "100% 50.0%")]
    expected = {(0, 0): [width], (1, 0): full_bar}
    assert result == expected
|
||||
|
||||
|
||||
def test_bar_value_error_raises():
    """Invalid ``align``, ``width`` or ``height`` arguments raise ``ValueError``."""
    df = DataFrame({"A": [-100, -60, -30, -20]})

    align_msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or"
    with pytest.raises(ValueError, match=align_msg):
        df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html()

    width_msg = r"`width` must be a value in \[0, 100\]"
    with pytest.raises(ValueError, match=width_msg):
        df.style.bar(width=200).to_html()

    height_msg = r"`height` must be a value in \[0, 100\]"
    with pytest.raises(ValueError, match=height_msg):
        df.style.bar(height=200).to_html()
|
||||
|
||||
|
||||
def test_bar_color_and_cmap_error_raises():
    """Passing both ``color`` and ``cmap`` to ``Styler.bar`` raises ``ValueError``."""
    df = DataFrame({"A": [1, 2, 3, 4]})
    # Test that providing both color and cmap raises a ValueError
    expected_error = pytest.raises(
        ValueError, match="`color` and `cmap` cannot both be given"
    )
    with expected_error:
        df.style.bar(color="#d65f5f", cmap="viridis").to_html()
|
||||
|
||||
|
||||
def test_bar_invalid_color_type_error_raises():
    """A non-string colour or a colour list of three items raises ``ValueError``."""
    df = DataFrame({"A": [1, 2, 3, 4]})
    msg = (
        r"`color` must be string or list or tuple of 2 strings,"
        r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)"
    )
    invalid_colors = (
        # Test that providing an invalid color type raises a ValueError
        123,
        # Test that providing a color list with more than two elements raises a
        # ValueError
        ["#d65f5f", "#5fba7d", "#abcdef"],
    )
    for bad_color in invalid_colors:
        with pytest.raises(ValueError, match=msg):
            df.style.bar(color=bad_color).to_html()
|
||||
|
||||
|
||||
def test_styler_bar_with_NA_values():
    """``Styler.bar`` renders HTML for frames that contain ``NA`` values."""
    partly_missing = DataFrame({"A": [1, 2, NA, 4]})
    all_missing = DataFrame([[NA, NA], [NA, NA]])
    expected_substring = "style type="

    rendered = [
        partly_missing.style.bar(subset="A").to_html(),
        all_missing.style.bar(align="left", axis=None).to_html(),
    ]
    for html_output in rendered:
        assert expected_substring in html_output
|
||||
|
||||
|
||||
def test_style_bar_with_pyarrow_NA_values():
    """``Styler.bar`` renders HTML for a pyarrow-backed column with a missing value."""
    pytest.importorskip("pyarrow")
    data = """name,age,test1,test2,teacher
Adam,15,95.0,80,Ashby
Bob,16,81.0,82,Ashby
Dave,16,89.0,84,Jones
Fred,15,,88,Jones"""
    buffer = io.StringIO(data)
    df = read_csv(buffer, dtype_backend="pyarrow")
    html_output = df.style.bar(subset="test1").to_html()
    expected_substring = "style type="
    assert expected_substring in html_output
|
||||
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
|
||||
jinja2 = pytest.importorskip("jinja2")
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """2x2 frame with columns ``A``/``B`` and index ``x``/``y``."""
    values = [[0, -0.609], [1, -1.228]]
    return DataFrame(data=values, columns=["A", "B"], index=["x", "y"])
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """``Styler`` over the ``df`` fixture, built with ``uuid_len=0``."""
    fixture_styler = Styler(df, uuid_len=0)
    return fixture_styler
|
||||
|
||||
|
||||
def test_concat_bad_columns(styler):
    """Concatenating a Styler whose data has different columns raises ``ValueError``."""
    other = DataFrame([[1, 2]]).style
    msg = "`other.data` must have same columns as `Styler.data"
    with pytest.raises(ValueError, match=msg):
        styler.concat(other)
|
||||
|
||||
|
||||
def test_concat_bad_type(styler):
    """Concatenating a plain ``DataFrame`` instead of a Styler raises ``TypeError``."""
    not_a_styler = DataFrame([[1, 2]])
    msg = "`other` must be of type `Styler`"
    with pytest.raises(TypeError, match=msg):
        styler.concat(not_a_styler)
|
||||
|
||||
|
||||
def test_concat_bad_index_levels(styler, df):
    """Concatenating a Styler with a two-level index raises ``ValueError``."""
    two_level = df.copy()
    two_level.index = MultiIndex.from_tuples([(0, 0), (1, 1)])
    other = two_level.style
    msg = "number of index levels must be same in `other`"
    with pytest.raises(ValueError, match=msg):
        styler.concat(other)
|
||||
@ -0,0 +1,661 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Timestamp,
|
||||
option_context,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
from pandas.io.formats.style import Styler
|
||||
from pandas.io.formats.style_render import _str_escape
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """2x2 frame with columns ``A``/``B`` and index ``x``/``y``."""
    values = [[0, -0.609], [1, -1.228]]
    return DataFrame(data=values, columns=["A", "B"], index=["x", "y"])
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Return a Styler over the ``df`` fixture, created with ``uuid_len=0``."""
    result = Styler(df, uuid_len=0)
    return result
|
||||
|
||||
|
||||
@pytest.fixture
def df_multi():
    """Provide a 4x4 frame with two-level, named MultiIndexes on both axes."""
    frame = DataFrame(
        data=np.arange(16).reshape(4, 4),
        columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
        index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]),
    )
    # Name the index levels first, then the column levels.
    frame = frame.rename_axis(["0_0", "0_1"], axis=0)
    return frame.rename_axis(["1_0", "1_1"], axis=1)
|
||||
|
||||
|
||||
@pytest.fixture
def styler_multi(df_multi):
    """Return a Styler over the ``df_multi`` fixture, created with ``uuid_len=0``."""
    result = Styler(df_multi, uuid_len=0)
    return result
|
||||
|
||||
|
||||
def test_display_format(styler):
    """A ``"{:0.1f}"`` formatter yields short display values for every body cell.

    The checks are flattened to one boolean per cell. Passing a generator of
    lists to ``all`` only tests that each list is non-empty, which made the
    original assertions vacuous.
    """
    ctx = styler.format("{:0.1f}")._translate(True, True)
    body = ctx["body"]

    # every cell in the body (index header and data) carries a display value
    assert all("display_value" in cell for row in body for cell in row)

    # data cells (everything after the index header) are at most 3 characters
    # once a leading minus sign is ignored, e.g. "-0.6" -> "0.6"
    assert all(
        len(cell["display_value"].lstrip("-")) <= 3
        for row in body
        for cell in row[1:]
    )
    assert len(body[0][1]["display_value"].lstrip("-")) <= 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
    """``format_index`` changes the displayed index/column labels per axis.

    ``index`` / ``columns`` toggle whether a formatter is installed on axis 0
    (a callable) and axis 1 (a format string) respectively.
    """
    exp_index = ["x", "y"]
    if index:
        styler.format_index(lambda v: v.upper(), axis=0)  # test callable
        exp_index = ["X", "Y"]

    exp_columns = ["A", "B"]
    if columns:
        styler.format_index("*{}*", axis=1)  # test string
        exp_columns = ["*A*", "*B*"]

    ctx = styler._translate(True, True)

    for r, row in enumerate(ctx["body"]):
        assert row[0]["display_value"] == exp_index[r]

    # Column header cells live in the first header row, after the leading
    # non-column cell. Slicing the list of header rows (``ctx["head"][1:]``)
    # is empty for a single header row, so nothing was ever compared.
    for c, col in enumerate(ctx["head"][0][1:]):
        assert col["display_value"] == exp_columns[c]
|
||||
|
||||
|
||||
def test_format_dict(styler):
    """A dict of per-column format strings is applied to the matching columns."""
    formats = {"A": "{:0.1f}", "B": "{0:.2%}"}
    first_row = styler.format(formats)._translate(True, True)["body"][0]
    assert first_row[1]["display_value"] == "0.0"
    assert first_row[2]["display_value"] == "-60.90%"
|
||||
|
||||
|
||||
def test_format_index_dict(styler):
    """A ``{level: callable}`` dict passed to ``format_index`` formats index labels."""
    formatters = {0: lambda label: label.upper()}
    body = styler.format_index(formatters)._translate(True, True)["body"]
    for position, wanted in zip(range(2), ["X", "Y"]):
        assert body[position][0]["display_value"] == wanted
|
||||
|
||||
|
||||
def test_format_string(styler):
    """A single format string is applied to every data cell."""
    body = styler.format("{:.2f}")._translate(True, True)["body"]
    wanted = {
        (0, 1): "0.00",
        (0, 2): "-0.61",
        (1, 1): "1.00",
        (1, 2): "-1.23",
    }
    for (row, col), text in wanted.items():
        assert body[row][col]["display_value"] == text
|
||||
|
||||
|
||||
def test_format_callable(styler):
    """A callable formatter receives each value and its result is displayed."""

    def sign_label(value):
        return "neg" if value < 0 else "pos"

    body = styler.format(sign_label)._translate(True, True)["body"]
    wanted = {(0, 1): "pos", (0, 2): "neg", (1, 1): "pos", (1, 2): "neg"}
    for (row, col), text in wanted.items():
        assert body[row][col]["display_value"] == text
|
||||
|
||||
|
||||
def test_format_with_na_rep():
|
||||
# GH 21527 28358
|
||||
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
|
||||
|
||||
ctx = df.style.format(None, na_rep="-")._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == "-"
|
||||
assert ctx["body"][0][2]["display_value"] == "-"
|
||||
|
||||
ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == "-"
|
||||
assert ctx["body"][0][2]["display_value"] == "-"
|
||||
assert ctx["body"][1][1]["display_value"] == "110.00%"
|
||||
assert ctx["body"][1][2]["display_value"] == "120.00%"
|
||||
|
||||
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True)
|
||||
assert ctx["body"][0][2]["display_value"] == "-"
|
||||
assert ctx["body"][1][2]["display_value"] == "120.00%"
|
||||
|
||||
|
||||
def test_format_index_with_na_rep():
|
||||
df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA])
|
||||
ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True)
|
||||
assert ctx["head"][0][1]["display_value"] == "A"
|
||||
for i in [2, 3, 4, 5]:
|
||||
assert ctx["head"][0][i]["display_value"] == "--"
|
||||
|
||||
|
||||
def test_format_non_numeric_na():
|
||||
# GH 21527 28358
|
||||
df = DataFrame(
|
||||
{
|
||||
"object": [None, np.nan, "foo"],
|
||||
"datetime": [None, NaT, Timestamp("20120101")],
|
||||
}
|
||||
)
|
||||
ctx = df.style.format(None, na_rep="-")._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == "-"
|
||||
assert ctx["body"][0][2]["display_value"] == "-"
|
||||
assert ctx["body"][1][1]["display_value"] == "-"
|
||||
assert ctx["body"][1][2]["display_value"] == "-"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, attr, kwargs",
    [
        ("format", "_display_funcs", {}),
        ("format_index", "_display_funcs_index", {"axis": 0}),
        ("format_index", "_display_funcs_columns", {"axis": 1}),
    ],
)
def test_format_clear(styler, func, attr, kwargs):
    """Calling a format method without a formatter removes the stored one."""
    key = (0, 0)
    assert key not in getattr(styler, attr)  # using default

    getattr(styler, func)("{:.2f}", **kwargs)
    assert key in getattr(styler, attr)  # formatter is specified

    getattr(styler, func)(**kwargs)
    assert key not in getattr(styler, attr)  # formatter cleared to default
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "escape, exp",
    [
        # HTML escaping turns <, >, & and " into character references
        ("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
        (
            "latex",
            '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
            "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
            "\\textbackslash \\space ",
        ),
    ],
)
def test_format_escape_html(escape, exp):
    """``escape`` applies to the value only, never to the surrounding formatter.

    ``exp`` is the escaped form of ``chars`` for the given ``escape`` mode; the
    literal ``&`` characters of the ``"&{0}&"`` formatter must survive as-is.
    """
    chars = '<>&"%$#_{}~^\\~ ^ \\ '
    df = DataFrame([[chars]])

    s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
    assert expected in s.to_html()

    # only the value should be escaped before passing to the formatter
    s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
    assert expected in s.to_html()

    # also test format_index()
    styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
    styler.format_index("&{0}&", escape=None, axis=1)
    assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&"
    styler.format_index("&{0}&", escape=escape, axis=1)
    assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "chars, expected",
    [
        (
            r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
            r"$ \$&%#_{}~^\ $ "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \$",
        ),
        (
            r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
            r"\( &%#_{}~^\ \) "
            r"\&\%\#\_\{\}\textasciitilde \textasciicircum "
            r"\textbackslash \space \textbackslash (",
        ),
        (
            r"$\&%#_{}^\$",
            r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
        ),
        (
            r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
            r"$ \frac{1}{2} $"
            r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
        ),
    ],
)
def test_format_escape_latex_math(chars, expected):
    """``escape="latex-math"`` produces the expected display value for one cell."""
    # GH 51903
    # latex-math escape works for each DataFrame cell separately. If we have
    # a combination of dollar signs and brackets, the dollar sign would apply.
    frame = DataFrame([[chars]])
    formatted = frame.style.format("{0}", escape="latex-math")
    cell = formatted._translate(True, True)["body"][0][1]
    assert cell["display_value"] == expected
|
||||
|
||||
|
||||
def test_format_escape_na_rep():
|
||||
# tests the na_rep is not escaped
|
||||
df = DataFrame([['<>&"', None]])
|
||||
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
|
||||
ex = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
|
||||
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
|
||||
assert ex in s.to_html()
|
||||
assert expected2 in s.to_html()
|
||||
|
||||
# also test for format_index()
|
||||
df = DataFrame(columns=['<>&"', None])
|
||||
styler = Styler(df, uuid_len=0)
|
||||
styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
|
||||
ctx = styler._translate(True, True)
|
||||
assert ctx["head"][0][1]["display_value"] == "X&<>&">X"
|
||||
assert ctx["head"][0][2]["display_value"] == "&"
|
||||
|
||||
|
||||
def test_format_escape_floats(styler):
    """``escape="html"`` does not interfere with numeric formatting or precision."""
    # test given formatter for number format is not impacted by escape
    with_formatter = styler.format("{:.1f}", escape="html")
    for fragment in (">0.0<", ">1.0<", ">-1.2<", ">-0.6<"):
        assert fragment in with_formatter.to_html()

    # tests precision of floats is not impacted by escape
    with_precision = styler.format(precision=1, escape="html")
    for fragment in (">0<", ">1<", ">-1.2<", ">-0.6<"):
        assert fragment in with_precision.to_html()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
    """Formatters that are neither str nor callable are rejected with TypeError."""
    method = getattr(styler, func)
    with pytest.raises(TypeError, match="expected str or callable"):
        method(formatter)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "precision, expected",
    [
        (1, ["1.0", "2.0", "3.2", "4.6"]),
        (2, ["1.00", "2.01", "3.21", "4.57"]),
        (3, ["1.000", "2.009", "3.212", "4.566"]),
    ],
)
def test_format_with_precision(precision, expected):
    """``precision`` controls float display for both data cells and column labels."""
    # Issue #13257
    frame = DataFrame(
        [[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566]
    )
    styler = Styler(frame)
    styler.format(precision=precision)
    styler.format_index(precision=precision, axis=1)

    ctx = styler._translate(True, True)
    data_row, header_row = ctx["body"][0], ctx["head"][0]
    for position, text in enumerate(expected, start=1):
        assert data_row[position]["display_value"] == text  # format test
        assert header_row[position]["display_value"] == text  # format_index test
|
||||
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "level, expected",
    [
        (0, ["X", "X", "_", "_"]),  # level int
        ("zero", ["X", "X", "_", "_"]),  # level name
        (1, ["_", "_", "X", "X"]),  # other level int
        ("one", ["_", "_", "X", "X"]),  # other level name
        ([0, 1], ["X", "X", "X", "X"]),  # both levels
        ([0, "zero"], ["X", "X", "_", "_"]),  # level int and name simultaneous
        ([0, "one"], ["X", "X", "X", "X"]),  # both levels as int and name
        (["one", "zero"], ["X", "X", "X", "X"]),  # both level names, reversed
    ],
)
def test_format_index_level(axis, level, expected):
    """``format_index(level=...)`` only reformats labels of the selected level(s)."""
    midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
    frame = DataFrame([[1, 2], [3, 4]])
    if axis == 0:
        frame.index = midx
    else:
        frame.columns = midx

    ctx = frame.style.format_index(
        lambda v: "X", level=level, axis=axis
    )._translate(True, True)

    # collect level 0 labels first, then level 1 labels
    if axis == 0:  # compare index
        result = [
            ctx["body"][s][lvl]["display_value"] for lvl in range(2) for s in range(2)
        ]
    else:  # compare columns
        result = [
            ctx["head"][lvl][s + 1]["display_value"]
            for lvl in range(2)
            for s in range(2)
        ]

    assert expected == result
|
||||
|
||||
|
||||
def test_format_subset():
|
||||
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
|
||||
ctx = df.style.format(
|
||||
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
|
||||
)._translate(True, True)
|
||||
expected = "0.1"
|
||||
raw_11 = "1.123400"
|
||||
assert ctx["body"][0][1]["display_value"] == expected
|
||||
assert ctx["body"][1][1]["display_value"] == raw_11
|
||||
assert ctx["body"][0][2]["display_value"] == "12.34%"
|
||||
|
||||
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == expected
|
||||
assert ctx["body"][1][1]["display_value"] == raw_11
|
||||
|
||||
ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == expected
|
||||
assert ctx["body"][0][2]["display_value"] == "0.123400"
|
||||
|
||||
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == expected
|
||||
assert ctx["body"][1][1]["display_value"] == raw_11
|
||||
|
||||
ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate(
|
||||
True, True
|
||||
)
|
||||
assert ctx["body"][0][1]["display_value"] == expected
|
||||
assert ctx["body"][1][1]["display_value"] == "1.1"
|
||||
assert ctx["body"][0][2]["display_value"] == "0.123400"
|
||||
assert ctx["body"][1][2]["display_value"] == raw_11
|
||||
|
||||
|
||||
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
    """``thousands="_"`` is used as the group separator for float, int and complex."""
    samples = (
        1000000.123456789,  # testing float
        1000000,  # testing int
        1 + 1000000.123456789j,  # testing complex
    )
    for value in samples:
        styler = DataFrame([[value]], index=[value]).style
        result = getattr(styler, func)(
            thousands="_", formatter=formatter, decimal=decimal, precision=precision
        )._translate(True, True)
        assert "1_000_000" in result["body"][0][col]["display_value"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
    """``decimal="_"`` replaces the decimal point for float and complex values."""
    samples = (
        1000000.123456789,  # testing float
        1 + 1000000.123456789j,  # testing complex
    )
    for value in samples:
        styler = DataFrame([[value]], index=[value]).style
        result = getattr(styler, func)(
            decimal="_", formatter=formatter, thousands=thousands, precision=precision
        )._translate(True, True)
        assert "000_123" in result["body"][0][col]["display_value"]
|
||||
|
||||
|
||||
def test_str_escape_error():
    """``_str_escape`` rejects unknown escape modes for str input only."""
    msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got "
    for bad_mode in ("bad_escape", []):
        with pytest.raises(ValueError, match=msg):
            _str_escape("text", bad_mode)

    _str_escape(2.00, "bad_escape")  # OK since dtype is float
|
||||
|
||||
|
||||
def test_long_int_formatting():
|
||||
df = DataFrame(data=[[1234567890123456789]], columns=["test"])
|
||||
styler = df.style
|
||||
ctx = styler._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == "1234567890123456789"
|
||||
|
||||
styler = df.style.format(thousands="_")
|
||||
ctx = styler._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] == "1_234_567_890_123_456_789"
|
||||
|
||||
|
||||
def test_format_options():
|
||||
df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})
|
||||
ctx = df.style._translate(True, True)
|
||||
|
||||
# test option: na_rep
|
||||
assert ctx["body"][1][2]["display_value"] == "nan"
|
||||
with option_context("styler.format.na_rep", "MISSING"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][1][2]["display_value"] == "MISSING"
|
||||
|
||||
# test option: decimal and precision
|
||||
assert ctx["body"][0][2]["display_value"] == "1.009000"
|
||||
with option_context("styler.format.decimal", "_"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][0][2]["display_value"] == "1_009000"
|
||||
with option_context("styler.format.precision", 2):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][0][2]["display_value"] == "1.01"
|
||||
|
||||
# test option: thousands
|
||||
assert ctx["body"][0][1]["display_value"] == "2000"
|
||||
with option_context("styler.format.thousands", "_"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][0][1]["display_value"] == "2_000"
|
||||
|
||||
# test option: escape
|
||||
assert ctx["body"][0][3]["display_value"] == "&<"
|
||||
assert ctx["body"][1][3]["display_value"] == "&~"
|
||||
with option_context("styler.format.escape", "html"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][0][3]["display_value"] == "&<"
|
||||
with option_context("styler.format.escape", "latex"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
|
||||
with option_context("styler.format.escape", "latex-math"):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
|
||||
|
||||
# test option: formatter
|
||||
with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
|
||||
ctx_with_op = df.style._translate(True, True)
|
||||
assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00"
|
||||
|
||||
|
||||
def test_precision_zero(df):
    """With ``precision=0`` both values of the float column display as "-1"."""
    body = Styler(df, precision=0)._translate(True, True)["body"]
    for row in (0, 1):
        assert body[row][2]["display_value"] == "-1"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "formatter, exp",
    [
        (lambda x: f"{x:.3f}", "9.000"),
        ("{:.2f}", "9.00"),
        ({0: "{:.1f}"}, "9.0"),
        (None, "9"),
    ],
)
def test_formatter_options_validator(formatter, exp):
    """The ``styler.format.formatter`` option accepts callable, str, dict and None."""
    frame = DataFrame([[9]])
    with option_context("styler.format.formatter", formatter):
        latex = frame.style.to_latex()
        assert f" {exp} " in latex
|
||||
|
||||
|
||||
def test_formatter_options_raises():
    """Setting ``styler.format.formatter`` to a list raises ValueError."""
    expected_msg = "Value must be an instance of"
    bad_value = ["bad", "type"]
    with pytest.raises(ValueError, match=expected_msg):
        with option_context("styler.format.formatter", bad_value):
            DataFrame().style.to_latex()
|
||||
|
||||
|
||||
def test_1level_multiindex():
|
||||
# GH 43383
|
||||
midx = MultiIndex.from_product([[1, 2]], names=[""])
|
||||
df = DataFrame(-1, index=midx, columns=[0, 1])
|
||||
ctx = df.style._translate(True, True)
|
||||
assert ctx["body"][0][0]["display_value"] == "1"
|
||||
assert ctx["body"][0][0]["is_visible"] is True
|
||||
assert ctx["body"][1][0]["display_value"] == "2"
|
||||
assert ctx["body"][1][0]["is_visible"] is True
|
||||
|
||||
|
||||
def test_boolean_format():
|
||||
# gh 46384: booleans do not collapse to integer representation on display
|
||||
df = DataFrame([[True, False]])
|
||||
ctx = df.style._translate(True, True)
|
||||
assert ctx["body"][0][1]["display_value"] is True
|
||||
assert ctx["body"][0][2]["display_value"] is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "hide, labels",
    [
        (False, [1, 2]),
        (True, [1, 2, 3, 4]),
    ],
)
def test_relabel_raise_length(styler_multi, hide, labels):
    """``relabel_index`` raises ValueError when ``labels`` has the wrong length."""
    if hide:
        hidden_rows = [("X", "x"), ("Y", "y")]
        styler_multi.hide(axis=0, subset=hidden_rows)
    expected_msg = "``labels`` must be of length equal"
    with pytest.raises(ValueError, match=expected_msg):
        styler_multi.relabel_index(labels=labels)
|
||||
|
||||
|
||||
def test_relabel_index(styler_multi):
    """``relabel_index`` sets display values of visible index rows, keeping ``value``."""
    styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
    styler_multi.relabel_index(labels=[(1, 2), (3, 4)])
    body = styler_multi._translate(True, True)["body"]

    # (row, cell) -> subset of key/value pairs that cell must contain
    wanted = {
        (0, 0): {"value": "X", "display_value": 1},
        (0, 1): {"value": "y", "display_value": 2},
        (1, 0): {"value": "Y", "display_value": 3},
        (1, 1): {"value": "x", "display_value": 4},
    }
    for (row, cell), subset in wanted.items():
        assert subset.items() <= body[row][cell].items()
|
||||
|
||||
|
||||
def test_relabel_columns(styler_multi):
    """``relabel_index(axis=1)`` sets display values of visible column headers."""
    styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")])
    styler_multi.relabel_index(axis=1, labels=[(1, 2), (3, 4)])
    head = styler_multi._translate(True, True)["head"]

    # (header row, cell) -> subset of key/value pairs that cell must contain
    wanted = {
        (0, 3): {"value": "A", "display_value": 1},
        (0, 4): {"value": "B", "display_value": 3},
        (1, 3): {"value": "b", "display_value": 2},
        (1, 4): {"value": "a", "display_value": 4},
    }
    for (row, cell), subset in wanted.items():
        assert subset.items() <= head[row][cell].items()
|
||||
|
||||
|
||||
def test_relabel_roundtrip(styler):
    """Relabelling with ``"{}"`` placeholders reproduces the original labels."""
    styler.relabel_index(["{}", "{}"])
    body = styler._translate(True, True)["body"]
    for row, label in enumerate(["x", "y"]):
        wanted = {"value": label, "display_value": label}
        assert wanted.items() <= body[row][0].items()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "level, expected",
    [
        (0, ["X", "one"]),  # level int
        ("zero", ["X", "one"]),  # level name
        (1, ["zero", "X"]),  # other level int
        ("one", ["zero", "X"]),  # other level name
        ([0, 1], ["X", "X"]),  # both levels
        ([0, "zero"], ["X", "one"]),  # level int and name simultaneous
        ([0, "one"], ["X", "X"]),  # both levels as int and name
        (["one", "zero"], ["X", "X"]),  # both level names, reversed
    ],
)
def test_format_index_names_level(axis, level, expected):
    """``format_index_names(level=...)`` only reformats the selected level name(s)."""
    midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
    frame = DataFrame([[1, 2], [3, 4]])
    if axis == 0:
        frame.index = midx
    else:
        frame.columns = midx

    head = frame.style.format_index_names(
        lambda v: "X", level=level, axis=axis
    )._translate(True, True)["head"]

    if axis == 0:  # compare index
        cells = [head[1][s] for s in range(2)]
    else:  # compare columns
        cells = [head[s][0] for s in range(2)]
    result = [cell["display_value"] for cell in cells]
    assert expected == result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "attr, kwargs",
    [
        ("_display_funcs_index_names", {"axis": 0}),
        ("_display_funcs_column_names", {"axis": 1}),
    ],
)
def test_format_index_names_clear(styler, attr, kwargs):
    """Calling ``format_index_names`` without a formatter removes the stored one."""
    level = 0
    assert level not in getattr(styler, attr)  # using default

    styler.format_index_names("{:.2f}", **kwargs)
    assert level in getattr(styler, attr)  # formatter is specified

    styler.format_index_names(**kwargs)
    assert level not in getattr(styler, attr)  # formatter cleared to default
|
||||
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1])
def test_format_index_names_callable(styler_multi, axis):
    """A callable passed to ``format_index_names`` only affects the chosen axis."""

    def swap_underscore(name):
        return name.replace("_", "A")

    head = styler_multi.format_index_names(swap_underscore, axis=axis)._translate(
        True, True
    )["head"]

    # index level names first, then column level names
    positions = [(2, 0), (2, 1), (0, 1), (1, 1)]
    result = [head[row][cell]["display_value"] for row, cell in positions]

    if axis == 0:
        expected = ["0A0", "0A1", "1_0", "1_1"]
    else:
        expected = ["0_0", "0_1", "1A0", "1A1"]
    assert result == expected
|
||||
|
||||
|
||||
def test_format_index_names_dict(styler_multi):
    """Dict formatters keyed by level name are applied to that level name only."""
    styled = styler_multi.format_index_names({"0_0": "{:<<5}"})
    styled = styled.format_index_names({"1_1": "{:>>4}"}, axis=1)
    head = styled._translate(True, True)["head"]
    assert head[2][0]["display_value"] == "0_0<<"
    assert head[1][1]["display_value"] == ">1_1"
|
||||
|
||||
|
||||
def test_format_index_names_with_hidden_levels(styler_multi):
    """Level-name formatting still works after hiding one level on each axis."""
    baseline = styler_multi._translate(True, True)["head"]
    full_head_height = len(baseline)
    full_head_width = len(baseline[0])
    assert full_head_height == 3
    assert full_head_width == 6

    styled = styler_multi.hide(axis=0, level=1).hide(axis=1, level=1)
    styled = styled.format_index_names("{:>>4}", axis=1)
    styled = styled.format_index_names("{:!<5}")
    head = styled._translate(True, True)["head"]

    # hiding one level per axis removes one header row and one header column
    assert len(head) == full_head_height - 1
    assert len(head[0]) == full_head_width - 1
    assert head[0][0]["display_value"] == ">1_0"
    assert head[1][0]["display_value"] == "0_0!!"
|
||||
@ -0,0 +1,221 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
def df(request):
    """Provide a 3x2 frame with a missing middle row, once per parametrized dtype."""
    # GH 45804
    missing_b, dtype = request.param
    missing_a = np.nan if dtype == "float64" else NA
    columns = {"A": [0, missing_a, 10], "B": [1, missing_b, 2]}
    return DataFrame(columns, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Return a Styler over the ``df`` fixture, created with ``uuid_len=0``."""
    result = Styler(df, uuid_len=0)
    return result
|
||||
|
||||
|
||||
def test_highlight_null(styler):
    """``highlight_null`` marks both cells of the missing row in red by default."""
    computed = styler.highlight_null()._compute()
    assert computed.ctx == {
        (1, 0): [("background-color", "red")],
        (1, 1): [("background-color", "red")],
    }
|
||||
|
||||
|
||||
def test_highlight_null_subset(styler):
    """Chained ``highlight_null`` calls with different subsets use their own colors."""
    # GH 31345
    styled = styler.highlight_null(color="red", subset=["A"])
    styled = styled.highlight_null(color="green", subset=["B"])
    result = styled._compute().ctx
    assert result == {
        (1, 0): [("background-color", "red")],
        (1, 1): [("background-color", "green")],
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
def test_highlight_minmax_basic(df, f):
    """Row-wise min/max highlighting marks one cell per non-missing row."""
    # negate for the "min" case so the same cells are expected for both methods
    frame = -df if f == "highlight_min" else df
    result = getattr(frame.style, f)(axis=1, color="red")._compute().ctx
    assert result == {
        (0, 1): [("background-color", "red")],
        # ignores NaN row,
        (2, 0): [("background-color", "red")],
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize(
    "kwargs",
    [
        {"axis": None, "color": "red"},  # test axis
        {"axis": 0, "subset": ["A"], "color": "red"},  # test subset and ignores NaN
        {"axis": None, "props": "background-color: red"},  # test props
    ],
)
def test_highlight_minmax_ext(df, f, kwargs):
    """Min/max highlighting honours ``axis``, ``subset`` and ``props`` arguments."""
    # negate for the "min" case so the same cell is expected for both methods
    frame = -df if f == "highlight_min" else df
    highlighter = getattr(frame.style, f)
    result = highlighter(**kwargs)._compute().ctx
    assert result == {(2, 0): [("background-color", "red")]}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_highlight_minmax_nulls(f, axis):
    """Min/max highlighting skips NA/None/NaN entries on every axis setting."""
    # GH 42750
    yellow = [("background-color", "yellow")]
    expected = {(1, 0): list(yellow), (1, 1): list(yellow)}
    if axis == 1:
        expected[(2, 1)] = list(yellow)

    # mirror the values for the "min" case so the same cells are expected
    sign = 1 if f == "highlight_max" else -1
    frame = DataFrame({"a": [NA, sign * 1, None], "b": [np.nan, sign * 1, sign * -1]})

    result = getattr(frame.style, f)(axis=axis)._compute().ctx
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [
        {"left": 0, "right": 1},  # test basic range
        {"left": 0, "right": 1, "props": "background-color: yellow"},  # test props
        {"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]},  # test subset
        {"left": 0, "subset": IndexSlice[[0, 1], :]},  # test no right
        {"right": 1},  # test no left
        {"left": [0, 0, 11], "axis": 0},  # test left as sequence
        {"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None},  # axis
        {"left": 0, "right": [0, 1], "axis": 1},  # test sequence right
    ],
)
def test_highlight_between(styler, kwargs):
    """Every parametrized bound specification highlights only the first row."""
    computed = styler.highlight_between(**kwargs)._compute()
    assert computed.ctx == {
        (0, 0): [("background-color", "yellow")],
        (0, 1): [("background-color", "yellow")],
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arg, map, axis",
    [
        ("left", [1, 2], 0),  # 0 axis has 3 elements not 2
        ("left", [1, 2, 3], 1),  # 1 axis has 2 elements not 3
        ("left", np.array([[1, 2], [1, 2]]), None),  # df is (2,3) not (2,2)
        ("right", [1, 2], 0),  # same tests as above for 'right' not 'left'
        ("right", [1, 2, 3], 1),  # ..
        ("right", np.array([[1, 2], [1, 2]]), None),  # ..
    ],
)
def test_highlight_between_raises(arg, styler, map, axis):
    """A bound whose shape does not fit the data raises ValueError on compute."""
    call_kwargs = {arg: map, "axis": axis}
    expected_msg = f"supplied '{arg}' is not correct shape"
    with pytest.raises(ValueError, match=expected_msg):
        styler.highlight_between(**call_kwargs)._compute()
|
||||
|
||||
|
||||
def test_highlight_between_raises2(styler):
    """Invalid ``inclusive`` values raise ValueError on compute."""
    expected_msg = "values can be 'both', 'left', 'right', or 'neither'"
    for bad_inclusive in ("badstring", 1):
        with pytest.raises(ValueError, match=expected_msg):
            styler.highlight_between(inclusive=bad_inclusive)._compute()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "inclusive, expected",
    [
        (
            "both",
            {
                (0, 0): [("background-color", "yellow")],
                (0, 1): [("background-color", "yellow")],
            },
        ),
        ("neither", {}),
        ("left", {(0, 0): [("background-color", "yellow")]}),
        ("right", {(0, 1): [("background-color", "yellow")]}),
    ],
)
def test_highlight_between_inclusive(styler, inclusive, expected):
    """``inclusive`` decides whether values equal to a bound are highlighted."""
    computed = styler.highlight_between(
        left=0, right=1, subset=IndexSlice[[0, 1], :], inclusive=inclusive
    )._compute()
    assert computed.ctx == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [
        {"q_left": 0.5, "q_right": 1, "axis": 0},  # base case
        {"q_left": 0.5, "q_right": 1, "axis": None},  # test axis
        {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]},  # test subset
        {"q_left": 0.5, "axis": 0},  # test no high
        {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1},  # test no low
        {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"},  # tst prop
    ],
)
def test_highlight_quantile(styler, kwargs):
    """Every parametrized quantile specification highlights only the last row."""
    computed = styler.highlight_quantile(**kwargs)._compute()
    assert computed.ctx == {
        (2, 0): [("background-color", "yellow")],
        (2, 1): [("background-color", "yellow")],
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "f,kwargs",
    [
        ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
        ("highlight_max", {"axis": 0, "subset": [0]}),
        ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
        ("highlight_between", {"subset": [0]}),
    ],
)
@pytest.mark.parametrize(
    "dtype",
    [
        int,
        float,
        "datetime64[ns]",
        str,
        "timedelta64[ns]",
    ],
)
def test_all_highlight_dtypes(f, kwargs, dtype):
    """
    Each highlight method must select cell (1, 0) for every supported dtype.

    Parameters
    ----------
    f : str
        Name of the ``Styler`` highlight method under test.
    kwargs : dict
        Keyword arguments forwarded to that method.
    dtype : type or str
        Dtype used to build the 2x2 frame.
    """
    df = DataFrame([[0, 10], [20, 30]], dtype=dtype)
    if f == "highlight_quantile" and isinstance(df.iloc[0, 0], str):
        return  # quantile incompatible with str
    if f == "highlight_between":
        # ``kwargs`` is the very dict object listed in the parametrize call and
        # is reused for every ``dtype`` case, so build a new dict rather than
        # mutating it in place.
        kwargs = {**kwargs, "left": df.iloc[1, 0]}  # set the range low for testing

    expected = {(1, 0): [("background-color", "yellow")]}
    result = getattr(df.style, f)(**kwargs)._compute().ctx
    assert result == expected
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,305 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
Series,
|
||||
)
|
||||
|
||||
mpl = pytest.importorskip("matplotlib")
|
||||
pytest.importorskip("jinja2")
|
||||
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("mpl_cleanup")
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a 2x2 integer frame with columns ``A`` and ``B``."""
    values = [[1, 2], [2, 4]]
    return DataFrame(values, columns=["A", "B"])
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a ``Styler`` built with ``uuid_len=0``."""
    wrapped = Styler(df, uuid_len=0)
    return wrapped
|
||||
|
||||
|
||||
@pytest.fixture
def df_blank():
    """Return a 2x2 frame of zeros labelled ``X``/``Y`` by ``A``/``B``."""
    zeros = [[0, 0], [0, 0]]
    return DataFrame(zeros, columns=["A", "B"], index=["X", "Y"])
|
||||
|
||||
|
||||
@pytest.fixture
def styler_blank(df_blank):
    """Wrap the ``df_blank`` fixture in a ``Styler`` built with ``uuid_len=0``."""
    wrapped = Styler(df_blank, uuid_len=0)
    return wrapped
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_function_gradient(styler, f):
    """Gradient methods emit hex colours, identical across the cells of each row."""
    gradient = getattr(styler, f)
    for c_map in (None, "YlOrRd"):
        ctx = gradient(cmap=c_map)._compute().ctx
        # the first property value of every styled cell is a hex colour
        first_values = [props[0][1] for props in ctx.values()]
        assert all("#" in value for value in first_values)
        for row in (0, 1):
            assert ctx[(row, 0)] == ctx[(row, 1)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_color(styler, f):
    """Check the exact properties produced for a single-cell subset."""
    expected_props = {
        "background_gradient": [
            ("background-color", "#fff7fb"),
            ("color", "#000000"),
        ],
        "text_gradient": [("color", "#fff7fb")],
    }
    ctx = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx
    assert ctx[(1, 0)] == expected_props[f]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "axis, expected",
    [
        (0, ["low", "low", "high", "high"]),
        (1, ["low", "high", "low", "high"]),
        (None, ["low", "mid", "mid", "high"]),
    ],
)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_axis(styler, axis, expected, f):
    """Check the colour level assigned to each cell for every ``axis`` choice."""
    palettes = {
        "background_gradient": {
            "low": [("background-color", "#f7fbff"), ("color", "#000000")],
            "mid": [("background-color", "#abd0e6"), ("color", "#000000")],
            "high": [("background-color", "#08306b"), ("color", "#f1f1f1")],
        },
        "text_gradient": {
            "low": [("color", "#f7fbff")],
            "mid": [("color", "#abd0e6")],
            "high": [("color", "#08306b")],
        },
    }
    colors = palettes[f]
    ctx = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx
    # ``expected`` lists the level of each cell in row-major order
    cells = [(0, 0), (0, 1), (1, 0), (1, 1)]
    for cell, level in zip(cells, expected):
        assert ctx[cell] == colors[level]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cmap, expected",
    [
        (
            "PuBu",
            {
                (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")],
                (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")],
            },
        ),
        (
            "YlOrRd",
            {
                (4, 8): [("background-color", "#fd913e"), ("color", "#000000")],
                (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")],
            },
        ),
        (
            None,
            {
                (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")],
                (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")],
            },
        ),
    ],
)
def test_text_color_threshold(cmap, expected):
    """Check background and text colours of selected neighbouring cells."""
    # GH 39888
    frame = DataFrame(np.arange(100).reshape(10, 10))
    ctx = frame.style.background_gradient(cmap=cmap, axis=None)._compute().ctx
    for cell, props in expected.items():
        assert ctx[cell] == props
|
||||
|
||||
|
||||
def test_background_gradient_vmin_vmax():
    """Values outside ``vmin``/``vmax`` are styled like the bound they exceed."""
    # GH 12145
    frame = DataFrame(range(5))
    styled = frame.style.background_gradient(vmin=1, vmax=3)
    ctx = styled._compute().ctx
    # row 0 (value 0) lies below vmin=1; row 4 (value 4) lies above vmax=3
    assert ctx[(0, 0)] == ctx[(1, 0)]
    assert ctx[(4, 0)] == ctx[(3, 0)]
|
||||
|
||||
|
||||
def test_background_gradient_int64():
    """A nullable ``Int64`` column is styled the same as the default-dtype column."""
    # GH 28869
    default_frame = Series(range(3)).to_frame()
    nullable_frame = Series(range(3), dtype="Int64").to_frame()
    ctx1 = default_frame.style.background_gradient()._compute().ctx
    ctx2 = nullable_frame.style.background_gradient()._compute().ctx
    for row in range(3):
        assert ctx2[(row, 0)] == ctx1[(row, 0)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "axis, gmap, expected",
    [
        (
            0,
            [1, 2],
            {
                (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
        (
            1,
            [1, 2],
            {
                (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
        (
            None,
            np.array([[2, 1], [1, 2]]),
            {
                (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
                (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
                (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
                (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
            },
        ),
    ],
)
def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected):
    """A sequence or ndarray ``gmap`` drives the colours of an all-zero frame."""
    # tests when gmap is given as a sequence and converted to ndarray
    styled = styler_blank.background_gradient(axis=axis, gmap=gmap)
    ctx = styled._compute().ctx
    assert ctx == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)]
)
def test_background_gradient_gmap_array_raises(gmap, axis):
    """A ``gmap`` whose shape does not fit the 2x3 frame must raise ``ValueError``."""
    # test when gmap as converted ndarray is bad shape
    frame = DataFrame([[0, 0, 0], [0, 0, 0]])
    with pytest.raises(ValueError, match="supplied 'gmap' is not correct shape"):
        frame.style.background_gradient(gmap=gmap, axis=axis)._compute()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "gmap",
    [
        DataFrame(  # reverse the columns
            [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"]
        ),
        DataFrame(  # reverse the index
            [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"]
        ),
        DataFrame(  # reverse the index and columns
            [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"]
        ),
        DataFrame(  # add unnecessary columns
            [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"]
        ),
        DataFrame(  # add unnecessary index
            [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"]
        ),
    ],
)
@pytest.mark.parametrize(
    "subset, exp_gmap",  # exp_gmap is underlying map DataFrame should conform to
    [
        (None, [[1, 2], [2, 1]]),
        (["A"], [[1], [2]]),  # slice only column "A" in data and gmap
        (["B", "A"], [[2, 1], [1, 2]]),  # reverse the columns in data
        (IndexSlice["X", :], [[1, 2]]),  # slice only index "X" in data and gmap
        (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]),  # reverse the index in data
    ],
)
def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap):
    """
    A DataFrame ``gmap`` must be aligned to the (subsetted) data before use.

    Parameters
    ----------
    styler_blank : Styler
        Fixture providing the data to style.
    gmap : DataFrame
        Gradient map whose labels are shuffled or extended relative to the data.
    subset : label selection or None
        Subset passed to ``background_gradient``.
    exp_gmap : list of list
        Plain values the aligned ``gmap`` is expected to be equivalent to.
    """
    # test gmap given as DataFrame that it aligns to the data including subset
    # Styler methods register the style on, and return, the same instance. Using
    # ``styler_blank`` for both calls would therefore compare one ``ctx`` object
    # with itself, so two independent stylers are built over the same data.
    data = styler_blank.data
    expected = Styler(data, uuid_len=0).background_gradient(
        axis=None, gmap=exp_gmap, subset=subset
    )
    result = Styler(data, uuid_len=0).background_gradient(
        axis=None, gmap=gmap, subset=subset
    )
    assert expected._compute().ctx == result._compute().ctx
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "gmap, axis, exp_gmap",
    [
        (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]),  # reverse the index
        (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]),  # reverse the cols
        (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]),  # add idx
        (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]),  # add col
    ],
)
def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap):
    """
    A Series ``gmap`` must be aligned to the data along the requested axis.

    Parameters
    ----------
    styler_blank : Styler
        Fixture providing the data to style.
    gmap : Series
        Gradient map whose labels are shuffled or extended relative to the data.
    axis : int
        Axis along which ``gmap`` is applied.
    exp_gmap : list of list
        Plain 2-D values equivalent to the aligned ``gmap`` broadcast over the data.
    """
    # test gmap given as Series that it aligns to the data
    # Styler methods register the style on, and return, the same instance. Using
    # ``styler_blank`` for both calls would therefore compare one ``ctx`` object
    # with itself, so two independent stylers are built over the same data.
    data = styler_blank.data
    expected = (
        Styler(data, uuid_len=0).background_gradient(axis=None, gmap=exp_gmap)._compute()
    )
    result = (
        Styler(data, uuid_len=0).background_gradient(axis=axis, gmap=gmap)._compute()
    )
    assert expected.ctx == result.ctx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("axis", [1, 0])
def test_background_gradient_gmap_wrong_dataframe(styler_blank, axis):
    """A DataFrame ``gmap`` combined with ``axis`` 0 or 1 must raise ``ValueError``."""
    # test giving a gmap in DataFrame but with wrong axis
    frame_gmap = DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"])
    with pytest.raises(
        ValueError,
        match="'gmap' is a DataFrame but underlying data for operations is a Series",
    ):
        styler_blank.background_gradient(gmap=frame_gmap, axis=axis)._compute()
|
||||
|
||||
|
||||
def test_background_gradient_gmap_wrong_series(styler_blank):
    """A Series ``gmap`` combined with ``axis=None`` must raise ``ValueError``."""
    # test giving a gmap in Series form but with wrong axis
    series_gmap = Series([1, 2], index=["X", "Y"])
    with pytest.raises(
        ValueError,
        match="'gmap' is a Series but underlying data for operations is a DataFrame",
    ):
        styler_blank.background_gradient(gmap=series_gmap, axis=None)._compute()
|
||||
|
||||
|
||||
def test_background_gradient_nullable_dtypes():
    """A float frame with NaN and an ``Int64`` frame with a missing value style alike."""
    # GH 50712
    float_frame = DataFrame([[1], [0], [np.nan]], dtype=float)
    nullable_frame = DataFrame([[1], [0], [None]], dtype="Int64")

    float_ctx = float_frame.style.background_gradient()._compute().ctx
    nullable_ctx = nullable_frame.style.background_gradient()._compute().ctx
    assert float_ctx == nullable_ctx
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cmap",
    ["PuBu", mpl.colormaps["PuBu"]],
)
def test_bar_colormap(cmap):
    """``Styler.bar`` accepts a colormap given by name or as a colormap object."""
    frame = DataFrame([[1, 2], [3, 4]])
    ctx = frame.style.bar(cmap=cmap, axis=None)._compute().ctx
    pubu_colors = {
        (0, 0): "#d0d1e6",
        (1, 0): "#056faf",
        (0, 1): "#73a9cf",
        (1, 1): "#023858",
    }
    for cell, hex_color in pubu_colors.items():
        # the colour is looked up in the value of the cell's second property
        assert hex_color in ctx[cell][1][1]
|
||||
|
||||
|
||||
def test_bar_color_raises(df):
    """Invalid ``color`` values, or ``color`` together with ``cmap``, must raise."""
    bad_color_msg = "`color` must be string or list or tuple of 2 strings"
    # a set, then a list with three entries
    for bad_color in ({"a", "b"}, ["a", "b", "c"]):
        with pytest.raises(ValueError, match=bad_color_msg):
            df.style.bar(color=bad_color).to_html()

    with pytest.raises(ValueError, match="`color` and `cmap` cannot both be given"):
        df.style.bar(color="something", cmap="something else").to_html()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("plot_method", ["scatter", "hexbin"])
def test_pass_colormap_instance(df, plot_method):
    """Plotting accepts a colormap object passed through ``colormap``."""
    # https://github.com/pandas-dev/pandas/issues/49374
    df["c"] = df.A + df.B
    # the colour column is passed as ``C`` for hexbin and as ``c`` otherwise
    color_key = "C" if plot_method == "hexbin" else "c"
    plot_kwargs = {
        "x": "A",
        "y": "B",
        "colormap": mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]]),
        color_key: "c",
    }
    plotter = getattr(df.plot, plot_method)
    plotter(**plot_kwargs)
|
||||
@ -0,0 +1,140 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a 3x3 float frame whose index and columns contain duplicate labels."""
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    row_labels = ["i", "j", "j"]
    column_labels = ["c", "d", "d"]
    return DataFrame(values, index=row_labels, columns=column_labels, dtype=float)
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a ``Styler`` built with ``uuid_len=0``."""
    wrapped = Styler(df, uuid_len=0)
    return wrapped
|
||||
|
||||
|
||||
def test_format_non_unique(df):
    """Formatting by dict key or by subset applies to every duplicated label."""
    # GH 41269

    # test dict
    html = df.style.format({"d": "{:.1f}"}).to_html()
    default_cells = ["1.000000<", "4.000000<", "7.000000<"]
    formatted_cells = ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + formatted_cells:
        assert fragment in html

    # test subset
    html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html()
    default_cells = ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]
    formatted_cells = ["5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + formatted_cells:
        assert fragment in html
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["apply", "map"])
def test_apply_map_non_unique_raises(df, func):
    """``Styler.apply`` and ``Styler.map`` must raise ``KeyError`` on duplicate labels."""
    # GH 41269

    def style_series(s):
        return ["color: red;"] * len(s)

    def style_value(v):
        return "color: red;"

    op = style_series if func == "apply" else style_value

    with pytest.raises(KeyError, match="`Styler.apply` and `.map` are not"):
        getattr(df.style, func)(op)._compute()
|
||||
|
||||
|
||||
def test_table_styles_dict_non_unique_index(styler):
    """A style keyed by a duplicated index label produces one rule per matching row."""
    keyed_styles = {"j": [{"selector": "td", "props": "a: v;"}]}
    table_styles = styler.set_table_styles(keyed_styles, axis=1).table_styles
    expected = [
        {"selector": f"td.row{row}", "props": [("a", "v")]} for row in (1, 2)
    ]
    assert table_styles == expected
|
||||
|
||||
|
||||
def test_table_styles_dict_non_unique_columns(styler):
    """A style keyed by a duplicated column label produces one rule per matching column."""
    keyed_styles = {"d": [{"selector": "td", "props": "a: v;"}]}
    table_styles = styler.set_table_styles(keyed_styles, axis=0).table_styles
    expected = [
        {"selector": f"td.col{col}", "props": [("a", "v")]} for col in (1, 2)
    ]
    assert table_styles == expected
|
||||
|
||||
|
||||
def test_tooltips_non_unique_raises(styler):
    """``set_tooltips`` accepts uniquely labelled ``ttips`` and rejects duplicates."""
    values = [["1", "2"], ["3", "4"]]
    error_match = "Tooltips render only if `ttips` has unique"

    # ttips has unique keys
    styler.set_tooltips(
        ttips=DataFrame(values, columns=["c", "d"], index=["a", "b"])
    )  # OK

    duplicated_labels = [
        (["c", "c"], ["a", "b"]),  # ttips has non-unique columns
        (["c", "d"], ["a", "a"]),  # ttips has non-unique index
    ]
    for columns, index in duplicated_labels:
        ttips = DataFrame(values, columns=columns, index=index)
        with pytest.raises(KeyError, match=error_match):
            styler.set_tooltips(ttips=ttips)
|
||||
|
||||
|
||||
def test_set_td_classes_non_unique_raises(styler):
    """``set_td_classes`` accepts uniquely labelled ``classes`` and rejects duplicates."""
    values = [["1", "2"], ["3", "4"]]
    error_match = "Classes render only if `classes` has unique"

    # classes has unique keys
    styler.set_td_classes(
        classes=DataFrame(values, columns=["c", "d"], index=["a", "b"])
    )  # OK

    duplicated_labels = [
        (["c", "c"], ["a", "b"]),  # classes has non-unique columns
        (["c", "d"], ["a", "a"]),  # classes has non-unique index
    ]
    for columns, index in duplicated_labels:
        classes = DataFrame(values, columns=columns, index=index)
        with pytest.raises(KeyError, match=error_match):
            styler.set_td_classes(classes=classes)
|
||||
|
||||
|
||||
def test_hide_columns_non_unique(styler):
    """Hiding a duplicated column label hides every column carrying that label."""
    ctx = styler.hide(["d"], axis="columns")._translate(True, True)
    header_row = ctx["head"][0]
    first_body_row = ctx["body"][0]

    # (cell position, displayed label, expected visibility)
    expected_cells = [(1, "c", True), (2, "d", False), (3, "d", False)]

    for position, label, visible in expected_cells:
        assert header_row[position]["display_value"] == label
        assert header_row[position]["is_visible"] is visible

    for position, _, visible in expected_cells:
        assert first_body_row[position]["is_visible"] is visible
|
||||
|
||||
|
||||
def test_latex_non_unique(styler):
    """``to_latex`` renders every row and column even when labels repeat."""
    # NOTE(review): leading or repeated spaces inside these lines may have been
    # lost when this file was extracted (e.g. before the first "&" of the header
    # row) -- confirm against the actual ``to_latex`` output.
    expected_lines = [
        "\\begin{tabular}{lrrr}",
        "& c & d & d \\\\",
        "i & 1.000000 & 2.000000 & 3.000000 \\\\",
        "j & 4.000000 & 5.000000 & 6.000000 \\\\",
        "j & 7.000000 & 8.000000 & 9.000000 \\\\",
        "\\end{tabular}",
    ]
    assert styler.to_latex() == "\n".join(expected_lines) + "\n"
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,96 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a two-row frame with an int, a float and an object-dtype string column."""
    columns = {
        "A": [0, 1],
        "B": [-0.61, -1.22],
        "C": Series(["ab", "cd"], dtype=object),
    }
    return DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a ``Styler`` with ``uuid_len=0`` and ``precision=2``."""
    wrapped = Styler(df, uuid_len=0, precision=2)
    return wrapped
|
||||
|
||||
|
||||
def test_basic_string(styler):
    """``to_string`` renders a header row and one line per data row."""
    result = styler.to_string()
    # The header row starts with an empty index cell, so it begins with the
    # delimiter (compare ";A;B;C" when the delimiter is ";"). The expected text
    # is written as explicit literals so that this leading space cannot be lost
    # to re-indentation.
    expected = (
        " A B C\n"
        "0 0 -0.61 ab\n"
        "1 1 -1.22 cd\n"
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_string_delimiter(styler):
    """``to_string`` separates cells with the requested ``delimiter``."""
    expected_lines = [
        ";A;B;C",
        "0;0;-0.61;ab",
        "1;1;-1.22;cd",
    ]
    rendered = styler.to_string(delimiter=";")
    assert rendered == "\n".join(expected_lines) + "\n"
|
||||
|
||||
|
||||
def test_concat(styler):
    """A concatenated styler's rows are appended after the main body."""
    result = styler.concat(styler.data.agg(["sum"]).style).to_string()
    # The header row starts with an empty index cell, so it begins with the
    # delimiter (a space by default); explicit literals keep that leading space
    # from being lost to re-indentation.
    expected = (
        " A B C\n"
        "0 0 -0.61 ab\n"
        "1 1 -1.22 cd\n"
        "sum 1 -1.830000 abcd\n"
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_concat_recursion(styler):
    """Nested ``concat`` calls render each styler with its own precision."""
    df = styler.data
    styler1 = styler
    styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
    styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
    # styler3 is concatenated onto styler2, which is then concatenated onto styler1
    result = styler1.concat(styler2.concat(styler3)).to_string()
    # The header row starts with an empty index cell, so it begins with the
    # delimiter (a space by default); explicit literals keep that leading space
    # from being lost to re-indentation.
    expected = (
        " A B C\n"
        "0 0 -0.61 ab\n"
        "1 1 -1.22 cd\n"
        "sum 1 -1.830 abcd\n"
        "sum 1 -1.8300 abcd\n"
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_concat_chain(styler):
    """Chained ``concat`` calls render each styler with its own precision."""
    df = styler.data
    styler1 = styler
    styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
    styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
    # styler2 and styler3 are both concatenated directly onto styler1
    result = styler1.concat(styler2).concat(styler3).to_string()
    # The header row starts with an empty index cell, so it begins with the
    # delimiter (a space by default); explicit literals keep that leading space
    # from being lost to re-indentation.
    expected = (
        " A B C\n"
        "0 0 -0.61 ab\n"
        "1 1 -1.22 cd\n"
        "sum 1 -1.830 abcd\n"
        "sum 1 -1.8300 abcd\n"
    )
    assert result == expected
|
||||
@ -0,0 +1,96 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a two-row frame with an int, a float and an object-dtype string column."""
    columns = {
        "A": [0, 1],
        "B": [-0.61, -1.22],
        "C": Series(["ab", "cd"], dtype=object),
    }
    return DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a ``Styler`` with ``uuid_len=0`` and ``precision=2``."""
    wrapped = Styler(df, uuid_len=0, precision=2)
    return wrapped
|
||||
|
||||
|
||||
def test_basic_table(styler):
    """``to_typst`` renders a ``#table`` with a header row and one row per record."""
    # NOTE(review): any indentation inside the expected text may have been
    # stripped when this file was extracted -- confirm against the actual
    # ``to_typst`` output.
    expected_lines = [
        "#table(",
        "columns: 4,",
        "[], [A], [B], [C],",
        "",
        "[0], [0], [-0.61], [ab],",
        "[1], [1], [-1.22], [cd],",
        ")",
    ]
    assert styler.to_typst() == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_concat(styler):
    """A concatenated styler's rows are appended after the main body."""
    # NOTE(review): any indentation inside the expected text may have been
    # stripped when this file was extracted -- confirm against the actual
    # ``to_typst`` output.
    expected_lines = [
        "#table(",
        "columns: 4,",
        "[], [A], [B], [C],",
        "",
        "[0], [0], [-0.61], [ab],",
        "[1], [1], [-1.22], [cd],",
        "[sum], [1], [-1.830000], [abcd],",
        ")",
    ]
    totals = styler.data.agg(["sum"]).style
    rendered = styler.concat(totals).to_typst()
    assert rendered == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_concat_recursion(styler):
    """Nested ``concat`` calls render each styler with its own precision."""
    # NOTE(review): any indentation inside the expected text may have been
    # stripped when this file was extracted -- confirm against the actual
    # ``to_typst`` output.
    expected_lines = [
        "#table(",
        "columns: 4,",
        "[], [A], [B], [C],",
        "",
        "[0], [0], [-0.61], [ab],",
        "[1], [1], [-1.22], [cd],",
        "[sum], [1], [-1.830], [abcd],",
        "[sum], [1], [-1.8300], [abcd],",
        ")",
    ]
    frame = styler.data
    inner = Styler(frame.agg(["sum"]), uuid_len=0, precision=3)
    innermost = Styler(frame.agg(["sum"]), uuid_len=0, precision=4)
    # innermost is concatenated onto inner, which is concatenated onto styler
    rendered = styler.concat(inner.concat(innermost)).to_typst()
    assert rendered == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_concat_chain(styler):
    """Chained ``concat`` calls render each styler with its own precision."""
    # NOTE(review): any indentation inside the expected text may have been
    # stripped when this file was extracted -- confirm against the actual
    # ``to_typst`` output.
    expected_lines = [
        "#table(",
        "columns: 4,",
        "[], [A], [B], [C],",
        "",
        "[0], [0], [-0.61], [ab],",
        "[1], [1], [-1.22], [cd],",
        "[sum], [1], [-1.830], [abcd],",
        "[sum], [1], [-1.8300], [abcd],",
        ")",
    ]
    frame = styler.data
    second = Styler(frame.agg(["sum"]), uuid_len=0, precision=3)
    third = Styler(frame.agg(["sum"]), uuid_len=0, precision=4)
    # second and third are both concatenated directly onto styler
    rendered = styler.concat(second).concat(third).to_typst()
    assert rendered == "\n".join(expected_lines)
|
||||
@ -0,0 +1,179 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
pytest.importorskip("jinja2")
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a 3x3 frame holding 0..8, labelled ``x``/``y``/``z`` by ``A``/``B``/``C``."""
    values = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    return DataFrame(data=values, columns=["A", "B", "C"], index=["x", "y", "z"])
|
||||
|
||||
|
||||
@pytest.fixture
def styler(df):
    """Wrap the ``df`` fixture in a ``Styler`` built with ``uuid_len=0``."""
    wrapped = Styler(df, uuid_len=0)
    return wrapped
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, columns, index",
    [
        # Test basic reindex and ignoring blank
        ([["Min", "Max"], [np.nan, ""]], ["A", "C"], ["x", "y"]),
        # Test non-referenced columns, reversed col names, short index
        ([["Max", "Min", "Bad-Col"]], ["C", "A", "D"], ["x"]),
    ],
)
def test_tooltip_render(data, columns, index, styler):
    """Tooltips render as CSS rules plus a ``<span>`` for the targeted cells only."""
    # NOTE(review): runs of spaces inside the literals below (e.g. CSS
    # indentation after "\n") may have been collapsed when this file was
    # extracted -- confirm against the rendered HTML.

    def hover_rule(row, col):
        return f"#T_ #T__row{row}_col{col}:hover .pd-t {{\n visibility: visible;\n}}"

    ttips = DataFrame(data=data, columns=columns, index=index)

    # GH 21266
    html = styler.set_tooltips(ttips).to_html()

    # (fragment, whether it must occur in the output), in checking order
    checks = [
        # test tooltip table level class
        ("#T_ .pd-t {\n visibility: hidden;\n", True),
        # test 'Min' tooltip added
        (hover_rule(0, 0), True),
        ('#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}', True),
        ('class="data row0 col0" >0<span class="pd-t"></span></td>', True),
        # test 'Max' tooltip added
        (hover_rule(0, 2), True),
        ('#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}', True),
        ('class="data row0 col2" >2<span class="pd-t"></span></td>', True),
        # test Nan, empty string and bad column ignored
        (hover_rule(1, 0), False),
        (hover_rule(1, 1), False),
        (hover_rule(0, 1), False),
        (hover_rule(1, 2), False),
        ("Bad-Col", False),
    ]
    for fragment, should_appear in checks:
        assert (fragment in html) is should_appear
|
||||
|
||||
|
||||
def test_tooltip_ignored(styler):
    """Without ``set_tooltips`` the HTML carries no tooltip markup at all."""
    # GH 21266
    html = styler.to_html()  # no set_tooltips() creates no <span>
    assert '<style type="text/css">\n</style>' in html
    for tooltip_markup in ('<span class="pd-t"></span>', 'title="'):
        assert tooltip_markup not in html
|
||||
|
||||
|
||||
def test_tooltip_css_class(styler):
    """``css_class`` and ``props`` passed to ``set_tooltips`` appear in the CSS."""
    # NOTE(review): runs of spaces inside the literals below (e.g. CSS
    # indentation after "\n") may have been collapsed when this file was
    # extracted -- confirm against the rendered HTML.

    # GH 21266
    html = styler.set_tooltips(
        DataFrame([["tooltip"]], index=["x"], columns=["A"]),
        css_class="other-class",
        props=[("color", "green")],
    ).to_html()
    expected_fragments = [
        "#T_ .other-class {\n color: green;\n",
        '#T_ #T__row0_col0 .other-class::after {\n content: "tooltip";\n',
    ]
    for fragment in expected_fragments:
        assert fragment in html

    # GH 39563
    html = styler.set_tooltips(  # set_tooltips overwrites previous
        DataFrame([["tooltip"]], index=["x"], columns=["A"]),
        css_class="another-class",
        props="color:green;color:red;",
    ).to_html()
    assert "#T_ .another-class {\n color: green;\n color: red;\n}" in html
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, columns, index",
    [
        # Test basic reindex and ignoring blank
        ([["Min", "Max"], [np.nan, ""]], ["A", "C"], ["x", "y"]),
        # Test non-referenced columns, reversed col names, short index
        ([["Max", "Min", "Bad-Col"]], ["C", "A", "D"], ["x"]),
    ],
)
def test_tooltip_render_as_title(data, columns, index, styler):
    """With ``as_title_attribute=True`` tooltips become ``title`` attributes, not CSS."""
    # NOTE(review): runs of spaces inside the literals below (e.g. CSS
    # indentation after "\n") may have been collapsed when this file was
    # extracted -- confirm against the rendered HTML.

    def hover_rule(row, col):
        return f"#T_ #T__row{row}_col{col}:hover .pd-t {{\n visibility: visible;\n}}"

    def plain_cell(row, col, value):
        return f'class="data row{row} col{col}" >{value}</td>'

    ttips = DataFrame(data=data, columns=columns, index=index)
    # GH 56605
    html = styler.set_tooltips(ttips, as_title_attribute=True).to_html()

    # (fragment, whether it must occur in the output), in checking order
    checks = [
        # test css not added
        ("#T_ .pd-t {\n visibility: hidden;\n", False),
        # test 'Min' tooltip added as title attribute and css does not exist
        (hover_rule(0, 0), False),
        ('#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}', False),
        ('class="data row0 col0" title="Min">0</td>', True),
        # test 'Max' tooltip added as title attribute and css does not exist
        (hover_rule(0, 2), False),
        ('#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}', False),
        ('class="data row0 col2" title="Max">2</td>', True),
        # test Nan, empty string and bad column ignored
        (hover_rule(1, 0), False),
        (hover_rule(1, 1), False),
        (hover_rule(0, 1), False),
        (hover_rule(1, 2), False),
        ("Bad-Col", False),
        (plain_cell(0, 1, 1), True),
        (plain_cell(1, 0, 3), True),
        (plain_cell(1, 1, 4), True),
        (plain_cell(1, 2, 5), True),
        (plain_cell(2, 0, 6), True),
        (plain_cell(2, 1, 7), True),
        (plain_cell(2, 2, 8), True),
    ]
    for fragment, should_appear in checks:
        assert (fragment in html) is should_appear
|
||||
|
||||
|
||||
def test_tooltip_render_as_title_with_hidden_index_level():
    """Title-attribute tooltips still hit the right cells when an index level is hidden."""
    # NOTE(review): runs of spaces inside the literals below (e.g. CSS
    # indentation after "\n") may have been collapsed when this file was
    # extracted -- confirm against the rendered HTML.

    def hover_rule(row, col):
        return f"#T_ #T__row{row}_col{col}:hover .pd-t {{\n visibility: visible;\n}}"

    def plain_cell(row, col, value):
        return f'class="data row{row} col{col}" >{value}</td>'

    level_names = ["alpha", "num", "char"]
    frame = DataFrame(
        data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
        columns=["A", "B", "C"],
        index=MultiIndex.from_arrays(
            [["x", "y", "z"], [1, 2, 3], ["aa", "bb", "cc"]],
            names=level_names,
        ),
    )
    # Test basic reindex and ignoring blank
    ttips = DataFrame(
        data=[["Min", "Max"], [np.nan, ""]],
        columns=["A", "C"],
        index=MultiIndex.from_arrays(
            [["x", "y"], [1, 2], ["aa", "bb"]], names=level_names
        ),
    )
    # hide the last index level (level=-1) together with the index names
    hidden = Styler(frame, uuid_len=0).hide(axis=0, level=-1, names=True)
    # GH 56605
    html = hidden.set_tooltips(ttips, as_title_attribute=True).to_html()

    # (fragment, whether it must occur in the output), in checking order
    checks = [
        # test css not added
        ("#T_ .pd-t {\n visibility: hidden;\n", False),
        # test 'Min' tooltip added as title attribute and css does not exist
        (hover_rule(0, 0), False),
        ('#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}', False),
        ('class="data row0 col0" title="Min">0</td>', True),
        # test 'Max' tooltip added as title attribute and css does not exist
        (hover_rule(0, 2), False),
        ('#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}', False),
        ('class="data row0 col2" title="Max">2</td>', True),
        # test Nan, empty string and bad column ignored
        (hover_rule(1, 0), False),
        (hover_rule(1, 1), False),
        (hover_rule(0, 1), False),
        (hover_rule(1, 2), False),
        ("Bad-Col", False),
        (plain_cell(0, 1, 1), True),
        (plain_cell(1, 0, 3), True),
        (plain_cell(1, 1, 4), True),
        (plain_cell(1, 2, 5), True),
        (plain_cell(2, 0, 6), True),
        (plain_cell(2, 1, 7), True),
        (plain_cell(2, 2, 8), True),
    ]
    for fragment, should_appear in checks:
        assert (fragment in html) is should_appear
|
||||
@ -0,0 +1,72 @@
|
||||
import locale
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas._config import detect_console_encoding
|
||||
|
||||
|
||||
class MockEncoding:
    """
    Stand-in object whose ``encoding`` property has a configurable side effect.

    When the stored value is a ``str`` it is returned on access; any other
    value is raised, so it should be an exception class or instance.
    """

    def __init__(self, encoding) -> None:
        super().__init__()
        # either the encoding name to report or the exception to raise
        self.val = encoding

    @property
    def encoding(self):
        """Return the stored value if it is a ``str``, otherwise raise it."""
        return self.raise_or_return(self.val)

    @staticmethod
    def raise_or_return(val):
        """Return ``val`` when it is a ``str``; raise it in every other case."""
        if not isinstance(val, str):
            raise val
        return val
|
||||
|
||||
|
||||
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
    """The encoding is taken from whichever of stdout/stdin reports a value."""
    # Ensures that sys.stdout.encoding or sys.stdin.encoding is used when
    # they have values filled.
    # GH 21552
    stream_encodings = ((empty, ""), (filled, filled))
    with monkeypatch.context() as patcher:
        for stream_name, reported in stream_encodings:
            patcher.setattr(f"sys.{stream_name}", MockEncoding(reported))
        assert detect_console_encoding() == filled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
    """If stdout fails or reports ``ascii``, the locale's preferred encoding is used."""
    # GH 21552
    def preferred_encoding():
        return "foo"

    with monkeypatch.context() as patcher:
        patcher.setattr("locale.getpreferredencoding", preferred_encoding)
        patcher.setattr("sys.stdout", MockEncoding(encoding))
        assert detect_console_encoding() == "foo"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "std,locale",
    [
        ["ascii", "ascii"],
        ["ascii", locale.Error],
        [AttributeError, "ascii"],
        [AttributeError, locale.Error],
        [OSError, "ascii"],
        [OSError, locale.Error],
    ],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
    """The interpreter default encoding is the last-resort fallback."""
    # When both the stdout/stdin encoding and locale preferred encoding checks
    # fail (or return 'ascii'), we should default to the sys default encoding.
    # GH 21552
    with monkeypatch.context() as context:
        # NOTE: the ``locale`` parameter shadows the ``locale`` module inside
        # this function; it holds either an encoding name or an exception.
        context.setattr(
            "locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale)
        )
        context.setattr("sys.stdout", MockEncoding(std))
        context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
        assert detect_console_encoding() == "sysDefaultEncoding"
|
||||
@ -0,0 +1,288 @@
|
||||
import pytest
|
||||
|
||||
from pandas.errors import CSSWarning
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.formats.css import CSSResolver
|
||||
|
||||
|
||||
def assert_resolves(css, props, inherited=None):
    """
    Assert that resolving *css* yields exactly the property dict *props*.

    Parameters
    ----------
    css : str
        CSS declaration string passed to a fresh ``CSSResolver``.
    props : dict
        Expected resolved properties.
    inherited : dict, optional
        Properties forwarded to the resolver as ``inherited``.
    """
    resolve = CSSResolver()
    actual = resolve(css, inherited=inherited)
    assert props == actual
|
||||
|
||||
|
||||
def assert_same_resolution(css1, css2, inherited=None):
    """
    Assert that *css1* and *css2* resolve to the same property dict.

    Both strings are resolved with the same ``CSSResolver`` instance and the
    same *inherited* properties.
    """
    resolve = CSSResolver()
    resolved1 = resolve(css1, inherited=inherited)
    resolved2 = resolve(css2, inherited=inherited)
    assert resolved1 == resolved2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name,norm,abnorm",
    [
        (
            "whitespace",
            "hello: world; foo: bar",
            " \t hello \t :\n world \n ; \n foo: \tbar\n\n",
        ),
        ("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
        ("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
        ("empty-list", "", ";"),
    ],
)
def test_css_parse_normalisation(name, norm, abnorm):
    """Whitespace, case and empty declarations do not affect resolution."""
    # ``name`` only labels the case; it is not used by the assertion.
    assert_same_resolution(norm, abnorm)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_css,remainder,msg",
    [
        # No colon
        ("hello-world", "", "expected a colon"),
        ("border-style: solid; hello-world", "border-style: solid", "expected a colon"),
        (
            "border-style: solid; hello-world; font-weight: bold",
            "border-style: solid; font-weight: bold",
            "expected a colon",
        ),
        # Unclosed string fail
        # Invalid size
        ("font-size: blah", "font-size: 1em", "Unhandled size"),
        ("font-size: 1a2b", "font-size: 1em", "Unhandled size"),
        ("font-size: 1e5pt", "font-size: 1em", "Unhandled size"),
        ("font-size: 1+6pt", "font-size: 1em", "Unhandled size"),
        ("font-size: 1unknownunit", "font-size: 1em", "Unhandled size"),
        ("font-size: 10", "font-size: 1em", "Unhandled size"),
        ("font-size: 10 pt", "font-size: 1em", "Unhandled size"),
        # Too many args
        ("border-top: 1pt solid red green", "border-top: 1pt solid green", "Too many"),
    ],
)
def test_css_parse_invalid(invalid_css, remainder, msg):
    """Invalid CSS warns with *msg* and resolves like the valid *remainder*."""
    with tm.assert_produces_warning(CSSWarning, match=msg):
        assert_same_resolution(invalid_css, remainder)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "shorthand,expansions",
    [
        ("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
        ("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
        (
            "border-width",
            [
                "border-top-width",
                "border-right-width",
                "border-bottom-width",
                "border-left-width",
            ],
        ),
        (
            "border-color",
            [
                "border-top-color",
                "border-right-color",
                "border-bottom-color",
                "border-left-color",
            ],
        ),
        (
            "border-style",
            [
                "border-top-style",
                "border-right-style",
                "border-bottom-style",
                "border-left-style",
            ],
        ),
    ],
)
def test_css_side_shorthands(shorthand, expansions):
    """Side shorthands with 1-4 values expand to top/right/bottom/left."""
    top, right, bottom, left = expansions

    # One value: applied to all four sides.
    assert_resolves(
        f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}
    )

    # Two values: vertical, then horizontal.
    assert_resolves(
        f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}
    )

    # Three values: top, horizontal, bottom.
    assert_resolves(
        f"{shorthand}: 1pt 4pt 2pt",
        {top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"},
    )

    # Four values: top, right, bottom, left.
    assert_resolves(
        f"{shorthand}: 1pt 4pt 2pt 0pt",
        {top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
    )

    # Five values cannot be expanded: a warning is issued and nothing resolves.
    with tm.assert_produces_warning(CSSWarning, match="Could not expand"):
        assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "shorthand,sides",
    [
        ("border-top", ["top"]),
        ("border-right", ["right"]),
        ("border-bottom", ["bottom"]),
        ("border-left", ["left"]),
        ("border", ["top", "right", "bottom", "left"]),
    ],
)
def test_css_border_shorthand_sides(shorthand, sides):
    """A border shorthand sets color, style and width on each side it covers."""

    def create_border_dict(sides, color=None, style=None, width=None):
        # Build the expected per-side properties, skipping falsy components.
        resolved = {}
        for side in sides:
            if color:
                resolved[f"border-{side}-color"] = color
            if style:
                resolved[f"border-{side}-style"] = style
            if width:
                resolved[f"border-{side}-width"] = width
        return resolved

    assert_resolves(
        f"{shorthand}: 1pt red solid", create_border_dict(sides, "red", "solid", "1pt")
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "prop, expected",
    [
        ("1pt red solid", ("red", "solid", "1pt")),
        ("red 1pt solid", ("red", "solid", "1pt")),
        ("red solid 1pt", ("red", "solid", "1pt")),
        ("solid 1pt red", ("red", "solid", "1pt")),
        ("red solid", ("red", "solid", "1.500000pt")),
        # Note: color=black is not CSS conforming
        # (See https://drafts.csswg.org/css-backgrounds/#border-shorthands)
        ("1pt solid", ("black", "solid", "1pt")),
        ("1pt red", ("red", "none", "1pt")),
        ("red", ("red", "none", "1.500000pt")),
        ("1pt", ("black", "none", "1pt")),
        ("solid", ("black", "solid", "1.500000pt")),
        # Sizes
        ("1em", ("black", "none", "12pt")),
    ],
)
def test_css_border_shorthands(prop, expected):
    """Border components parse in any order; missing ones take the defaults."""
    color, style, width = expected

    assert_resolves(
        f"border-left: {prop}",
        {
            "border-left-color": color,
            "border-left-style": style,
            "border-left-width": width,
        },
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "style,inherited,equiv",
    [
        ("margin: 1px; margin: 2px", "", "margin: 2px"),
        ("margin: 1px", "margin: 2px", "margin: 1px"),
        ("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
        (
            "margin: 1px; margin-top: 2px",
            "",
            "margin-left: 1px; margin-right: 1px; margin-bottom: 1px; margin-top: 2px",
        ),
        ("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
        ("margin: 1px", "margin-top: 2px", "margin: 1px"),
        (
            "margin: 1px; margin-top: inherit",
            "margin: 2px",
            "margin: 1px; margin-top: 2px",
        ),
    ],
)
def test_css_precedence(style, inherited, equiv):
    """Later declarations override earlier ones, which override inherited ones."""
    resolve = CSSResolver()
    # Resolve the inherited CSS first so it can be fed to the style under test.
    inherited_props = resolve(inherited)
    style_props = resolve(style, inherited=inherited_props)
    equiv_props = resolve(equiv)
    assert style_props == equiv_props
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "style,equiv",
    [
        (
            "margin: 1px; margin-top: inherit",
            "margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
        ),
        ("margin-top: inherit", ""),
        ("margin-top: initial", ""),
    ],
)
def test_css_none_absent(style, equiv):
    """'inherit'/'initial' with nothing to fall back on drop the property."""
    assert_same_resolution(style, equiv)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "size,resolved",
    [
        ("xx-small", "6pt"),
        ("x-small", f"{7.5:f}pt"),
        ("small", f"{9.6:f}pt"),
        ("medium", "12pt"),
        ("large", f"{13.5:f}pt"),
        ("x-large", "18pt"),
        ("xx-large", "24pt"),
        ("8px", "6pt"),
        ("1.25pc", "15pt"),
        (".25in", "18pt"),
        ("02.54cm", "72pt"),
        ("25.4mm", "72pt"),
        ("101.6q", "72pt"),
    ],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"])  # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
    """Absolute font sizes resolve to points regardless of the inherited size."""
    if relative_to is None:
        inherited = None
    else:
        inherited = {"font-size": relative_to}
    assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "size,relative_to,resolved",
    [
        ("1em", None, "12pt"),
        ("1.0em", None, "12pt"),
        ("1.25em", None, "15pt"),
        ("1em", "16pt", "16pt"),
        ("1.0em", "16pt", "16pt"),
        ("1.25em", "16pt", "20pt"),
        ("1rem", "16pt", "12pt"),
        ("1.0rem", "16pt", "12pt"),
        ("1.25rem", "16pt", "15pt"),
        ("100%", None, "12pt"),
        ("125%", None, "15pt"),
        ("100%", "16pt", "16pt"),
        ("125%", "16pt", "20pt"),
        ("2ex", None, "12pt"),
        ("2.0ex", None, "12pt"),
        ("2.50ex", None, "15pt"),
        ("inherit", "16pt", "16pt"),
        ("smaller", None, "10pt"),
        ("smaller", "18pt", "15pt"),
        ("larger", None, f"{14.4:f}pt"),
        ("larger", "15pt", "18pt"),
    ],
)
def test_css_relative_font_size(size, relative_to, resolved):
    """Relative font sizes resolve against the inherited size when one is given."""
    if relative_to is None:
        inherited = None
    else:
        inherited = {"font-size": relative_to}
    assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
|
||||
@ -0,0 +1,254 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
reset_option,
|
||||
set_eng_float_format,
|
||||
)
|
||||
|
||||
from pandas.io.formats.format import EngFormatter
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_float_format():
    """Reset ``display.float_format`` after every test in this module."""
    yield
    # Teardown: undo any set_eng_float_format() call made by the test.
    reset_option("display.float_format")
|
||||
|
||||
|
||||
class TestEngFormatter:
|
||||
def test_eng_float_formatter2(self, float_frame):
|
||||
df = float_frame
|
||||
df.loc[5] = 0
|
||||
|
||||
set_eng_float_format()
|
||||
repr(df)
|
||||
|
||||
set_eng_float_format(use_eng_prefix=True)
|
||||
repr(df)
|
||||
|
||||
set_eng_float_format(accuracy=0)
|
||||
repr(df)
|
||||
|
||||
def test_eng_float_formatter(self):
|
||||
df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})
|
||||
|
||||
set_eng_float_format()
|
||||
result = df.to_string()
|
||||
expected = (
|
||||
" A\n"
|
||||
"0 1.410E+00\n"
|
||||
"1 141.000E+00\n"
|
||||
"2 14.100E+03\n"
|
||||
"3 1.410E+06"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
set_eng_float_format(use_eng_prefix=True)
|
||||
result = df.to_string()
|
||||
expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
|
||||
assert result == expected
|
||||
|
||||
set_eng_float_format(accuracy=0)
|
||||
result = df.to_string()
|
||||
expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
|
||||
assert result == expected
|
||||
|
||||
def compare(self, formatter, input, output):
|
||||
formatted_input = formatter(input)
|
||||
assert formatted_input == output
|
||||
|
||||
def compare_all(self, formatter, in_out):
|
||||
"""
|
||||
Parameters:
|
||||
-----------
|
||||
formatter: EngFormatter under test
|
||||
in_out: list of tuples. Each tuple = (number, expected_formatting)
|
||||
|
||||
It is tested if 'formatter(number) == expected_formatting'.
|
||||
*number* should be >= 0 because formatter(-number) == fmt is also
|
||||
tested. *fmt* is derived from *expected_formatting*
|
||||
"""
|
||||
for input, output in in_out:
|
||||
self.compare(formatter, input, output)
|
||||
self.compare(formatter, -input, "-" + output[1:])
|
||||
|
||||
def test_exponents_with_eng_prefix(self):
|
||||
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
|
||||
f = np.sqrt(2)
|
||||
in_out = [
|
||||
(f * 10**-24, " 1.414y"),
|
||||
(f * 10**-23, " 14.142y"),
|
||||
(f * 10**-22, " 141.421y"),
|
||||
(f * 10**-21, " 1.414z"),
|
||||
(f * 10**-20, " 14.142z"),
|
||||
(f * 10**-19, " 141.421z"),
|
||||
(f * 10**-18, " 1.414a"),
|
||||
(f * 10**-17, " 14.142a"),
|
||||
(f * 10**-16, " 141.421a"),
|
||||
(f * 10**-15, " 1.414f"),
|
||||
(f * 10**-14, " 14.142f"),
|
||||
(f * 10**-13, " 141.421f"),
|
||||
(f * 10**-12, " 1.414p"),
|
||||
(f * 10**-11, " 14.142p"),
|
||||
(f * 10**-10, " 141.421p"),
|
||||
(f * 10**-9, " 1.414n"),
|
||||
(f * 10**-8, " 14.142n"),
|
||||
(f * 10**-7, " 141.421n"),
|
||||
(f * 10**-6, " 1.414u"),
|
||||
(f * 10**-5, " 14.142u"),
|
||||
(f * 10**-4, " 141.421u"),
|
||||
(f * 10**-3, " 1.414m"),
|
||||
(f * 10**-2, " 14.142m"),
|
||||
(f * 10**-1, " 141.421m"),
|
||||
(f * 10**0, " 1.414"),
|
||||
(f * 10**1, " 14.142"),
|
||||
(f * 10**2, " 141.421"),
|
||||
(f * 10**3, " 1.414k"),
|
||||
(f * 10**4, " 14.142k"),
|
||||
(f * 10**5, " 141.421k"),
|
||||
(f * 10**6, " 1.414M"),
|
||||
(f * 10**7, " 14.142M"),
|
||||
(f * 10**8, " 141.421M"),
|
||||
(f * 10**9, " 1.414G"),
|
||||
(f * 10**10, " 14.142G"),
|
||||
(f * 10**11, " 141.421G"),
|
||||
(f * 10**12, " 1.414T"),
|
||||
(f * 10**13, " 14.142T"),
|
||||
(f * 10**14, " 141.421T"),
|
||||
(f * 10**15, " 1.414P"),
|
||||
(f * 10**16, " 14.142P"),
|
||||
(f * 10**17, " 141.421P"),
|
||||
(f * 10**18, " 1.414E"),
|
||||
(f * 10**19, " 14.142E"),
|
||||
(f * 10**20, " 141.421E"),
|
||||
(f * 10**21, " 1.414Z"),
|
||||
(f * 10**22, " 14.142Z"),
|
||||
(f * 10**23, " 141.421Z"),
|
||||
(f * 10**24, " 1.414Y"),
|
||||
(f * 10**25, " 14.142Y"),
|
||||
(f * 10**26, " 141.421Y"),
|
||||
]
|
||||
self.compare_all(formatter, in_out)
|
||||
|
||||
def test_exponents_without_eng_prefix(self):
|
||||
formatter = EngFormatter(accuracy=4, use_eng_prefix=False)
|
||||
f = np.pi
|
||||
in_out = [
|
||||
(f * 10**-24, " 3.1416E-24"),
|
||||
(f * 10**-23, " 31.4159E-24"),
|
||||
(f * 10**-22, " 314.1593E-24"),
|
||||
(f * 10**-21, " 3.1416E-21"),
|
||||
(f * 10**-20, " 31.4159E-21"),
|
||||
(f * 10**-19, " 314.1593E-21"),
|
||||
(f * 10**-18, " 3.1416E-18"),
|
||||
(f * 10**-17, " 31.4159E-18"),
|
||||
(f * 10**-16, " 314.1593E-18"),
|
||||
(f * 10**-15, " 3.1416E-15"),
|
||||
(f * 10**-14, " 31.4159E-15"),
|
||||
(f * 10**-13, " 314.1593E-15"),
|
||||
(f * 10**-12, " 3.1416E-12"),
|
||||
(f * 10**-11, " 31.4159E-12"),
|
||||
(f * 10**-10, " 314.1593E-12"),
|
||||
(f * 10**-9, " 3.1416E-09"),
|
||||
(f * 10**-8, " 31.4159E-09"),
|
||||
(f * 10**-7, " 314.1593E-09"),
|
||||
(f * 10**-6, " 3.1416E-06"),
|
||||
(f * 10**-5, " 31.4159E-06"),
|
||||
(f * 10**-4, " 314.1593E-06"),
|
||||
(f * 10**-3, " 3.1416E-03"),
|
||||
(f * 10**-2, " 31.4159E-03"),
|
||||
(f * 10**-1, " 314.1593E-03"),
|
||||
(f * 10**0, " 3.1416E+00"),
|
||||
(f * 10**1, " 31.4159E+00"),
|
||||
(f * 10**2, " 314.1593E+00"),
|
||||
(f * 10**3, " 3.1416E+03"),
|
||||
(f * 10**4, " 31.4159E+03"),
|
||||
(f * 10**5, " 314.1593E+03"),
|
||||
(f * 10**6, " 3.1416E+06"),
|
||||
(f * 10**7, " 31.4159E+06"),
|
||||
(f * 10**8, " 314.1593E+06"),
|
||||
(f * 10**9, " 3.1416E+09"),
|
||||
(f * 10**10, " 31.4159E+09"),
|
||||
(f * 10**11, " 314.1593E+09"),
|
||||
(f * 10**12, " 3.1416E+12"),
|
||||
(f * 10**13, " 31.4159E+12"),
|
||||
(f * 10**14, " 314.1593E+12"),
|
||||
(f * 10**15, " 3.1416E+15"),
|
||||
(f * 10**16, " 31.4159E+15"),
|
||||
(f * 10**17, " 314.1593E+15"),
|
||||
(f * 10**18, " 3.1416E+18"),
|
||||
(f * 10**19, " 31.4159E+18"),
|
||||
(f * 10**20, " 314.1593E+18"),
|
||||
(f * 10**21, " 3.1416E+21"),
|
||||
(f * 10**22, " 31.4159E+21"),
|
||||
(f * 10**23, " 314.1593E+21"),
|
||||
(f * 10**24, " 3.1416E+24"),
|
||||
(f * 10**25, " 31.4159E+24"),
|
||||
(f * 10**26, " 314.1593E+24"),
|
||||
]
|
||||
self.compare_all(formatter, in_out)
|
||||
|
||||
def test_rounding(self):
|
||||
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
|
||||
in_out = [
|
||||
(5.55555, " 5.556"),
|
||||
(55.5555, " 55.556"),
|
||||
(555.555, " 555.555"),
|
||||
(5555.55, " 5.556k"),
|
||||
(55555.5, " 55.556k"),
|
||||
(555555, " 555.555k"),
|
||||
]
|
||||
self.compare_all(formatter, in_out)
|
||||
|
||||
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
|
||||
in_out = [
|
||||
(5.55555, " 5.6"),
|
||||
(55.5555, " 55.6"),
|
||||
(555.555, " 555.6"),
|
||||
(5555.55, " 5.6k"),
|
||||
(55555.5, " 55.6k"),
|
||||
(555555, " 555.6k"),
|
||||
]
|
||||
self.compare_all(formatter, in_out)
|
||||
|
||||
formatter = EngFormatter(accuracy=0, use_eng_prefix=True)
|
||||
in_out = [
|
||||
(5.55555, " 6"),
|
||||
(55.5555, " 56"),
|
||||
(555.555, " 556"),
|
||||
(5555.55, " 6k"),
|
||||
(55555.5, " 56k"),
|
||||
(555555, " 556k"),
|
||||
]
|
||||
self.compare_all(formatter, in_out)
|
||||
|
||||
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
|
||||
result = formatter(0)
|
||||
assert result == " 0.000"
|
||||
|
||||
def test_nan(self):
|
||||
# Issue #11981
|
||||
|
||||
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
|
||||
result = formatter(np.nan)
|
||||
assert result == "NaN"
|
||||
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [1.5, 10.3, 20.5],
|
||||
"b": [50.3, 60.67, 70.12],
|
||||
"c": [100.2, 101.33, 120.33],
|
||||
}
|
||||
)
|
||||
pt = df.pivot_table(values="a", index="b", columns="c")
|
||||
set_eng_float_format(accuracy=1)
|
||||
result = pt.to_string()
|
||||
assert "NaN" in result
|
||||
|
||||
def test_inf(self):
|
||||
# Issue #11981
|
||||
|
||||
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
|
||||
result = formatter(np.inf)
|
||||
assert result == "inf"
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,91 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas._config.config as cf
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestTableSchemaRepr:
|
||||
def test_publishes(self, ip):
|
||||
ipython = ip.instance(config=ip.config)
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
objects = [df["A"], df] # dataframe / series
|
||||
expected_keys = [
|
||||
{"text/plain", "application/vnd.dataresource+json"},
|
||||
{"text/plain", "text/html", "application/vnd.dataresource+json"},
|
||||
]
|
||||
|
||||
opt = cf.option_context("display.html.table_schema", True)
|
||||
last_obj = None
|
||||
for obj, expected in zip(objects, expected_keys):
|
||||
last_obj = obj
|
||||
with cf.option_context("display.html.table_schema", True):
|
||||
# Can't reuse opt on all systems GH#58055
|
||||
formatted = ipython.display_formatter.format(obj)
|
||||
assert set(formatted[0].keys()) == expected
|
||||
|
||||
with_latex = cf.option_context("styler.render.repr", "latex")
|
||||
|
||||
with opt, with_latex:
|
||||
formatted = ipython.display_formatter.format(last_obj)
|
||||
|
||||
expected = {
|
||||
"text/plain",
|
||||
"text/html",
|
||||
"text/latex",
|
||||
"application/vnd.dataresource+json",
|
||||
}
|
||||
assert set(formatted[0].keys()) == expected
|
||||
|
||||
def test_publishes_not_implemented(self, ip):
|
||||
# column MultiIndex
|
||||
# GH#15996
|
||||
midx = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx
|
||||
)
|
||||
|
||||
opt = cf.option_context("display.html.table_schema", True)
|
||||
|
||||
with opt:
|
||||
formatted = ip.instance(config=ip.config).display_formatter.format(df)
|
||||
|
||||
expected = {"text/plain", "text/html"}
|
||||
assert set(formatted[0].keys()) == expected
|
||||
|
||||
def test_config_on(self):
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
with cf.option_context("display.html.table_schema", True):
|
||||
result = df._repr_data_resource_()
|
||||
|
||||
assert result is not None
|
||||
|
||||
def test_config_default_off(self):
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
with cf.option_context("display.html.table_schema", False):
|
||||
result = df._repr_data_resource_()
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_enable_data_resource_formatter(self, ip):
|
||||
# GH#10491
|
||||
formatters = ip.instance(config=ip.config).display_formatter.formatters
|
||||
mimetype = "application/vnd.dataresource+json"
|
||||
|
||||
with cf.option_context("display.html.table_schema", True):
|
||||
assert "application/vnd.dataresource+json" in formatters
|
||||
assert formatters[mimetype].enabled
|
||||
|
||||
# still there, just disabled
|
||||
assert "application/vnd.dataresource+json" in formatters
|
||||
assert not formatters[mimetype].enabled
|
||||
|
||||
# able to re-set
|
||||
with cf.option_context("display.html.table_schema", True):
|
||||
assert "application/vnd.dataresource+json" in formatters
|
||||
assert formatters[mimetype].enabled
|
||||
# smoke test that it works
|
||||
ip.instance(config=ip.config).display_formatter.format(cf)
|
||||
@ -0,0 +1,172 @@
|
||||
# Note! This file is aimed specifically at pandas.io.formats.printing utility
|
||||
# functions, not the general printing of pandas objects.
|
||||
from collections.abc import Mapping
|
||||
import string
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas._config.config as cf
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from pandas.io.formats import printing
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input_names, expected_names",
    [
        (["'a b"], "['\\'a b']"),  # Escape leading quote
        (["test's b"], "['test\\'s b']"),  # Escape apostrophe
        (["'test' b"], "['\\'test\\' b']"),  # Escape surrounding quotes
        (["test b'"], "['test b\\'']"),  # Escape single quote
        (["test\n' b"], "['test\\n\\' b']"),  # Escape quotes, preserve newline
    ],
)
def test_formatted_index_names(input_names, expected_names):
    """Quotes inside index names are escaped in ``str(index.names)``."""
    # GH#60190
    df = pd.DataFrame({name: [1, 2, 3] for name in input_names}).set_index(input_names)
    formatted_names = str(df.index.names)

    assert formatted_names == expected_names
|
||||
|
||||
|
||||
def test_adjoin():
    """``adjoin`` lays lists out as left-justified columns."""
    data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
    # Every column but the last is padded to its widest entry plus the
    # requested two spaces.
    expected = "a  dd  ggg\nb  ee  hhh\nc  ff  iii"

    adjoined = printing.adjoin(2, *data)

    assert adjoined == expected
|
||||
|
||||
|
||||
class MyMapping(Mapping):
    """Minimal non-dict ``Mapping`` with keys 'a' and 'b', both mapped to 4."""

    def __getitem__(self, key):
        # Every lookup returns 4, whatever the key.
        return 4

    def __iter__(self):
        return iter(["a", "b"])

    def __len__(self):
        return 2
|
||||
|
||||
|
||||
class TestPPrintThing:
    """Tests for ``printing.pprint_thing``."""

    def test_repr_binary_type(self):
        """Strings are quoted only when ``quote_strings`` is true."""
        letters = string.ascii_letters
        try:
            raw = bytes(letters, encoding=cf.get_option("display.encoding"))
        except TypeError:
            raw = bytes(letters)
        b = str(raw.decode("utf-8"))
        res = printing.pprint_thing(b, quote_strings=True)
        assert res == repr(b)
        res = printing.pprint_thing(b, quote_strings=False)
        assert res == b

    def test_repr_obeys_max_seq_limit(self):
        """Sequences are truncated according to ``display.max_seq_items``."""
        with cf.option_context("display.max_seq_items", 2000):
            assert len(printing.pprint_thing(list(range(1000)))) > 1000

        with cf.option_context("display.max_seq_items", 5):
            assert len(printing.pprint_thing(list(range(1000)))) < 100

        with cf.option_context("display.max_seq_items", 1):
            assert len(printing.pprint_thing(list(range(1000)))) < 9

    def test_repr_set(self):
        """Sets are rendered with braces."""
        assert printing.pprint_thing({1}) == "{1}"

    def test_repr_dict(self):
        """Dicts are rendered like their built-in repr."""
        assert printing.pprint_thing({"a": 4, "b": 4}) == "{'a': 4, 'b': 4}"

    def test_repr_mapping(self):
        """Non-dict mappings are rendered like dicts."""
        assert printing.pprint_thing(MyMapping()) == "{'a': 4, 'b': 4}"

    def test_repr_frozenset(self):
        """Frozensets are rendered with the ``frozenset({...})`` wrapper."""
        assert printing.pprint_thing(frozenset([1, 2])) == "frozenset({1, 2})"
|
||||
|
||||
|
||||
class TestFormatBase:
|
||||
def test_adjoin(self):
|
||||
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
|
||||
expected = "a dd ggg\nb ee hhh\nc ff iii"
|
||||
|
||||
adjoined = printing.adjoin(2, *data)
|
||||
|
||||
assert adjoined == expected
|
||||
|
||||
def test_adjoin_unicode(self):
|
||||
data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
|
||||
expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
|
||||
adjoined = printing.adjoin(2, *data)
|
||||
assert adjoined == expected
|
||||
|
||||
adj = printing._EastAsianTextAdjustment()
|
||||
|
||||
expected = """あ dd ggg
|
||||
b ええ hhh
|
||||
c ff いいい"""
|
||||
|
||||
adjoined = adj.adjoin(2, *data)
|
||||
assert adjoined == expected
|
||||
cols = adjoined.split("\n")
|
||||
assert adj.len(cols[0]) == 13
|
||||
assert adj.len(cols[1]) == 13
|
||||
assert adj.len(cols[2]) == 16
|
||||
|
||||
expected = """あ dd ggg
|
||||
b ええ hhh
|
||||
c ff いいい"""
|
||||
|
||||
adjoined = adj.adjoin(7, *data)
|
||||
assert adjoined == expected
|
||||
cols = adjoined.split("\n")
|
||||
assert adj.len(cols[0]) == 23
|
||||
assert adj.len(cols[1]) == 23
|
||||
assert adj.len(cols[2]) == 26
|
||||
|
||||
def test_justify(self):
|
||||
adj = printing._EastAsianTextAdjustment()
|
||||
|
||||
def just(x, *args, **kwargs):
|
||||
# wrapper to test single str
|
||||
return adj.justify([x], *args, **kwargs)[0]
|
||||
|
||||
assert just("abc", 5, mode="left") == "abc "
|
||||
assert just("abc", 5, mode="center") == " abc "
|
||||
assert just("abc", 5, mode="right") == " abc"
|
||||
assert just("abc", 5, mode="left") == "abc "
|
||||
assert just("abc", 5, mode="center") == " abc "
|
||||
assert just("abc", 5, mode="right") == " abc"
|
||||
|
||||
assert just("パンダ", 5, mode="left") == "パンダ"
|
||||
assert just("パンダ", 5, mode="center") == "パンダ"
|
||||
assert just("パンダ", 5, mode="right") == "パンダ"
|
||||
|
||||
assert just("パンダ", 10, mode="left") == "パンダ "
|
||||
assert just("パンダ", 10, mode="center") == " パンダ "
|
||||
assert just("パンダ", 10, mode="right") == " パンダ"
|
||||
|
||||
def test_east_asian_len(self):
|
||||
adj = printing._EastAsianTextAdjustment()
|
||||
|
||||
assert adj.len("abc") == 3
|
||||
assert adj.len("abc") == 3
|
||||
|
||||
assert adj.len("パンダ") == 6
|
||||
assert adj.len("パンダ") == 5
|
||||
assert adj.len("パンダpanda") == 11
|
||||
assert adj.len("パンダpanda") == 10
|
||||
|
||||
def test_ambiguous_width(self):
|
||||
adj = printing._EastAsianTextAdjustment()
|
||||
assert adj.len("¡¡ab") == 4
|
||||
|
||||
with cf.option_context("display.unicode.ambiguous_as_wide", True):
|
||||
adj = printing._EastAsianTextAdjustment()
|
||||
assert adj.len("¡¡ab") == 6
|
||||
|
||||
data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
|
||||
expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい"
|
||||
adjoined = adj.adjoin(2, *data)
|
||||
assert adjoined == expected
|
||||
@ -0,0 +1,886 @@
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from zipfile import ZipFile
|
||||
|
||||
from _csv import Error
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
compat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestToCSV:
|
||||
def test_to_csv_with_single_column(self, temp_file):
|
||||
# see gh-18676, https://bugs.python.org/issue32255
|
||||
#
|
||||
# Python's CSV library adds an extraneous '""'
|
||||
# before the newline when the NaN-value is in
|
||||
# the first row. Otherwise, only the newline
|
||||
# character is added. This behavior is inconsistent
|
||||
# and was patched in https://bugs.python.org/pull_request4672.
|
||||
df1 = DataFrame([None, 1])
|
||||
expected1 = """\
|
||||
""
|
||||
1.0
|
||||
"""
|
||||
df1.to_csv(temp_file, header=None, index=None)
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected1
|
||||
|
||||
df2 = DataFrame([1, None])
|
||||
expected2 = """\
|
||||
1.0
|
||||
""
|
||||
"""
|
||||
df2.to_csv(temp_file, header=None, index=None)
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected2
|
||||
|
||||
def test_to_csv_default_encoding(self, temp_file):
|
||||
# GH17097
|
||||
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
|
||||
|
||||
# the default to_csv encoding is uft-8.
|
||||
df.to_csv(temp_file)
|
||||
tm.assert_frame_equal(pd.read_csv(temp_file, index_col=0), df)
|
||||
|
||||
def test_to_csv_quotechar(self, temp_file):
    """``quotechar`` replaces the default quote; ``None`` is rejected."""
    df = DataFrame({"col": [1, 2]})
    expected = """\
"","col"
"0","1"
"1","2"
"""

    df.to_csv(temp_file, quoting=1)  # 1=QUOTE_ALL
    with open(temp_file, encoding="utf-8") as f:
        assert f.read() == expected

    expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""

    df.to_csv(temp_file, quoting=1, quotechar="$")
    with open(temp_file, encoding="utf-8") as f:
        assert f.read() == expected

    # Quoting everything without a quote character is an error.
    with pytest.raises(TypeError, match="quotechar"):
        df.to_csv(temp_file, quoting=1, quotechar=None)
|
||||
|
||||
def test_to_csv_doublequote(self, temp_file):
    """Embedded quotes are doubled; disabling that needs an escapechar."""
    df = DataFrame({"col": ['a"a', '"bb"']})
    expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''

    df.to_csv(temp_file, quoting=1, doublequote=True)  # QUOTE_ALL
    with open(temp_file, encoding="utf-8") as f:
        assert f.read() == expected

    with pytest.raises(Error, match="escapechar"):
        df.to_csv(temp_file, doublequote=False)  # no escapechar set
|
||||
|
||||
def test_to_csv_escapechar(self, temp_file):
|
||||
df = DataFrame({"col": ['a"a', '"bb"']})
|
||||
expected = """\
|
||||
"","col"
|
||||
"0","a\\"a"
|
||||
"1","\\"bb\\""
|
||||
"""
|
||||
|
||||
df.to_csv(temp_file, quoting=1, doublequote=False, escapechar="\\")
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected
|
||||
|
||||
df = DataFrame({"col": ["a,a", ",bb,"]})
|
||||
expected = """\
|
||||
,col
|
||||
0,a\\,a
|
||||
1,\\,bb\\,
|
||||
"""
|
||||
|
||||
df.to_csv(temp_file, quoting=3, escapechar="\\") # QUOTE_NONE
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected
|
||||
|
||||
def test_csv_to_string(self):
|
||||
df = DataFrame({"col": [1, 2]})
|
||||
expected_rows = [",col", "0,1", "1,2"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv() == expected
|
||||
|
||||
def test_to_csv_decimal(self):
|
||||
# see gh-781
|
||||
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
|
||||
|
||||
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
|
||||
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv() == expected_default
|
||||
|
||||
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
|
||||
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
|
||||
|
||||
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
|
||||
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv(float_format="%.2f") == expected_float_format_default
|
||||
|
||||
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
|
||||
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert (
|
||||
df.to_csv(decimal=",", sep=";", float_format="%.2f")
|
||||
== expected_float_format
|
||||
)
|
||||
|
||||
# see gh-11553: testing if decimal is taken into account for '0.0'
|
||||
df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
|
||||
|
||||
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv(index=False, decimal="^") == expected
|
||||
|
||||
# same but for an index
|
||||
assert df.set_index("a").to_csv(decimal="^") == expected
|
||||
|
||||
# same for a multi-index
|
||||
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
|
||||
|
||||
def test_to_csv_float_format(self):
|
||||
# testing if float_format is taken into account for the index
|
||||
# GH 11553
|
||||
df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
|
||||
|
||||
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.set_index("a").to_csv(float_format="%.2f") == expected
|
||||
|
||||
# same for a multi-index
|
||||
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
|
||||
|
||||
def test_to_csv_na_rep(self):
|
||||
# see gh-11553
|
||||
#
|
||||
# Testing if NaN values are correctly represented in the index.
|
||||
df = DataFrame({"a": [0, np.nan], "b": [0, 1], "c": [2, 3]})
|
||||
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
|
||||
assert df.set_index("a").to_csv(na_rep="_") == expected
|
||||
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
|
||||
|
||||
# now with an index containing only NaNs
|
||||
df = DataFrame({"a": np.nan, "b": [0, 1], "c": [2, 3]})
|
||||
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
|
||||
assert df.set_index("a").to_csv(na_rep="_") == expected
|
||||
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
|
||||
|
||||
# check if na_rep parameter does not break anything when no NaN
|
||||
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
|
||||
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
|
||||
assert df.set_index("a").to_csv(na_rep="_") == expected
|
||||
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
|
||||
|
||||
csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
|
||||
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
|
||||
assert expected == csv
|
||||
|
||||
def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
|
||||
# GH 29975
|
||||
# Make sure full na_rep shows up when a dtype is provided
|
||||
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
|
||||
csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
|
||||
na_rep="ZZZZZ"
|
||||
)
|
||||
assert expected == csv
|
||||
|
||||
def test_to_csv_date_format(self):
|
||||
# GH 10209
|
||||
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
|
||||
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="D")})
|
||||
|
||||
expected_rows = [
|
||||
",A",
|
||||
"0,2013-01-01 00:00:00",
|
||||
"1,2013-01-01 00:00:01",
|
||||
"2,2013-01-01 00:00:02",
|
||||
"3,2013-01-01 00:00:03",
|
||||
"4,2013-01-01 00:00:04",
|
||||
]
|
||||
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df_sec.to_csv() == expected_default_sec
|
||||
|
||||
expected_rows = [
|
||||
",A",
|
||||
"0,2013-01-01 00:00:00",
|
||||
"1,2013-01-02 00:00:00",
|
||||
"2,2013-01-03 00:00:00",
|
||||
"3,2013-01-04 00:00:00",
|
||||
"4,2013-01-05 00:00:00",
|
||||
]
|
||||
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
|
||||
|
||||
expected_rows = [
|
||||
",A",
|
||||
"0,2013-01-01",
|
||||
"1,2013-01-01",
|
||||
"2,2013-01-01",
|
||||
"3,2013-01-01",
|
||||
"4,2013-01-01",
|
||||
]
|
||||
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
|
||||
|
||||
expected_rows = [
|
||||
",A",
|
||||
"0,2013-01-01",
|
||||
"1,2013-01-02",
|
||||
"2,2013-01-03",
|
||||
"3,2013-01-04",
|
||||
"4,2013-01-05",
|
||||
]
|
||||
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df_day.to_csv() == expected_default_day
|
||||
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
|
||||
|
||||
# see gh-7791
|
||||
#
|
||||
# Testing if date_format parameter is taken into account
|
||||
# for multi-indexed DataFrames.
|
||||
df_sec["B"] = 0
|
||||
df_sec["C"] = 1
|
||||
|
||||
expected_rows = ["A,B,C", "2013-01-01,0,1.0"]
|
||||
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
|
||||
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
|
||||
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
|
||||
|
||||
def test_to_csv_different_datetime_formats(self):
|
||||
# GH#21734
|
||||
df = DataFrame(
|
||||
{
|
||||
"date": pd.to_datetime("1970-01-01"),
|
||||
"datetime": pd.date_range("1970-01-01", periods=2, freq="h"),
|
||||
}
|
||||
)
|
||||
expected_rows = [
|
||||
"date,datetime",
|
||||
"1970-01-01,1970-01-01 00:00:00",
|
||||
"1970-01-01,1970-01-01 01:00:00",
|
||||
]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert df.to_csv(index=False) == expected
|
||||
|
||||
def test_to_csv_date_format_in_categorical(self):
|
||||
# GH#40754
|
||||
ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
|
||||
ser = ser.astype("category")
|
||||
expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
|
||||
assert ser.to_csv(index=False) == expected
|
||||
|
||||
ser = pd.Series(
|
||||
pd.date_range(
|
||||
start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
|
||||
).append(pd.DatetimeIndex([pd.NaT]))
|
||||
)
|
||||
ser = ser.astype("category")
|
||||
assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
|
||||
|
||||
def test_to_csv_float_ea_float_format(self):
|
||||
# GH#45991
|
||||
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
|
||||
df["a"] = df["a"].astype("Float64")
|
||||
result = df.to_csv(index=False, float_format="%.5f")
|
||||
expected = tm.convert_rows_list_to_csv_str(
|
||||
["a,b", "1.10000,c", "2.02000,c", ",c", "6.00001,c"]
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
def test_to_csv_float_ea_no_float_format(self):
|
||||
# GH#45991
|
||||
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
|
||||
df["a"] = df["a"].astype("Float64")
|
||||
result = df.to_csv(index=False)
|
||||
expected = tm.convert_rows_list_to_csv_str(
|
||||
["a,b", "1.1,c", "2.02,c", ",c", "6.000006,c"]
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
def test_to_csv_float_ea_nan_distinguish(self, using_nan_is_na):
|
||||
# GH#61617, GH#65227 - to_csv should not crash when FloatingArray
|
||||
# contains unmasked NaN (with distinguish_nan_and_na=True)
|
||||
df = DataFrame({"a": pd.array([np.nan, pd.NA, 3.0], dtype="Float64"), "b": "c"})
|
||||
result = df.to_csv(index=False)
|
||||
if using_nan_is_na:
|
||||
expected = tm.convert_rows_list_to_csv_str(["a,b", ",c", ",c", "3.0,c"])
|
||||
else:
|
||||
expected = tm.convert_rows_list_to_csv_str(["a,b", "nan,c", ",c", "3.0,c"])
|
||||
assert result == expected
|
||||
|
||||
def test_to_csv_float_ea_nan_distinguish_series(self, using_nan_is_na):
|
||||
# GH#65227 - Series.to_csv with FloatingArray containing both NaN and NA
|
||||
ser = pd.Series((1, pd.NA, 0), index=["a", "b", "c"], dtype="Float64", name="x")
|
||||
ser = ser / ser
|
||||
result = ser.to_csv()
|
||||
if using_nan_is_na:
|
||||
expected = tm.convert_rows_list_to_csv_str([",x", "a,1.0", "b,", "c,"])
|
||||
else:
|
||||
expected = tm.convert_rows_list_to_csv_str([",x", "a,1.0", "b,", "c,nan"])
|
||||
assert result == expected
|
||||
|
||||
def test_to_csv_multi_index(self):
|
||||
# see gh-6618
|
||||
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
|
||||
|
||||
exp_rows = [",1", ",2", "0,1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv() == exp
|
||||
|
||||
exp_rows = ["1", "2", "1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv(index=False) == exp
|
||||
|
||||
df = DataFrame(
|
||||
[1],
|
||||
columns=pd.MultiIndex.from_arrays([[1], [2]]),
|
||||
index=pd.MultiIndex.from_arrays([[1], [2]]),
|
||||
)
|
||||
|
||||
exp_rows = [",,1", ",,2", "1,2,1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv() == exp
|
||||
|
||||
exp_rows = ["1", "2", "1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv(index=False) == exp
|
||||
|
||||
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
|
||||
|
||||
exp_rows = [",foo", ",bar", "0,1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv() == exp
|
||||
|
||||
exp_rows = ["foo", "bar", "1"]
|
||||
exp = tm.convert_rows_list_to_csv_str(exp_rows)
|
||||
assert df.to_csv(index=False) == exp
|
||||
|
||||
@pytest.mark.parametrize(
    "ind,expected",
    [
        (
            pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
            "x,data\n1.0,1\n",
        ),
        (
            pd.MultiIndex(levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]),
            "x,y,data\n1.0,2.0,1\n",
        ),
    ],
)
def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series):
    """Check one- and two-level float MultiIndex output for a Series or a DataFrame."""
    # see gh-19589
    ser = pd.Series([1], ind, name="data")
    obj = frame_or_series(ser)

    written = obj.to_csv(lineterminator="\n", header=True)
    assert written == expected
|
||||
|
||||
def test_to_csv_string_array_ascii(self, temp_file):
|
||||
# GH 10813
|
||||
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
|
||||
df = DataFrame(str_array)
|
||||
expected_ascii = """\
|
||||
,names
|
||||
0,"['foo', 'bar']"
|
||||
1,"['baz', 'qux']"
|
||||
"""
|
||||
df.to_csv(temp_file, encoding="ascii")
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected_ascii
|
||||
|
||||
def test_to_csv_string_array_utf8(self, temp_file):
|
||||
# GH 10813
|
||||
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
|
||||
df = DataFrame(str_array)
|
||||
expected_utf8 = """\
|
||||
,names
|
||||
0,"['foo', 'bar']"
|
||||
1,"['baz', 'qux']"
|
||||
"""
|
||||
df.to_csv(temp_file, encoding="utf-8")
|
||||
with open(temp_file, encoding="utf-8") as f:
|
||||
assert f.read() == expected_utf8
|
||||
|
||||
def test_to_csv_string_with_lf(self, temp_file):
|
||||
# GH 20353
|
||||
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
|
||||
df = DataFrame(data)
|
||||
|
||||
# case 1: The default line terminator(=os.linesep)(PR 21406)
|
||||
os_linesep = os.linesep.encode("utf-8")
|
||||
expected_noarg = (
|
||||
b"int,str_lf"
|
||||
+ os_linesep
|
||||
+ b"1,abc"
|
||||
+ os_linesep
|
||||
+ b'2,"d\nef"'
|
||||
+ os_linesep
|
||||
+ b'3,"g\nh\n\ni"'
|
||||
+ os_linesep
|
||||
)
|
||||
df.to_csv(temp_file, index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_noarg
|
||||
|
||||
# case 2: LF as line terminator
|
||||
expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
|
||||
df.to_csv(temp_file, lineterminator="\n", index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_lf
|
||||
|
||||
# case 3: CRLF as line terminator
|
||||
# 'lineterminator' should not change inner element
|
||||
expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
|
||||
df.to_csv(temp_file, lineterminator="\r\n", index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_crlf
|
||||
|
||||
def test_to_csv_string_with_crlf(self, temp_file):
|
||||
# GH 20353
|
||||
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
|
||||
df = DataFrame(data)
|
||||
# case 1: The default line terminator(=os.linesep)(PR 21406)
|
||||
os_linesep = os.linesep.encode("utf-8")
|
||||
expected_noarg = (
|
||||
b"int,str_crlf"
|
||||
+ os_linesep
|
||||
+ b"1,abc"
|
||||
+ os_linesep
|
||||
+ b'2,"d\r\nef"'
|
||||
+ os_linesep
|
||||
+ b'3,"g\r\nh\r\n\r\ni"'
|
||||
+ os_linesep
|
||||
)
|
||||
df.to_csv(temp_file, index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_noarg
|
||||
|
||||
# case 2: LF as line terminator
|
||||
expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
|
||||
df.to_csv(temp_file, lineterminator="\n", index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_lf
|
||||
|
||||
# case 3: CRLF as line terminator
|
||||
# 'lineterminator' should not change inner element
|
||||
expected_crlf = (
|
||||
b'int,str_crlf\r\n1,abc\r\n2,"d\r\nef"\r\n3,"g\r\nh\r\n\r\ni"\r\n'
|
||||
)
|
||||
df.to_csv(temp_file, lineterminator="\r\n", index=False)
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == expected_crlf
|
||||
|
||||
def test_to_csv_stdout_file(self, capsys):
    """Check that writing to ``sys.stdout`` emits the CSV and leaves stdout open."""
    # GH 21561
    frame = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"])
    expected_ascii = tm.convert_rows_list_to_csv_str(
        [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
    )

    frame.to_csv(sys.stdout, encoding="ascii")
    out = capsys.readouterr().out

    assert out == expected_ascii
    assert not sys.stdout.closed
|
||||
|
||||
@pytest.mark.xfail(
    compat.is_platform_windows(),
    reason=(
        # Adjacent literals are concatenated verbatim, so each piece carries
        # its own trailing space to keep the message readable.
        "Especially in Windows, file stream should not be passed "
        "to csv writer without newline='' option. "
        "(https://docs.python.org/3/library/csv.html#csv.writer)"
    ),
)
def test_to_csv_write_to_open_file(self, temp_file):
    """Check ``to_csv`` appends to an already-open text handle after manual writes.

    The handle is opened without ``newline=""``; the test is marked as an
    expected failure when ``compat.is_platform_windows()`` is true.
    """
    # GH 21696
    df = DataFrame({"a": ["x", "y", "z"]})
    expected = """\
manual header
x
y
z
"""
    with open(temp_file, "w", encoding="utf-8") as f:
        f.write("manual header\n")
        # Neither header nor index is written, so only the three values follow.
        df.to_csv(f, header=None, index=None)
    with open(temp_file, encoding="utf-8") as f:
        assert f.read() == expected
|
||||
|
||||
def test_to_csv_write_to_open_file_with_newline_py3(self, temp_file):
|
||||
# see gh-21696
|
||||
# see gh-20353
|
||||
df = DataFrame({"a": ["x", "y", "z"]})
|
||||
expected_rows = ["x", "y", "z"]
|
||||
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
|
||||
with open(temp_file, "w", newline="", encoding="utf-8") as f:
|
||||
f.write("manual header\n")
|
||||
df.to_csv(f, header=None, index=None)
|
||||
|
||||
with open(temp_file, "rb") as f:
|
||||
assert f.read() == bytes(expected, "utf-8")
|
||||
|
||||
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(
    self,
    compression_only,
    read_infer,
    to_infer,
    compression_to_extension,
    temp_file,
):
    """Round-trip a frame with explicit or inferred compression on write and read."""
    # see gh-15008
    compression = compression_only
    frame = DataFrame({"A": [1]})

    # "infer" relies on the file extension appended to the path below.
    write_mode = compression
    if to_infer:
        write_mode = "infer"
    read_mode = compression
    if read_infer:
        read_mode = "infer"

    path_ext = str(temp_file) + "." + compression_to_extension[compression]
    frame.to_csv(path_ext, compression=write_mode)
    round_tripped = pd.read_csv(path_ext, index_col=0, compression=read_mode)
    tm.assert_frame_equal(round_tripped, frame)
|
||||
|
||||
def test_to_csv_compression_dict(self, compression_only, temp_file):
|
||||
# GH 26023
|
||||
method = compression_only
|
||||
df = DataFrame({"ABC": [1]})
|
||||
extension = {
|
||||
"gzip": "gz",
|
||||
"zstd": "zst",
|
||||
}.get(method, method)
|
||||
|
||||
path = str(temp_file) + "." + extension
|
||||
df.to_csv(path, compression={"method": method})
|
||||
read_df = pd.read_csv(path, index_col=0)
|
||||
tm.assert_frame_equal(read_df, df)
|
||||
|
||||
def test_to_csv_compression_dict_no_method_raises(self, temp_file):
    """Check that a compression dict lacking the ``"method"`` key raises ``ValueError``."""
    # GH 26023
    frame = DataFrame({"ABC": [1]})
    bad_compression = {"some_option": True}

    with pytest.raises(ValueError, match="must have key 'method'"):
        frame.to_csv(temp_file, compression=bad_compression)
|
||||
|
||||
@pytest.mark.parametrize("compression", ["zip", "infer"])
@pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
def test_to_csv_zip_arguments(self, compression, archive_name, temp_file):
    """Check that ``archive_name`` names the single member of the written zip file."""
    # GH 26023
    frame = DataFrame({"ABC": [1]})
    path = str(temp_file) + ".zip"
    options = {"method": compression, "archive_name": archive_name}
    frame.to_csv(path, compression=options)

    with ZipFile(path) as archive:
        members = archive.filelist
        assert len(members) == 1
        assert members[0].filename == archive_name
|
||||
|
||||
@pytest.mark.parametrize(
    "filename,expected_arcname",
    [
        ("archive.csv", "archive.csv"),
        ("archive.tsv", "archive.tsv"),
        ("archive.csv.zip", "archive.csv"),
        ("archive.tsv.zip", "archive.tsv"),
        ("archive.zip", "archive"),
    ],
)
def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname):
    """Check the member name chosen inside the zip when no ``archive_name`` is given."""
    # GH 39465
    frame = DataFrame({"ABC": [1]})
    target = tmp_path / filename
    frame.to_csv(target, compression="zip")

    with ZipFile(target) as archive:
        members = archive.filelist
        assert len(members) == 1
        assert members[0].filename == expected_arcname
|
||||
|
||||
@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
    """Check that a multi-character ``na_rep`` is written in full for an all-NA column."""
    # see gh-25099
    frame = DataFrame({"c": [pd.NA] * 3}).astype(df_new_type)
    wanted = tm.convert_rows_list_to_csv_str(["c", "mynull", "mynull", "mynull"])

    written = frame.to_csv(index=False, na_rep="mynull", encoding="ascii")

    assert wanted == written
|
||||
|
||||
def test_to_csv_timedelta_precision(self):
|
||||
# GH 6783
|
||||
s = pd.Series([1, 1]).astype("timedelta64[ns]")
|
||||
buf = io.StringIO()
|
||||
s.to_csv(buf)
|
||||
result = buf.getvalue()
|
||||
expected_rows = [
|
||||
",0",
|
||||
"0,0 days 00:00:00.000000001",
|
||||
"1,0 days 00:00:00.000000001",
|
||||
]
|
||||
expected = tm.convert_rows_list_to_csv_str(expected_rows)
|
||||
assert result == expected
|
||||
|
||||
def test_na_rep_truncated(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/31447
|
||||
result = pd.Series(range(8, 12)).to_csv(na_rep="-")
|
||||
expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
|
||||
assert result == expected
|
||||
|
||||
result = pd.Series([True, False]).to_csv(na_rep="nan")
|
||||
expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
|
||||
assert result == expected
|
||||
|
||||
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
|
||||
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
def test_to_csv_errors(self, errors, temp_file):
    """Smoke test: write a lone surrogate with each ``errors`` handler; nothing is asserted."""
    # GH 22610
    values = ["\ud800foo"]
    labels = Index(values, dtype=object)
    ser = pd.Series(values, index=labels, dtype=object)

    ser.to_csv(temp_file, errors=errors)
    # No use in reading back the data as it is not the same anymore
    # due to the error handling
|
||||
|
||||
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_binary_handle(self, mode, temp_file):
    """
    Writing to a handle opened in binary mode should round-trip, whether or
    not the ``mode`` passed to ``to_csv`` contains a 'b'.

    GH 35058 and GH 19827
    """
    row_labels = Index([f"i-{i}" for i in range(30)])
    frame = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD")),
        index=row_labels,
    )

    with open(temp_file, mode="w+b") as handle:
        frame.to_csv(handle, mode=mode)
    # Read back only after the handle has been closed.
    reloaded = pd.read_csv(temp_file, index_col=0)
    tm.assert_frame_equal(frame, reloaded)
|
||||
|
||||
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_encoding_binary_handle(self, mode, temp_file):
    """
    The requested encoding should be applied when writing to a binary handle.

    GH 23854 and GH 13068 with binary handles
    """
    # example from GH 23854
    content = "a, b, 🐟".encode("utf-8-sig")
    frame = pd.read_csv(io.BytesIO(content), encoding="utf-8-sig")

    sink = io.BytesIO()
    frame.to_csv(sink, mode=mode, encoding="utf-8-sig", index=False)
    sink.seek(0)  # tests whether file handle wasn't closed
    assert sink.getvalue().startswith(content)

    # example from GH 13068
    with open(temp_file, "w+b") as handle:
        DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")

        # Still inside the ``with`` block: the handle has to be open to rewind.
        handle.seek(0)
        written = handle.read()
        assert written.startswith(b'\xef\xbb\xbf""')
|
||||
|
||||
|
||||
def test_to_csv_iterative_compression_name(compression, temp_file):
|
||||
# GH 38714
|
||||
df = DataFrame(
|
||||
1.1 * np.arange(120).reshape((30, 4)),
|
||||
columns=Index(list("ABCD")),
|
||||
index=Index([f"i-{i}" for i in range(30)]),
|
||||
)
|
||||
df.to_csv(temp_file, compression=compression, chunksize=1)
|
||||
tm.assert_frame_equal(
|
||||
pd.read_csv(temp_file, compression=compression, index_col=0), df
|
||||
)
|
||||
|
||||
|
||||
def test_to_csv_iterative_compression_buffer(compression):
|
||||
# GH 38714
|
||||
df = DataFrame(
|
||||
1.1 * np.arange(120).reshape((30, 4)),
|
||||
columns=Index(list("ABCD")),
|
||||
index=Index([f"i-{i}" for i in range(30)]),
|
||||
)
|
||||
with io.BytesIO() as buffer:
|
||||
df.to_csv(buffer, compression=compression, chunksize=1)
|
||||
buffer.seek(0)
|
||||
tm.assert_frame_equal(
|
||||
pd.read_csv(buffer, compression=compression, index_col=0), df
|
||||
)
|
||||
assert not buffer.closed
|
||||
|
||||
|
||||
def test_new_style_float_format_basic():
|
||||
df = DataFrame({"A": [1234.56789, 9876.54321]})
|
||||
result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
|
||||
expected = ",A\n0,1234.57\n1,9876.54\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_float_format_thousands():
|
||||
df = DataFrame({"A": [1234.56789, 9876.54321]})
|
||||
result = df.to_csv(float_format="{:,.2f}", lineterminator="\n")
|
||||
expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_scientific_format():
|
||||
df = DataFrame({"A": [0.000123, 0.000456]})
|
||||
result = df.to_csv(float_format="{:.2e}", lineterminator="\n")
|
||||
expected = ",A\n0,1.23e-04\n1,4.56e-04\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_with_nan():
|
||||
df = DataFrame({"A": [1.23, np.nan, 4.56]})
|
||||
result = df.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
|
||||
expected = ",A\n0,1.23\n1,NA\n2,4.56\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_with_mixed_types():
|
||||
df = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
|
||||
result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
|
||||
expected = ",A,B\n0,1.23,x\n1,4.56,y\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_with_mixed_types_in_column():
    """A new-style ``float_format`` on a column mixing floats and a string."""
    frame = DataFrame({"A": [1.23, "text", 4.56]})
    written = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert written == ",A\n0,1.23\n1,text\n2,4.56\n"
|
||||
|
||||
|
||||
def test_invalid_new_style_format_missing_brace():
    """A new-style format string without its closing brace raises ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_message = "Invalid new-style format string '{:.2f"
    with pytest.raises(ValueError, match=expected_message):
        frame.to_csv(float_format="{:.2f")
|
||||
|
||||
|
||||
def test_invalid_new_style_format_specifier():
    """A new-style format string with the ``z`` type code raises ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_message = "Invalid new-style format string '{:.2z}'"
    with pytest.raises(ValueError, match=expected_message):
        frame.to_csv(float_format="{:.2z}")
|
||||
|
||||
|
||||
def test_old_style_format_compatibility():
    """A ``%``-style ``float_format`` (``"%.2f"``) writes two decimals."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    written = frame.to_csv(float_format="%.2f", lineterminator="\n")
    assert written == ",A\n0,1234.57\n1,9876.54\n"
|
||||
|
||||
|
||||
def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to each float value."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    written = frame.to_csv(float_format=lambda x: f"{x:,.2f}", lineterminator="\n")
    assert written == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
|
||||
|
||||
|
||||
def test_no_float_format():
    """Passing ``float_format=None`` writes floats in their default form."""
    frame = DataFrame({"A": [1.23, 4.56]})
    written = frame.to_csv(float_format=None, lineterminator="\n")
    assert written == ",A\n0,1.23\n1,4.56\n"
|
||||
|
||||
|
||||
def test_large_numbers():
|
||||
df = DataFrame({"A": [1e308, 2e308]})
|
||||
result = df.to_csv(float_format="{:.2e}", lineterminator="\n")
|
||||
expected = ",A\n0,1.00e+308\n1,inf\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_zero_and_negative():
|
||||
df = DataFrame({"A": [0.0, -1.23456]})
|
||||
result = df.to_csv(float_format="{:+.2f}", lineterminator="\n")
|
||||
expected = ",A\n0,+0.00\n1,-1.23\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_unicode_format():
|
||||
df = DataFrame({"A": [1.23, 4.56]})
|
||||
result = df.to_csv(float_format="{:.2f}€", encoding="utf-8", lineterminator="\n")
|
||||
expected = ",A\n0,1.23€\n1,4.56€\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_empty_dataframe():
    """A frame with no rows writes only its header line under a new-style format."""
    frame = DataFrame({"A": []})
    written = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert written == ",A\n"
|
||||
|
||||
|
||||
def test_multi_column_float():
|
||||
df = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
|
||||
result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
|
||||
expected = ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_invalid_float_format_type():
    """An integer ``float_format`` raises ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_message = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=expected_message):
        frame.to_csv(float_format=123)
|
||||
|
||||
|
||||
def test_new_style_with_inf():
|
||||
df = DataFrame({"A": [1.23, np.inf, -np.inf]})
|
||||
result = df.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
|
||||
expected = ",A\n0,1.23\n1,inf\n2,-inf\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_with_precision_edge():
|
||||
df = DataFrame({"A": [1.23456789]})
|
||||
result = df.to_csv(float_format="{:.10f}", lineterminator="\n")
|
||||
expected = ",A\n0,1.2345678900\n"
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_new_style_with_template():
|
||||
df = DataFrame({"A": [1234.56789]})
|
||||
result = df.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
|
||||
expected = ',A\n0,"Value: 1,234.57"\n'
|
||||
assert result == expected
|
||||
@ -0,0 +1,473 @@
|
||||
"""Tests formatting as writer-agnostic ExcelCells
|
||||
|
||||
ExcelFormatter is tested implicitly in pandas/tests/io/excel
|
||||
"""
|
||||
|
||||
import string
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.errors import CSSWarning
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.formats.excel import (
|
||||
CssExcelCell,
|
||||
CSSToExcelConverter,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "css,expected",
    [
        # FONT
        # - name
        ("font-family: foo,bar", {"font": {"name": "foo"}}),
        ('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
        ("font-family: foo,\nbar", {"font": {"name": "foo"}}),
        ("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
        ("font-family: bar, foo", {"font": {"name": "bar"}}),
        ("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
        ("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
        ('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
        ('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
        # - family
        ("font-family: serif", {"font": {"name": "serif", "family": 1}}),
        ("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
        ("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
        ("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
        ("font-family: roman, sans serif", {"font": {"name": "roman"}}),
        ("font-family: roman, sansserif", {"font": {"name": "roman"}}),
        ("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
        ("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
        # - size
        ("font-size: 1em", {"font": {"size": 12}}),
        ("font-size: xx-small", {"font": {"size": 6}}),
        ("font-size: x-small", {"font": {"size": 7.5}}),
        ("font-size: small", {"font": {"size": 9.6}}),
        ("font-size: medium", {"font": {"size": 12}}),
        ("font-size: large", {"font": {"size": 13.5}}),
        ("font-size: x-large", {"font": {"size": 18}}),
        ("font-size: xx-large", {"font": {"size": 24}}),
        ("font-size: 50%", {"font": {"size": 6}}),
        # - bold
        ("font-weight: 100", {"font": {"bold": False}}),
        ("font-weight: 200", {"font": {"bold": False}}),
        ("font-weight: 300", {"font": {"bold": False}}),
        ("font-weight: 400", {"font": {"bold": False}}),
        ("font-weight: normal", {"font": {"bold": False}}),
        ("font-weight: lighter", {"font": {"bold": False}}),
        ("font-weight: bold", {"font": {"bold": True}}),
        ("font-weight: bolder", {"font": {"bold": True}}),
        ("font-weight: 700", {"font": {"bold": True}}),
        ("font-weight: 800", {"font": {"bold": True}}),
        ("font-weight: 900", {"font": {"bold": True}}),
        # - italic
        ("font-style: italic", {"font": {"italic": True}}),
        ("font-style: oblique", {"font": {"italic": True}}),
        # - underline
        ("text-decoration: underline", {"font": {"underline": "single"}}),
        ("text-decoration: overline", {}),
        ("text-decoration: none", {}),
        # - strike
        ("text-decoration: line-through", {"font": {"strike": True}}),
        (
            "text-decoration: underline line-through",
            {"font": {"strike": True, "underline": "single"}},
        ),
        (
            "text-decoration: underline; text-decoration: line-through",
            {"font": {"strike": True}},
        ),
        # - color
        ("color: red", {"font": {"color": "FF0000"}}),
        ("color: #ff0000", {"font": {"color": "FF0000"}}),
        ("color: #f0a", {"font": {"color": "FF00AA"}}),
        # - shadow
        ("text-shadow: none", {"font": {"shadow": False}}),
        ("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
        ("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
        ("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
        ("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
        ("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
        ("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
        ("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
        ("text-shadow: 0px -2em", {"font": {"shadow": True}}),
        # FILL
        # - color, fillType
        (
            "background-color: red",
            {"fill": {"fgColor": "FF0000", "patternType": "solid"}},
        ),
        (
            "background-color: #ff0000",
            {"fill": {"fgColor": "FF0000", "patternType": "solid"}},
        ),
        (
            "background-color: #f0a",
            {"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
        ),
        # BORDER
        # - style
        (
            "border-style: solid",
            {
                "border": {
                    "top": {"style": "medium"},
                    "bottom": {"style": "medium"},
                    "left": {"style": "medium"},
                    "right": {"style": "medium"},
                }
            },
        ),
        (
            "border-style: solid; border-width: thin",
            {
                "border": {
                    "top": {"style": "thin"},
                    "bottom": {"style": "thin"},
                    "left": {"style": "thin"},
                    "right": {"style": "thin"},
                }
            },
        ),
        (
            "border-top-style: solid; border-top-width: thin",
            {"border": {"top": {"style": "thin"}}},
        ),
        (
            "border-top-style: solid; border-top-width: 1pt",
            {"border": {"top": {"style": "thin"}}},
        ),
        ("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
        (
            "border-top-style: solid; border-top-width: medium",
            {"border": {"top": {"style": "medium"}}},
        ),
        (
            "border-top-style: solid; border-top-width: 2pt",
            {"border": {"top": {"style": "medium"}}},
        ),
        (
            "border-top-style: solid; border-top-width: thick",
            {"border": {"top": {"style": "thick"}}},
        ),
        (
            "border-top-style: solid; border-top-width: 4pt",
            {"border": {"top": {"style": "thick"}}},
        ),
        (
            "border-top-style: solid; border-top-width: none",
            {"border": {"top": {"style": "none"}}},
        ),
        (
            "border-top-style: solid; border-top-width: 0.000001pt",
            {"border": {"top": {"style": "none"}}},
        ),
        (
            "border-top-style: dotted",
            {"border": {"top": {"style": "mediumDashDotDot"}}},
        ),
        (
            "border-top-style: dotted; border-top-width: thin",
            {"border": {"top": {"style": "dotted"}}},
        ),
        ("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
        (
            "border-top-style: dashed; border-top-width: thin",
            {"border": {"top": {"style": "dashed"}}},
        ),
        ("border-top-style: double", {"border": {"top": {"style": "double"}}}),
        # - color
        (
            "border-style: solid; border-color: #0000ff",
            {
                "border": {
                    "top": {"style": "medium", "color": "0000FF"},
                    "right": {"style": "medium", "color": "0000FF"},
                    "bottom": {"style": "medium", "color": "0000FF"},
                    "left": {"style": "medium", "color": "0000FF"},
                }
            },
        ),
        (
            "border-top-style: double; border-top-color: blue",
            {"border": {"top": {"style": "double", "color": "0000FF"}}},
        ),
        (
            "border-top-style: solid; border-top-color: #06c",
            {"border": {"top": {"style": "medium", "color": "0066CC"}}},
        ),
        (
            "border-top-color: blue",
            {"border": {"top": {"color": "0000FF", "style": "none"}}},
        ),
        (
            "border-top-style: slantDashDot; border-top-color: blue",
            {"border": {"top": {"style": "slantDashDot", "color": "0000FF"}}},
        ),
        # ALIGNMENT
        # - horizontal
        ("text-align: center", {"alignment": {"horizontal": "center"}}),
        ("text-align: left", {"alignment": {"horizontal": "left"}}),
        ("text-align: right", {"alignment": {"horizontal": "right"}}),
        ("text-align: justify", {"alignment": {"horizontal": "justify"}}),
        # - vertical
        ("vertical-align: top", {"alignment": {"vertical": "top"}}),
        ("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
        ("vertical-align: middle", {"alignment": {"vertical": "center"}}),
        ("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
        ("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
        # - wrap_text
        ("white-space: nowrap", {"alignment": {"wrap_text": False}}),
        ("white-space: pre", {"alignment": {"wrap_text": False}}),
        ("white-space: pre-line", {"alignment": {"wrap_text": False}}),
        ("white-space: normal", {"alignment": {"wrap_text": True}}),
        # NUMBER FORMAT
        ("number-format: 0%", {"number_format": {"format_code": "0%"}}),
        (
            "number-format: 0§[Red](0)§-§@;",
            {"number_format": {"format_code": "0;[red](0);-;@"}},  # GH 46152
        ),
    ],
)
def test_css_to_excel(css, expected):
    """Check the Excel style dict produced for a single CSS declaration string."""
    converter = CSSToExcelConverter()
    produced = converter(css)
    assert expected == produced
|
||||
|
||||
|
||||
def test_css_to_excel_multiple():
    """Convert a CSS string holding several declarations at once.

    The input mixes font, border, and alignment declarations with one
    property (``unused``) that has no counterpart in the expected result.
    """
    convert = CSSToExcelConverter()
    actual = convert(
        """
        font-weight: bold;
        text-decoration: underline;
        color: red;
        border-width: thin;
        text-align: center;
        vertical-align: top;
        unused: something;
    """
    )
    assert {
        "font": {"bold": True, "underline": "single", "color": "FF0000"},
        "border": {
            "top": {"style": "thin"},
            "right": {"style": "thin"},
            "bottom": {"style": "thin"},
            "left": {"style": "thin"},
        },
        "alignment": {"horizontal": "center", "vertical": "top"},
    } == actual
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "css",
    [
        "border-top-style: unhandled-border-style",
        "border-style: another-unhandled-style",
    ],
)
def test_css_to_excel_unhandled_border_style_warns(css):
    """Test that unhandled border styles raise a CSSWarning."""
    convert = CSSToExcelConverter()
    with tm.assert_produces_warning(CSSWarning, match="Unhandled border style format"):
        convert(css)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "css,inherited,expected",
    [
        ("font-weight: bold", "", {"font": {"bold": True}}),
        ("", "font-weight: bold", {"font": {"bold": True}}),
        (
            "font-weight: bold",
            "font-style: italic",
            {"font": {"bold": True, "italic": True}},
        ),
        ("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
        ("font-style: inherit", "", {}),
        (
            "font-style: normal; font-style: inherit",
            "font-style: italic",
            {"font": {"italic": True}},
        ),
    ],
)
def test_css_to_excel_inherited(css, inherited, expected):
    """Check conversion when the converter is constructed with inherited CSS.

    ``inherited`` is passed to the ``CSSToExcelConverter`` constructor and
    ``css`` to the call; each case pins the combined result.
    """
    convert = CSSToExcelConverter(inherited)
    assert expected == convert(css)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input_color,output_color",
    (
        list(CSSToExcelConverter.NAMED_COLORS.items())
        + [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
        + [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
    ),
)
def test_css_to_excel_good_colors(input_color, output_color):
    """Valid colors convert without any warning for borders, fill, and font."""
    # see gh-18392
    css = (
        f"border-top-color: {input_color}; "
        f"border-right-color: {input_color}; "
        f"border-bottom-color: {input_color}; "
        f"border-left-color: {input_color}; "
        f"background-color: {input_color}; "
        f"color: {input_color}"
    )

    expected = {
        "fill": {"patternType": "solid", "fgColor": output_color},
        "font": {"color": output_color},
        "border": {
            side: {"color": output_color, "style": "none"}
            for side in ("top", "right", "bottom", "left")
        },
    }

    # Passing None asserts that no warning at all is emitted.
    with tm.assert_produces_warning(None):
        convert = CSSToExcelConverter()
        assert expected == convert(css)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
    """Unrecognised color values produce a CSSWarning instead of a color."""
    # see gh-18392
    css = (
        f"border-top-color: {input_color}; "
        f"border-right-color: {input_color}; "
        f"border-bottom-color: {input_color}; "
        f"border-left-color: {input_color}; "
        f"background-color: {input_color}; "
        f"color: {input_color}"
    )

    expected = {}

    # A None parameter is interpolated into the CSS as the text "None";
    # for that case no "fill" entry is expected at all.
    if input_color is not None:
        expected["fill"] = {"patternType": "solid"}

    with tm.assert_produces_warning(CSSWarning, match="Unhandled color format"):
        convert = CSSToExcelConverter()
        assert expected == convert(css)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_color", ["#", "#1234567"])
def test_css_to_excel_invalid_color_raises(input_color):
    """Test that invalid colors raise a ValueError."""
    css = (
        f"border-top-color: {input_color}; "
        f"border-right-color: {input_color}; "
        f"border-bottom-color: {input_color}; "
        f"border-left-color: {input_color}; "
        f"background-color: {input_color}; "
        f"color: {input_color}"
    )

    convert = CSSToExcelConverter()
    with pytest.raises(ValueError, match=f"Unexpected color {input_color}"):
        convert(css)
|
||||
|
||||
|
||||
def tests_css_named_colors_valid():
    """Every named color value is exactly six upper-case hex digits."""
    upper_hexs = set(map(str.upper, string.hexdigits))
    for color in CSSToExcelConverter.NAMED_COLORS.values():
        assert len(color) == 6 and all(c in upper_hexs for c in color)
|
||||
|
||||
|
||||
def test_css_named_colors_from_mpl_present():
    """Every matplotlib CSS4 color is in NAMED_COLORS with the same hex value.

    Skipped when matplotlib is not importable.
    """
    mpl_colors = pytest.importorskip("matplotlib.colors")

    pd_colors = CSSToExcelConverter.NAMED_COLORS
    for name, color in mpl_colors.CSS4_COLORS.items():
        # color[1:] drops the first character of the matplotlib value
        # before comparing it with the NAMED_COLORS entry.
        assert name in pd_colors and pd_colors[name] == color[1:]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "styles,expected",
    [
        ([("color", "green"), ("color", "red")], "color: red;"),
        ([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"),
        ([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"),
    ],
)
def test_css_excel_cell_precedence(styles, expected):
    """It favors latter declarations over former declarations"""
    # See GH 47371
    converter = CSSToExcelConverter()
    # Start from an empty cache so earlier tests cannot influence the result.
    converter._call_cached.cache_clear()
    css_styles = {(0, 0): styles}
    cell = CssExcelCell(
        row=0,
        col=0,
        val="",
        style=None,
        css_styles=css_styles,
        css_row=0,
        css_col=0,
        css_converter=converter,
    )
    # Clear again so the reference conversion below is computed fresh.
    converter._call_cached.cache_clear()

    assert cell.style == converter(expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "styles,cache_hits,cache_misses",
    [
        ([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1),
        (
            [
                [("font-weight", "bold")],
                [("font-weight", "normal"), ("font-weight", "bold")],
            ],
            1,
            1,
        ),
        ([[("text-align", "center")], [("TEXT-ALIGN", "center")]], 1, 1),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
            ],
            0,
            2,
        ),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
                [("font-weight", "bold"), ("text-align", "center")],
            ],
            1,
            2,
        ),
    ],
)
def test_css_excel_cell_cache(styles, cache_hits, cache_misses):
    """It caches unique cell styles"""
    # See GH 47371
    converter = CSSToExcelConverter()
    converter._call_cached.cache_clear()

    # One CSS entry per column of row 0; a cell is built for each entry.
    css_styles = {(0, i): _style for i, _style in enumerate(styles)}
    for css_row, css_col in css_styles:
        CssExcelCell(
            row=0,
            col=0,
            val="",
            style=None,
            css_styles=css_styles,
            css_row=css_row,
            css_col=css_col,
            css_converter=converter,
        )
    # Snapshot the counters before clearing the cache.
    cache_info = converter._call_cached.cache_info()
    converter._call_cached.cache_clear()

    assert cache_info.hits == cache_hits
    assert cache_info.misses == cache_misses
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,102 @@
|
||||
from io import StringIO
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.errors import Pandas4Warning
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
pytest.importorskip("tabulate")
|
||||
|
||||
|
||||
def test_keyword_deprecation():
    """Passing a second positional argument to to_markdown warns."""
    # GH 57280
    msg = (
        "Starting with pandas version 4.0 all arguments of to_markdown "
        "except for the argument 'buf' will be keyword-only."
    )
    s = pd.Series()
    with tm.assert_produces_warning(Pandas4Warning, match=msg):
        s.to_markdown(None, "wt")
|
||||
|
||||
|
||||
def test_simple():
|
||||
buf = StringIO()
|
||||
df = pd.DataFrame([1, 2, 3])
|
||||
df.to_markdown(buf=buf)
|
||||
result = buf.getvalue()
|
||||
assert (
|
||||
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
|
||||
)
|
||||
|
||||
|
||||
def test_empty_frame():
|
||||
buf = StringIO()
|
||||
df = pd.DataFrame({"id": [], "first_name": [], "last_name": []}).set_index("id")
|
||||
df.to_markdown(buf=buf)
|
||||
result = buf.getvalue()
|
||||
assert result == (
|
||||
"| id | first_name | last_name |\n|------|--------------|-------------|"
|
||||
)
|
||||
|
||||
|
||||
def test_other_tablefmt():
|
||||
buf = StringIO()
|
||||
df = pd.DataFrame([1, 2, 3])
|
||||
df.to_markdown(buf=buf, tablefmt="jira")
|
||||
result = buf.getvalue()
|
||||
assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
|
||||
|
||||
|
||||
def test_other_headers():
|
||||
buf = StringIO()
|
||||
df = pd.DataFrame([1, 2, 3])
|
||||
df.to_markdown(buf=buf, headers=["foo", "bar"])
|
||||
result = buf.getvalue()
|
||||
assert result == (
|
||||
"| foo | bar |\n|------:|------:|\n| 0 "
|
||||
"| 1 |\n| 1 | 2 |\n| 2 | 3 |"
|
||||
)
|
||||
|
||||
|
||||
def test_series():
|
||||
buf = StringIO()
|
||||
s = pd.Series([1, 2, 3], name="foo")
|
||||
s.to_markdown(buf=buf)
|
||||
result = buf.getvalue()
|
||||
assert result == (
|
||||
"| | foo |\n|---:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
|
||||
)
|
||||
|
||||
|
||||
def test_no_buf():
|
||||
df = pd.DataFrame([1, 2, 3])
|
||||
result = df.to_markdown()
|
||||
assert (
|
||||
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_index(index):
    """The ``index`` keyword controls whether the index column is rendered."""
    # GH 32667

    df = pd.DataFrame([1, 2, 3])

    result = df.to_markdown(index=index)

    if index:
        expected = (
            "|    |   0 |\n|---:|----:|\n|  0 |   1 |\n|  1 |   2 |\n|  2 |   3 |"
        )
    else:
        expected = "|   0 |\n|----:|\n|   1 |\n|   2 |\n|   3 |"
    assert result == expected
|
||||
|
||||
|
||||
def test_showindex_disallowed_in_kwargs():
    """Passing ``showindex`` alongside ``index`` raises a ValueError."""
    # GH 32667; disallowing showindex in kwargs enforced in 2.0
    df = pd.DataFrame([1, 2, 3])
    with pytest.raises(ValueError, match="Pass 'index' instead of 'showindex"):
        df.to_markdown(index=True, showindex=True)
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,421 @@
|
||||
"""
|
||||
Self-contained script to write legacy storage pickle files.
|
||||
|
||||
To use this script, create an environment where you want to
generate pickles, say it's for 0.20.3, with your pandas clone
in ~/pandas
|
||||
|
||||
. activate pandas_0.20.3
|
||||
cd ~/pandas/pandas
|
||||
|
||||
$ python -m tests.io.generate_legacy_storage_files \
|
||||
tests/io/data/legacy_pickle/0.20.3/ pickle
|
||||
|
||||
This script generates a storage file for the current arch, system,
|
||||
and python version
|
||||
pandas version: 0.20.3
|
||||
output dir : pandas/pandas/tests/io/data/legacy_pickle/0.20.3/
|
||||
storage format: pickle
|
||||
created pickle file: 0.20.3_x86_64_darwin_3.5.2.pickle
|
||||
|
||||
The idea here is you are using the *current* version of the
|
||||
generate_legacy_storage_files with an *older* version of pandas to
|
||||
generate a pickle file. We will then check this file into a current
|
||||
branch, and test using test_pickle.py. This will load the *older*
|
||||
pickles and test versus the current data that is generated
|
||||
(with main). These are then compared.
|
||||
|
||||
If we have cases where we changed the signature (e.g. we renamed
offset -> freq in Timestamp), then we have to conditionally execute
code in generate_legacy_storage_files.py to make it
run under the older AND the newer version.
|
||||
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
import os
|
||||
import pickle
|
||||
import platform as pl
|
||||
import sys
|
||||
|
||||
# Remove script directory from path, otherwise Python will try to
|
||||
# import the JSON test directory as the json module
|
||||
sys.path.pop(0)
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Period,
|
||||
RangeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
bdate_range,
|
||||
date_range,
|
||||
interval_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
from pandas.arrays import SparseArray
|
||||
|
||||
from pandas.tseries.offsets import (
|
||||
FY5253,
|
||||
BusinessDay,
|
||||
BusinessHour,
|
||||
CustomBusinessDay,
|
||||
DateOffset,
|
||||
Day,
|
||||
Easter,
|
||||
Hour,
|
||||
LastWeekOfMonth,
|
||||
Minute,
|
||||
MonthBegin,
|
||||
MonthEnd,
|
||||
QuarterBegin,
|
||||
QuarterEnd,
|
||||
SemiMonthBegin,
|
||||
SemiMonthEnd,
|
||||
Week,
|
||||
WeekOfMonth,
|
||||
YearBegin,
|
||||
YearEnd,
|
||||
)
|
||||
|
||||
|
||||
def _create_sp_series():
    """Return a block-kind sparse float Series named "bseries".

    The values are 0..14 with positions 7-11 and the last position set to NaN.
    """
    # nan-based
    arr = np.arange(15, dtype=np.float64)
    arr[7:12] = np.nan
    arr[-1:] = np.nan

    bseries = Series(SparseArray(arr, kind="block"))
    bseries.name = "bseries"
    return bseries
|
||||
|
||||
|
||||
def _create_sp_tsseries():
    """Return a block-kind sparse float Series named "btsseries".

    Same values as the plain sparse series (0..14 with positions 7-11 and the
    last position set to NaN), indexed by business days from 1/1/2011.
    """
    # nan-based
    arr = np.arange(15, dtype=np.float64)
    arr[7:12] = np.nan
    arr[-1:] = np.nan

    date_index = bdate_range("1/1/2011", periods=len(arr))
    bseries = Series(SparseArray(arr, kind="block"), index=date_index)
    bseries.name = "btsseries"
    return bseries
|
||||
|
||||
|
||||
def _create_sp_frame():
    """Return a 10-row frame (columns A-D) with every column made sparse.

    The frame is indexed by business days from 1/1/2011 and each column is
    converted by applying ``SparseArray`` to it.
    """
    nan = np.nan

    data = {
        "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        "C": np.arange(10).astype(np.int64),
        "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }

    dates = bdate_range("1/1/2011", periods=10)
    return DataFrame(data, index=dates).apply(SparseArray)
|
||||
|
||||
|
||||
def create_pickle_data(test: bool = True):
    """Create the pickle data.

    Parameters
    ----------
    test : bool, default True
        When True, the "timestamp" group additionally contains the "freq"
        and "both" entries that are present in the legacy pickles (<= 1.4).

    Returns
    -------
    dict
        Maps a group name ("series", "frame", "index", "scalars", "mi",
        "sp_series", "sp_frame", "cat", "timestamp", "offsets") to a dict
        of named objects for that group.
    """
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {"timestamp": Timestamp("20130101"), "period": Period("2012", "M")}

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30min", periods=10),
        "string": Index(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(
                zip(
                    *[
                        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                        ["one", "two", "one", "two", "one", "two", "one", "two"],
                    ]
                )
            ),
            names=["first", "second"],
        )
    }

    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(
            np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)
        ),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]
            ),
        ),
        "dup": Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
        "string": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
    }

    # Same data as the "mixed" frame plus column E, relabelled so that the
    # column name "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
        "int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)},
            index=MultiIndex.from_tuples(
                tuple(
                    zip(
                        *[
                            ["bar", "bar", "baz", "baz", "baz"],
                            ["one", "two", "one", "two", "three"],
                        ]
                    )
                ),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]
        ),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame(
            {
                "A": Categorical(["foo", "bar", "baz"]),
                "B": np.arange(3).astype(np.int64),
            }
        ),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
        "string": DataFrame(
            {
                "A": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
                "B": Series(["one", "two", "one", "two", "three"], dtype="string"),
            }
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }
    if test:
        # kept because those are present in the legacy pickles (<= 1.4)
        timestamp["freq"] = Timestamp("2011-01-01")
        timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()},
        "sp_frame": {"float": _create_sp_frame()},
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
|
||||
|
||||
|
||||
def create_dataframe_all_types():
    """Return a 5-row DataFrame with one column per covered dtype.

    Columns cover int, uint64, float, float32, bool, datetimes and
    timedeltas at several units, tz-aware datetimes, and an integer
    categorical. The second timestamp is NaT, so the derived datetime and
    timedelta columns each contain one missing value.
    """
    timestamps = Series(
        [
            Timestamp("2013-01-01"),
            NaT,
            Timestamp("2013-01-03"),
            Timestamp("2013-01-04"),
            Timestamp("2013-01-05"),
        ]
    )
    # Offsets of each timestamp from the first one.
    timedeltas = timestamps - timestamps[0]

    data = {
        # "string": Series(
        #     ["a", "b", "c", None, "e"], dtype=StringDtype(na_value=np.nan)
        # ),
        # "object": Series(["a", "b", "c", None, "e"], dtype=object),
        # "object_nan": Series(["a", "b", "c", np.nan, "e"], dtype=object),
        "int": list(range(1, 6)),
        "uint64": np.arange(3, 8).astype("uint64"),
        "float": [0.1, 0.2, 0.3, 0.4, np.nan],
        "float32": Series([0.1, 0.2, 0.3, 0.4, np.nan], dtype="float32"),
        "bool": [True, False, True, False, True],
        "datetime_ns": timestamps.dt.as_unit("ns"),
        "datetime_us": timestamps.dt.as_unit("us"),
        "datetime_ms": timestamps.dt.as_unit("ms"),
        "datetime_s": timestamps.dt.as_unit("s"),
        "datetimetz_ns": timestamps.dt.tz_localize("US/Eastern").dt.as_unit("ns"),
        "datetimetz_us": timestamps.dt.tz_localize("US/Eastern").dt.as_unit("us"),
        "timedelta_ns": timedeltas.dt.as_unit("ns"),
        "timedelta_us": timedeltas.dt.as_unit("us"),
        "timedelta_ms": timedeltas.dt.as_unit("ms"),
        "timedelta_s": timedeltas.dt.as_unit("s"),
        # "categorical": Categorical(
        #     Series(
        #         ["foo", "bar", "baz",np.nan,"foo"],dtype=StringDtype(na_value=np.nan)
        #     )
        # ),
        # "categorical_object": Categorical(
        #     Series(["foo", "bar", "baz", np.nan, "foo"], dtype=object)
        # ),
        "categorical_int": Categorical([1, 2, 3, np.nan, 1]),
    }
    return DataFrame(data)
|
||||
|
||||
|
||||
def platform_name():
    """Return an identifier for the running pandas/platform combination.

    The parts are joined with underscores in this order: pandas version,
    machine type, lower-cased system name, Python version.
    """
    return "_".join(
        [
            str(pandas.__version__),
            str(pl.machine()),
            str(pl.system().lower()),
            str(pl.python_version()),
        ]
    )
|
||||
|
||||
|
||||
def write_legacy_pickles(output_dir):
    """Write the pickle data to ``<output_dir>/<platform_name()>.pickle``.

    The data is generated with ``test=False`` and dumped with
    ``pickle.DEFAULT_PROTOCOL``; the created file name is printed.
    """
    pth = f"{platform_name()}.pickle"

    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(test=False), fh, pickle.DEFAULT_PROTOCOL)

    print(f"created pickle file: {pth}")
|
||||
|
||||
|
||||
def write_legacy_hdf(output_dir, format):
    """Write the all-types frame to an HDF5 file in ``output_dir``.

    Parameters
    ----------
    output_dir : str
        Directory in which the file is created.
    format : str
        Passed to ``DataFrame.to_hdf``. For "fixed" the "categorical_int"
        column is dropped first; for "table" compression level 9 is used,
        otherwise no compression level is passed.
    """
    # Imported here so that only this storage type requires PyTables.
    import tables

    pth = f"{platform_name()}_pytables-{tables.__version__}_{format}.h5"

    df = create_dataframe_all_types()
    if format == "fixed":
        # df = df.drop(columns=["categorical", "categorical_object", "categorical_int"])
        df = df.drop(columns=["categorical_int"])
    complevel = 9 if format == "table" else None
    df.to_hdf(
        os.path.join(output_dir, pth),
        key="df_alltypes",
        format=format,
        complevel=complevel,
    )

    print(f"created hdf file: {pth}")
|
||||
|
||||
|
||||
def write_legacy_file():
    """Command-line entry point: write a legacy storage file.

    Reads the output directory and storage type from ``sys.argv``, creates
    the output directory if needed, and dispatches to the pickle or HDF
    writer. Exits via ``sys.exit`` with a message when the argument count is
    wrong or the storage type is not "pickle" or "hdf".
    """
    # force our cwd to be the first searched
    sys.path.insert(0, "")

    if not 3 <= len(sys.argv) <= 4:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type> "
        )

    output_dir = str(sys.argv[1])
    storage_type = str(sys.argv[2])

    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    print(f"  pandas version: {pandas.__version__}")
    print(f"  output dir    : {output_dir}")
    print(f"  storage format: {storage_type}")

    # makedirs also creates missing parent directories and tolerates the
    # directory already existing, unlike an exists() check plus mkdir().
    os.makedirs(output_dir, exist_ok=True)

    if storage_type == "pickle":
        write_legacy_pickles(output_dir=output_dir)
    elif storage_type == "hdf":
        write_legacy_hdf(output_dir=output_dir, format="fixed")
        write_legacy_hdf(output_dir=output_dir, format="table")
    else:
        sys.exit("storage_type must be one of {'pickle', 'hdf'}")
|
||||
|
||||
|
||||
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    write_legacy_file()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user