add more tests · apache/datafusion-python@6588c8c · GitHub
[go: up one dir, main page]

Skip to content

Commit 6588c8c

Browse files
committed
add more tests
1 parent b5ab123 commit 6588c8c

File tree

1 file changed

+130
-90
lines changed

1 file changed

+130
-90
lines changed

python/tests/test_dataframe.py

Lines changed: 130 additions & 90 deletions
< 10000 td data-grid-cell-id="diff-425804dd733aa01ad5e1a5047507df94cf9f0960346141eacdaf554a4ad6665c-950-987-1" data-selected="false" role="gridcell" style="background-color:var(--diffBlob-additionNum-bgColor, var(--diffBlob-addition-bgColor-num));text-align:center" tabindex="-1" valign="top" class="focusable-grid-cell diff-line-number position-relative left-side">987
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
)
4242
from pyarrow.csv import write_csv
4343

44-
44+
MB = 1024* 1024
4545
@pytest.fixture
4646
def ctx():
4747
return SessionContext()
@@ -116,6 +116,30 @@ def clean_formatter_state():
116116
"""Reset the HTML formatter after each test."""
117117
reset_formatter()
118118

119+
# custom style for testing with html formatter
120+
class CustomStyleProvider:
121+
def get_cell_style(self) -> str:
122+
return (
123+
"background-color: #f5f5f5; color: #333; padding: 8px; border: "
124+
"1px solid #ddd;"
125+
)
126+
127+
def get_header_style(self) -> str:
128+
return (
129+
"background-color: #4285f4; color: white; font-weight: bold; "
130+
"padding: 10px; border: 1px solid #3367d6;"
131+
)
132+
133+
def count_table_rows(html_content: str) -> int:
134+
"""Count the number of table rows in HTML content.
135+
136+
Args:
137+
html_content: HTML string to analyze
138+
139+
Returns:
140+
Number of table rows found (number of <tr> tags)
141+
"""
142+
return len(re.findall(r"<tr", html_content))
119143

120144
def test_select(df):
121145
df_1 = df.select(
@@ -671,11 +695,10 @@ def test_window_frame_defaults_match_postgres(partitioned_df):
671695
assert df_2.sort(col_a).to_pydict() == expected
672696

673697

674-
def test_html_formatter_configuration(df, clean_formatter_state):
698+
def test_html_formatter_cell_dimension(df, clean_formatter_state):
675699
"""Test configuring the HTML formatter with different options."""
676700
# Configure with custom settings
677701
configure_formatter(
678-
max_cell_length=5,
679702
max_width=500,
680703
max_height=200,
681704
enable_cell_expansion=False,
@@ -693,19 +716,6 @@ def test_html_formatter_configuration(df, clean_formatter_state):
693716
def test_html_formatter_custom_style_provider(df, clean_formatter_state):
694717
"""Test using custom style providers with the HTML formatter."""
695718

696-
class CustomStyleProvider:
697-
def get_cell_style(self) -> str:
698-
return (
699-
"background-color: #f5f5f5; color: #333; padding: 8px; border: "
700-
"1px solid #ddd;"
701-
)
702-
703-
def get_header_style(self) -> str:
704-
return (
705-
"background-color: #4285f4; color: white; font-weight: bold; "
706-
"padding: 10px; border: 1px solid #3367d6;"
707-
)
708-
709719
# Configure with custom style provider
710720
configure_formatter(style_provider=CustomStyleProvider())
711721

@@ -917,37 +927,67 @@ def get_header_style(self) -> str:
917927
assert "color: #5af" in html_output # Even numbers
918928

919929

920-
def test_html_formatter_memory_and_rows():
930+
def test_html_formatter_memory(df, clean_formatter_state):
921931
"""Test the memory and row control parameters in DataFrameHtmlFormatter."""
922-
923-
# Test default values
924-
formatter = DataFrameHtmlFormatter()
925-
assert formatter.max_memory_bytes == 2 * 1024 * 1024 # 2 MB
926-
assert formatter.min_rows_display == 20
927-
assert formatter.repr_rows == 10
928-
929-
# Test custom values
930-
formatter = DataFrameHtmlFormatter(
931-
max_memory_bytes=1024 * 1024, # 1 MB
932-
min_rows_display=10,
933-
repr_rows=5
932+
configure_formatter(
933+
max_memory_bytes = 10,
934+
min_rows_display = 1
934935
)
935-
assert formatter.max_memory_bytes == 1024 * 1024
936-
assert formatter.min_rows_display == 10
937-
assert formatter.repr_rows == 5
938-
939-
# Test extremely large values and tiny values (edge cases)
940-
# These should not raise exceptions
941-
extreme_formatter = DataFrameHtmlFormatter(
942-
max_memory_bytes=10 * 1024 * 1024 * 1024, # 10 GB
943-
min_rows_display=1,
944-
repr_rows=1
936+
html_output = df._repr_html_()
937+
938+
# Count the number of table rows in the output
939+
tr_count = count_table_rows(html_output)
940+
# With a tiny memory limit of 10 bytes, the formatter should display
941+
# the minimum number of rows (1) plus a message about truncation
942+
assert tr_count == 2 # 1 for header row, 1 for data row
943+
assert "data truncated" in html_output.lower()
944+
945+
configure_formatter(
946+
max_memory_bytes = 10*MB,
947+
min_rows_display = 2
945948
)
946-
assert extreme_formatter.max_memory_bytes == 10 * 1024 * 1024 * 1024
947-
assert extreme_formatter.min_rows_display == 1
948-
assert extreme_formatter.repr_rows == 1
949-
949+
html_output = df._repr_html_()
950+
# With larger memory limit and min_rows=2, should display all rows
951+
tr_count = count_table_rows(html_output)
952+
# Table should have header row (1) + 3 data rows = 4 rows
953+
assert tr_count == 4
954+
# No truncation message should appear
955+
assert "data truncated" not in html_output.lower()
956+
957+
def test_html_formatter_repr_rows(df, clean_formatter_state):
958+
configure_formatter(
959+
min_rows_display = 2,
960+
repr_rows = 2
961+
)
962+
html_output = df._repr_html_()
963+
964+
tr_count = count_table_rows(html_output)
965+
# Table should have header row (1) + 2 data rows = 3 rows
966+
assert tr_count == 3
967+
968+
configure_formatter(
969+
min_rows_display = 2,
970+
repr_rows = 3
971+
)
972+
html_output = df._repr_html_()
973+
974+
tr_count = count_table_rows(html_output)
975+
# Table should have header row (1) + 3 data rows = 4 rows
976+
assert tr_count == 4
977+
978+
979+
def test_html_formatter_validation():
950980
# Test validation for invalid parameters
981+
982+
with pytest.raises(ValueError, match="max_cell_length must be a positive integer"):
983+
DataFrameHtmlFormatter(max_cell_length=0)
984+
985+
with pytest.raises(ValueError, match="max_width must be a positive integer"):
986+
DataFrameHtmlFormatter(max_width=0)
987+
988+
with pytest.raises(ValueError, match="max_height must be a positive integer"):
989+
DataFrameHtmlFormatter(max_height=0)
990+
951991
with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"):
952992
DataFrameHtmlFormatter(max_memory_bytes=0)
953993

@@ -967,55 +1007,56 @@ def test_html_formatter_memory_and_rows():
9671007
DataFrameHtmlFormatter(repr_rows=-10)
9681008

9691009

970-
def test_custom_style_provider_html_formatter(df, clean_formatter_state):
1010+
def test_configure_formatter(df, clean_formatter_state):
9711011
"""Test using custom style providers with the HTML formatter and configured
9721012
parameters."""
9731013

974-
class CustomStyleProvider:
975-
def get_cell_style(self) -> str:
976-
return (
977-
"background-color: #f5f5f5; color: #333; padding: 8px; border: "
978-
"1px solid #ddd;"
979-
)
980-
981-
def get_header_style(self) -> str:
982-
return (
983-
"background-color: #4285f4; color: white; font-weight: bold; "
984-
"padding: 10px; border: 1px solid #3367d6;"
985-
)
986-
987-
# Configure with custom style provider
988-
configure_formatter(style_provider=CustomStyleProvider())
989-
990-
html_output = df._repr_html_()
991-
992-
# Verify our custom styles were applied
993-
assert "background-color: #4285f4" in html_output
994-
assert "color: white" in html_output
995-
assert "background-color: #f5f5f5" in html_output
996-
997-
# Reset for the next part of the test
1014+
# these are non-default values
1015+
MAX_CELL_LENGTH = 10
1016+
MAX_WIDTH = 500
1017+
MAX_HEIGHT = 30
1018+
MAX_MEMORY_BYTES = 3*MB
1019+
MIN_ROWS_DISPLAY=2
1020+
REPR_ROWS = 2
1021+
ENABLE_CELL_EXPANSION = False
1022+
SHOW_TRUNCATION_MESSAGE = False
1023+
USE_SHARED_STYLES = False
1024+
9981025
reset_formatter()
1026+
formatter_default = get_formatter()
1027+
1028+
assert formatter_default.max_cell_length != MAX_CELL_LENGTH
1029+
assert formatter_default.max_width != MAX_WIDTH
1030+
assert formatter_default.max_height != MAX_HEIGHT
1031+
assert formatter_default.max_memory_bytes != MAX_MEMORY_BYTES
1032+
assert formatter_default.min_rows_display != MIN_ROWS_DISPLAY
1033+
assert formatter_default.repr_rows != REPR_ROWS
1034+
assert formatter_default.enable_cell_expansion != ENABLE_CELL_EXPANSION
1035+
assert formatter_default.show_truncation_message != SHOW_TRUNCATION_MESSAGE
1036+
assert formatter_default.use_shared_styles != USE_SHARED_STYLES
1037+
9991038
# Configure with custom style provider and additional parameters
10001039
configure_formatter(
1001-
style_provider=CustomStyleProvider(),
1002-
max_memory_bytes=3 * 1024 * 1024, # 3 MB
1003-
min_rows_display=15,
1004-
repr_rows=7
1040+
max_cell_length = MAX_CELL_LENGTH,
1041+
max_width = MAX_WIDTH,
1042+
max_height= MAX_HEIGHT,
1043+
max_memory_bytes=MAX_MEMORY_BYTES,
1044+
min_rows_display=MIN_ROWS_DISPLAY,
1045+
repr_rows=REPR_ROWS,
1046+
enable_cell_expansion = ENABLE_CELL_EXPANSION,
1047+
show_truncation_message = SHOW_TRUNCATION_MESSAGE,
1048+
use_shared_styles = USE_SHARED_STYLES
10051049
)
1006-
1007-
html_output = df._repr_html_()
1008-
1009-
# Verify our custom styles were applied
1010-
assert "background-color: #4285f4" in html_output
1011-
assert "color: white" in html_output
1012-
assert "background-color: #f5f5f5" in html_output
1013-
1014-
# Test memory and row parameters were properly set
1015-
formatter = get_formatter()
1016-
assert formatter.max_memory_bytes == 3 * 1024 * 1024 # 3 MB
1017-
assert formatter.min_rows_display == 15
1018-
assert formatter.repr_rows == 7
1050+
formatter_custom = get_formatter()
1051+
assert formatter_custom.max_cell_length == MAX_CELL_LENGTH
1052+
assert formatter_custom.max_width == MAX_WIDTH
1053+
assert formatter_custom.max_height == MAX_HEIGHT
1054+
assert formatter_custom.max_memory_bytes == MAX_MEMORY_BYTES
1055+
assert formatter_custom.min_rows_display == MIN_ROWS_DISPLAY
1056+
assert formatter_custom.repr_rows == REPR_ROWS
1057+
assert formatter_custom.enable_cell_expansion == ENABLE_CELL_EXPANSION
1058+
assert formatter_custom.show_truncation_message == SHOW_TRUNCATION_MESSAGE
1059+
assert formatter_custom.use_shared_styles == USE_SHARED_STYLES
10191060

10201061

10211062
def test_get_dataframe(tmp_path):
@@ -1606,9 +1647,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
16061647
assert result["new_col"] == [3 for _i in range(3)]
16071648

16081649

1609-
def test_dataframe_repr_html_structure(df) -> None:
1650+
def test_dataframe_repr_html_structure(df, clean_formatter_state) -> None:
16101651
"""Test that DataFrame._repr_html_ produces expected HTML output structure."""
1611-
import re
16121652

16131653
output = df._repr_html_()
16141654

@@ -1638,13 +1678,13 @@ def test_dataframe_repr_html_structure(df) -> None:
16381678
assert len(body_matches) == 1, "Expected pattern of values not found in HTML output"
16391679

16401680

1641-
def test_dataframe_repr_html_values(df):
1681+
def test_dataframe_repr_html_values(df, clean_formatter_state):
16421682
"""Test that DataFrame._repr_html_ contains the expected data values."""
16431683
html = df._repr_html_()
16441684
assert html is not None
16451685

16461686
# Create a more flexible pattern that handles values being wrapped in spans
1647-
# This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless
1687+
# This pattern will match the sequence of values 1,4,8,2,5,5 regardless
16481688
# of formatting
16491689
pattern = re.compile(
16501690
r"<td[^>]*?>(?:<span[^>]*?>)?1(?:</span>)?</td>.*?"
@@ -1748,4 +1788,4 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
17481788

17491789
assert "<style>" in local_html_1
17501790
assert "<style>" in local_html_2
1751-
1791+

0 commit comments

Comments
 (0)
0