41
41
)
42
42
from pyarrow .csv import write_csv
43
43
44
-
44
+ MB = 1024 * 1024
45
45
@pytest .fixture
46
46
def ctx ():
47
47
return SessionContext ()
@@ -116,6 +116,30 @@ def clean_formatter_state():
116
116
"""Reset the HTML formatter after each test."""
117
117
reset_formatter ()
118
118
119
+ # custom style for testing with html formatter
120
+ class CustomStyleProvider :
121
+ def get_cell_style (self ) -> str :
122
+ return (
123
+ "background-color: #f5f5f5; color: #333; padding: 8px; border: "
124
+ "1px solid #ddd;"
125
+ )
126
+
127
+ def get_header_style (self ) -> str :
128
+ return (
129
+ "background-color: #4285f4; color: white; font-weight: bold; "
130
+ "padding: 10px; border: 1px solid #3367d6;"
131
+ )
132
+
133
+ def count_table_rows (html_content : str ) -> int :
134
+ """Count the number of table rows in HTML content.
135
+
136
+ Args:
137
+ html_content: HTML string to analyze
138
+
139
+ Returns:
140
+ Number of table rows found (number of <tr> tags)
141
+ """
142
+ return len (re .findall (r"<tr" , html_content ))
119
143
120
144
def test_select (df ):
121
145
df_1 = df .select (
@@ -671,11 +695,10 @@ def test_window_frame_defaults_match_postgres(partitioned_df):
671
695
assert df_2 .sort (col_a ).to_pydict () == expected
672
696
673
697
674
- def test_html_formatter_configuration (df , clean_formatter_state ):
698
+ def test_html_formatter_cell_dimension (df , clean_formatter_state ):
675
699
"""Test configuring the HTML formatter with different options."""
676
700
# Configure with custom settings
677
701
configure_formatter (
678
- max_cell_length = 5 ,
679
702
max_width = 500 ,
680
703
max_height = 200 ,
681
704
enable_cell_expansion = False ,
@@ -693,19 +716,6 @@ def test_html_formatter_configuration(df, clean_formatter_state):
693
716
def test_html_formatter_custom_style_provider (df , clean_formatter_state ):
694
717
"""Test using custom style providers with the HTML formatter."""
695
718
696
- class CustomStyleProvider :
697
- def get_cell_style (self ) -> str :
698
- return (
699
- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
700
- "1px solid #ddd;"
701
- )
702
-
703
- def get_header_style (self ) -> str :
704
- return (
705
- "background-color: #4285f4; color: white; font-weight: bold; "
706
- "padding: 10px; border: 1px solid #3367d6;"
707
- )
708
-
709
719
# Configure with custom style provider
710
720
configure_formatter (style_provider = CustomStyleProvider ())
711
721
@@ -917,37 +927,67 @@ def get_header_style(self) -> str:
917
927
assert "color: #5af" in html_output # Even numbers
918
928
919
929
920
- def test_html_formatter_memory_and_rows ( ):
930
+ def test_html_formatter_memory ( df , clean_formatter_state ):
921
931
"""Test the memory and row control parameters in DataFrameHtmlFormatter."""
922
-
923
- # Test default values
924
- formatter = DataFrameHtmlFormatter ()
925
- assert formatter .max_memory_bytes == 2 * 1024 * 1024 # 2 MB
926
- assert formatter .min_rows_display == 20
927
- assert formatter .repr_rows == 10
928
-
929
- # Test custom values
930
- formatter = DataFrameHtmlFormatter (
931
- max_memory_bytes = 1024 * 1024 , # 1 MB
932
- min_rows_display = 10 ,
933
- repr_rows = 5
932
+ configure_formatter (
933
+ max_memory_bytes = 10 ,
934
+ min_rows_display = 1
934
935
)
935
- assert formatter .max_memory_bytes == 1024 * 1024
936
- assert formatter .min_rows_display == 10
937
- assert formatter .repr_rows == 5
938
-
939
- # Test extremely large values and tiny values (edge cases)
940
- # These should not raise exceptions
941
- extreme_formatter = DataFrameHtmlFormatter (
942
- max_memory_bytes = 10 * 1024 * 1024 * 1024 , # 10 GB
943
- min_rows_display = 1 ,
944
- repr_rows = 1
936
+ html_output = df ._repr_html_ ()
937
+
938
+ # Count the number of table rows in the output
939
+ tr_count = count_table_rows (html_output )
940
+ # With a tiny memory limit of 10 bytes, the formatter should display
941
+ # the minimum number of rows (1) plus a message about truncation
942
+ assert tr_count == 2 # 1 for header row, 1 for data row
943
+ assert "data truncated" in html_output .lower ()
944
+
945
+ configure_formatter (
946
+ max_memory_bytes = 10 * MB ,
947
+ min_rows_display = 2
945
948
)
946
- assert extreme_formatter .max_memory_bytes == 10 * 1024 * 1024 * 1024
947
- assert extreme_formatter .min_rows_display == 1
948
- assert extreme_formatter .repr_rows == 1
949
-
949
+ html_output = df ._repr_html_ ()
950
+ # With larger memory limit and min_rows=2, should display all rows
951
+ tr_count = count_table_rows (html_output )
952
+ # Table should have header row (1) + 3 data rows = 4 rows
953
+ assert tr_count == 4
954
+ # No truncation message should appear
955
+ assert "data truncated" not in html_output .lower ()
956
+
957
+ def test_html_formatter_repr_rows (df , clean_formatter_state ):
958
+ configure_formatter (
959
+ min_rows_display = 2 ,
960
+ repr_rows = 2
961
+ )
962
+ html_output = df ._repr_html_ ()
963
+
964
+ tr_count = count_table_rows (html_output )
965
+ # Table should have header row (1) + 2 data rows = 3 rows
966
+ assert tr_count == 3
967
+
968
+ configure_formatter (
969
+ min_rows_display = 2 ,
970
+ repr_rows = 3
971
+ )
972
+ html_output = df ._repr_html_ ()
973
+
974
+ tr_count = count_table_rows (html_output )
975
+ # Table should have header row (1) + 3 data rows = 4 rows
976
+ assert tr_count == 4
977
+
978
+
979
+ def test_html_formatter_validation ():
950
980
# Test validation for invalid parameters
981
+
982
+ with pytest .raises (ValueError , match = "max_cell_length must be a positive integer" ):
983
+ DataFrameHtmlFormatter (max_cell_length = 0 )
984
+
985
+ with pytest .raises (ValueError , match = "max_width must be a positive integer" ):
986
+ DataFrameHtmlFormatter (max_width = 0 )
987
+
988
+ with pytest .raises (ValueError , match = "max_height must be a positive integer" ):
989
+ DataFrameHtmlFormatter (max_height = 0 )
990
+
951
991
with pytest .raises (ValueError , match = "max_memory_bytes must be a positive integer" ):
952
992
DataFrameHtmlFormatter (max_memory_bytes = 0 )
953
993
@@ -967,55 +1007,56 @@ def test_html_formatter_memory_and_rows():
967
1007
DataFrameHtmlFormatter (repr_rows = - 10 )
968
1008
969
1009
970
- def test_custom_style_provider_html_formatter (df , clean_formatter_state ):
1010
+ def test_configure_formatter (df , clean_formatter_state ):
971
1011
"""Test using custom style providers with the HTML formatter and configured
972
1012
parameters."""
973
1013
974
- class CustomStyleProvider :
975
- def get_cell_style (self ) -> str :
976
- return (
977
- "background-color: #f5f5f5; color: #333; padding: 8px; border: "
978
- "1px solid #ddd;"
979
- )
980
-
981
- def get_header_style (self ) -> str :
982
- return (
983
- "background-color: #4285f4; color: white; font-weight: bold; "
984
- "padding: 10px; border: 1px solid #3367d6;"
985
- )
986
-
987
- # Configure with custom style provider
988
- configure_formatter (style_provider = CustomStyleProvider ())
989
-
990
- html_output = df ._repr_html_ ()
991
-
992
- # Verify our custom styles were applied
993
- assert "background-color: #4285f4" in html_output
994
- assert "color: white" in html_output
995
- assert "background-color: #f5f5f5" in html_output
996
-
997
- # Reset for the next part of the test
1014
+ # these are non-default values
1015
+ MAX_CELL_LENGTH = 10
1016
+ MAX_WIDTH = 500
1017
+ MAX_HEIGHT = 30
1018
+ MAX_MEMORY_BYTES = 3 * MB
1019
+ MIN_ROWS_DISPLAY = 2
1020
+ REPR_ROWS = 2
1021
+ ENABLE_CELL_EXPANSION = False
1022
+ SHOW_TRUNCATION_MESSAGE = False
1023
+ USE_SHARED_STYLES = False
1024
+
998
1025
reset_formatter ()
1026
+ formatter_default = get_formatter ()
1027
+
1028
+ assert formatter_default .max_cell_length != MAX_CELL_LENGTH
1029
+ assert formatter_default .max_width != MAX_WIDTH
1030
+ assert formatter_default .max_height != MAX_HEIGHT
1031
+ assert formatter_default .max_memory_bytes != MAX_MEMORY_BYTES
1032
+ assert formatter_default .min_rows_display != MIN_ROWS_DISPLAY
1033
+ assert formatter_default .repr_rows != REPR_ROWS
1034
+ assert formatter_default .enable_cell_expansion != ENABLE_CELL_EXPANSION
1035
+ assert formatter_default .show_truncation_message != SHOW_TRUNCATION_MESSAGE
1036
+ assert formatter_default .use_shared_styles != USE_SHARED_STYLES
1037
+
999
1038
# Configure with custom style provider and additional parameters
1000
1039
configure_formatter (
1001
- style_provider = CustomStyleProvider (),
1002
- max_memory_bytes = 3 * 1024 * 1024 , # 3 MB
1003
- min_rows_display = 15 ,
1004
- repr_rows = 7
1040
+ max_cell_length = MAX_CELL_LENGTH ,
1041
+ max_width = MAX_WIDTH ,
1042
+ max_height = MAX_HEIGHT ,
1043
+ max_memory_bytes = MAX_MEMORY_BYTES ,
1044
+ min_rows_display = MIN_ROWS_DISPLAY ,
1045
+ repr_rows = REPR_ROWS ,
1046
+ enable_cell_expansion = ENABLE_CELL_EXPANSION ,
1047
+ show_truncation_message = SHOW_TRUNCATION_MESSAGE ,
1048
+ use_shared_styles = USE_SHARED_STYLES
1005
1049
)
1006
-
1007
- html_output = df ._repr_html_ ()
1008
-
1009
- # Verify our custom styles were applied
1010
- assert "background-color: #4285f4" in html_output
1011
- assert "color: white" in html_output
1012
- assert "background-color: #f5f5f5" in html_output
1013
-
1014
- # Test memory and row parameters were properly set
1015
- formatter = get_formatter ()
1016
- assert formatter .max_memory_bytes == 3 * 1024 * 1024 # 3 MB
1017
- assert formatter .min_rows_display == 15
1018
- assert formatter .repr_rows == 7
1050
+ formatter_custom = get_formatter ()
1051
+ assert formatter_custom .max_cell_length == MAX_CELL_LENGTH
1052
+ assert formatter_custom .max_width == MAX_WIDTH
1053
+ assert formatter_custom .max_height == MAX_HEIGHT
1054
+ assert formatter_custom .max_memory_bytes == MAX_MEMORY_BYTES
1055
+ assert formatter_custom .min_rows_display == MIN_ROWS_DISPLAY
1056
+ assert formatter_custom .repr_rows == REPR_ROWS
1057
+ assert formatter_custom .enable_cell_expansion == ENABLE_CELL_EXPANSION
1058
+ assert formatter_custom .show_truncation_message == SHOW_TRUNCATION_MESSAGE
1059
+ assert formatter_custom .use_shared_styles == USE_SHARED_STYLES
1019
1060
1020
1061
1021
1062
def test_get_dataframe (tmp_path ):
@@ -1606,9 +1647,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
1606
1647
assert result ["new_col" ] == [3 for _i in range (3 )]
1607
1648
1608
1649
1609
- def test_dataframe_repr_html_structure (df ) -> None :
1650
+ def test_dataframe_repr_html_structure (df , clean_formatter_state ) -> None :
1610
1651
"""Test that DataFrame._repr_html_ produces expected HTML output structure."""
1611
- import re
1612
1652
1613
1653
output = df ._repr_html_ ()
1614
1654
@@ -1638,13 +1678,13 @@ def test_dataframe_repr_html_structure(df) -> None:
1638
1678
assert len (body_matches ) == 1 , "Expected pattern of values not found in HTML output"
1639
1679
1640
1680
1641
- def test_dataframe_repr_html_values (df ):
1681
+ def test_dataframe_repr_html_values (df , clean_formatter_state ):
1642
1682
"""Test that DataFrame._repr_html_ contains the expected data values."""
1643
1683
html = df ._repr_html_ ()
1644
1684
assert html is not None
1645
1685
1646
1686
# Create a more flexible pattern that handles values being wrapped in spans
1647
- # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless
1687
+ # This pattern will match the sequence of values 1,4,8,2,5,5 regardless
1648
1688
# of formatting
1649
1689
pattern = re .compile (
1650
1690
r"<td[^>]*?>(?:<span[^>]*?>)?1(?:</span>)?</td>.*?"
@@ -1748,4 +1788,4 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
1748
1788
1749
1789
assert "<style>" in local_html_1
1750
1790
assert "<style>" in local_html_2
1751
-
1791
+
0 commit comments