refactor: enhance DataFrameHtmlFormatter with customizable cell and header styles · kosiew/datafusion-python@2f9d655 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2f9d655

Browse files
committed
refactor: enhance DataFrameHtmlFormatter with customizable cell and header styles
- Added methods `get_cell_style()` and `get_header_style()` to allow subclasses to customize the CSS styles for table cells and headers.
- Updated `_build_table_header()` and `_build_regular_cell()` methods to utilize the new styling methods for improved maintainability.
- Introduced a registry for custom type formatters in `DataFrameHtmlFormatter` to enable flexible formatting of cell values based on their types.
- Enhanced `_format_cell_value()` to check for registered formatters before defaulting to string conversion, improving extensibility.
1 parent d9980c3 commit 2f9d655

File tree

1 file changed

+59
-16
lines changed

1 file changed

+59
-16
lines changed

python/datafusion/html_formatter.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""HTML formatting utilities for DataFusion DataFrames."""
22

3-
from typing import Dict, Optional, Any, Union, List
3+
from typing import Dict, Optional, Any, Union, List, Callable, Type
44

55

66
class DataFrameHtmlFormatter:
@@ -9,6 +9,12 @@ class DataFrameHtmlFormatter:
99
This class handles the HTML rendering of DataFrames for display in
1010
Jupyter notebooks and other rich display contexts.
1111
12+
This class is designed to be extended by subclassing. Key extension points:
13+
- Override `get_cell_style()` and `get_header_style()` to customize styling
14+
- Override `_format_cell_value()` to customize value formatting
15+
- Use `register_formatter()` to add custom formatters for specific types
16+
- Override any `_build_*` method to customize component generation
17+
1218
Args:
1319
max_cell_length: Maximum characters to display in a cell before truncation
1420
max_width: Maximum width of the HTML table in pixels
@@ -33,6 +39,44 @@ def __init__(
3339
self.enable_cell_expansion = enable_cell_expansion
3440
self.custom_css = custom_css
3541
self.show_truncation_message = show_truncation_message
42+
# Registry for custom type formatters
43+
self._type_formatters: Dict[Type, Callable[[Any], str]] = {}
44+
45+
def register_formatter(
46+
self, type_class: Type, formatter: Callable[[Any], str]
47+
) -> None:
48+
"""Register a custom formatter for a specific data type.
49+
50+
Args:
51+
type_class: The type to register a formatter for
52+
formatter: Function that takes a value of the given type and returns
53+
a formatted string
54+
"""
55+
self._type_formatters[type_class] = formatter
56+
57+
def get_cell_style(self) -> str:
58+
"""Get the CSS style for regular table cells.
59+
60+
This method can be overridden by subclasses to customize cell styling.
61+
62+
Returns:
63+
CSS style string
64+
"""
65+
return "border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;"
66+
67+
def get_header_style(self) -> str:
68+
"""Get the CSS style for table header cells.
69+
70+
This method can be overridden by subclasses to customize header styling.
71+
72+
Returns:
73+
CSS style string
74+
"""
75+
return (
76+
"border: 1px solid black; padding: 8px; text-align: left; "
77+
"background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; "
78+
"max-width: fit-content;"
79+
)
3680

3781
def format_html(
3882
self,
@@ -104,12 +148,7 @@ def _build_table_header(self, schema: Any) -> List[str]:
104148
html.append("<thead>")
105149
html.append("<tr>")
106150
for field in schema:
107-
html.append(
108-
"<th style='border: 1px solid black; padding: 8px; "
109-
"text-align: left; background-color: #f2f2f2; "
110-
"white-space: nowrap; min-width: fit-content; "
111-
f"max-width: fit-content;'>{field.name}</th>"
112-
)
151+
html.append(f"<th style='{self.get_header_style()}'>{field.name}</th>")
113152
html.append("</tr>")
114153
html.append("</thead>")
115154
return html
@@ -151,8 +190,7 @@ def _build_expandable_cell(
151190
"""Build an expandable cell for long content."""
152191
short_value = str(cell_value)[: self.max_cell_length]
153192
return (
154-
f"<td style='border: 1px solid black; padding: 8px; "
155-
f"text-align: left; white-space: nowrap;'>"
193+
f"<td style='{self.get_cell_style()}'>"
156194
f"<div class='expandable-container'>"
157195
f"<span class='expandable' id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
158196
f"{short_value}</span>"
@@ -167,10 +205,7 @@ def _build_expandable_cell(
167205

168206
def _build_regular_cell(self, cell_value: Any) -> str:
169207
"""Build a regular table cell."""
170-
return (
171-
f"<td style='border: 1px solid black; padding: 8px; "
172-
f"text-align: left; white-space: nowrap;'>{cell_value}</td>"
173-
)
208+
return f"<td style='{self.get_cell_style()}'>{cell_value}</td>"
174209

175210
def _build_html_footer(self, has_more: bool) -> List[str]:
176211
"""Build the HTML footer with JavaScript and messages."""
@@ -189,17 +224,25 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
189224
def _format_cell_value(self, column: Any, row_idx: int) -> str:
190225
"""Format a cell value for display.
191226
227+
This method can be overridden by subclasses to customize cell formatting.
228+
It also checks for registered type formatters before falling back to str().
229+
192230
Args:
193231
column: Arrow array
194232
row_idx: Row index
195233
196234
Returns:
197235
Formatted cell value as string
198236
"""
199-
# This is a simplified implementation for Python-side formatting
200-
# In practice, we'd want to handle different Arrow types appropriately
201237
try:
202-
return str(column[row_idx])
238+
value = column[row_idx]
239+
240+
# Check for custom type formatters
241+
for type_cls, formatter in self._type_formatters.items():
242+
if isinstance(value, type_cls):
243+
return formatter(value)
244+
245+
return str(value)
203246
except (IndexError, TypeError):
204247
return ""
205248

0 commit comments

Comments
 (0)
0