refactor: enhance DataFrameHtmlFormatter with custom cell and header builders · kosiew/datafusion-python@a352a34 · GitHub
[go: up one dir, main page]

Skip to content

Commit a352a34

Browse files
committed
refactor: enhance DataFrameHtmlFormatter with custom cell and header builders
- Introduced CellFormatter and StyleProvider protocols for better extensibility.
- Added DefaultStyleProvider class with default CSS styles for cells and headers.
- Updated DataFrameHtmlFormatter to support custom cell and header builders.
- Refactored methods to utilize the new style provider for consistent styling.
- Improved documentation for methods and classes to clarify usage and customization options.
1 parent 2f9d655 commit a352a34

File tree

1 file changed

+91
-33
lines changed

1 file changed

+91
-33
lines changed

python/datafusion/html_formatter.py

Lines changed: 91 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,50 @@
11
"""HTML formatting utilities for DataFusion DataFrames."""
22

3-
from typing import Dict, Optional, Any, Union, List, Callable, Type
3+
from typing import Dict, Optional, Any, Union, List, Callable, Type, Protocol
4+
5+
6+
class CellFormatter(Protocol):
7+
"""Protocol for cell value formatters."""
8+
9+
def __call__(self, value: Any) -> str:
10+
"""Format a cell value to string representation."""
11+
...
12+
13+
14+
class StyleProvider(Protocol):
15+
"""Protocol for HTML style providers."""
16+
17+
def get_cell_style(self) -> str:
18+
"""Get the CSS style for table cells."""
19+
...
20+
21+
def get_header_style(self) -> str:
22+
"""Get the CSS style for header cells."""
23+
...
24+
25+
26+
class DefaultStyleProvider:
27+
"""Default implementation of StyleProvider."""
28+
29+
def get_cell_style(self) -> str:
30+
"""Get the CSS style for table cells.
31+
32+
Returns:
33+
CSS style string
34+
"""
35+
return "border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;"
36+
37+
def get_header_style(self) -> str:
38+
"""Get the CSS style for header cells.
39+
40+
Returns:
41+
CSS style string
42+
"""
43+
return (
44+
"border: 1px solid black; padding: 8px; text-align: left; "
45+
"background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; "
46+
"max-width: fit-content;"
47+
)
448

549

650
class DataFrameHtmlFormatter:
@@ -9,11 +53,10 @@ class DataFrameHtmlFormatter:
953
This class handles the HTML rendering of DataFrames for display in
1054
Jupyter notebooks and other rich display contexts.
1155
12-
This class is designed to be extended by subclassing. Key extension points:
13-
- Override `get_cell_style()` and `get_header_style()` to customize styling
14-
- Override `_format_cell_value()` to customize value formatting
15-
- Use `register_formatter()` to add custom formatters for specific types
16-
- Override any `_build_*` method to customize component generation
56+
This class supports extension through composition. Key extension points:
57+
- Provide a custom StyleProvider for styling cells and headers
58+
- Register custom formatters for specific types
59+
- Provide custom cell builders for specialized cell rendering
1760
1861
Args:
1962
max_cell_length: Maximum characters to display in a cell before truncation
@@ -22,6 +65,7 @@ class DataFrameHtmlFormatter:
2265
enable_cell_expansion: Whether to add expand/collapse buttons for long cell values
2366
custom_css: Additional CSS to include in the HTML output
2467
show_truncation_message: Whether to display a message when data is truncated
68+
style_provider: Custom provider for cell and header styles
2569
"""
2670

2771
def __init__(
@@ -32,19 +76,22 @@ def __init__(
3276
enable_cell_expansion: bool = True,
3377
custom_css: Optional[str] = None,
3478
show_truncation_message: bool = True,
79+
style_provider: Optional[StyleProvider] = None,
3580
):
3681
self.max_cell_length = max_cell_length
3782
self.max_width = max_width
3883
self.max_height = max_height
3984
self.enable_cell_expansion = enable_cell_expansion
4085
self.custom_css = custom_css
4186
self.show_truncation_message = show_truncation_message
87+
self.style_provider = style_provider or DefaultStyleProvider()
4288
# Registry for custom type formatters
43-
self._type_formatters: Dict[Type, Callable[[Any], str]] = {}
89+
self._type_formatters: Dict[Type, CellFormatter] = {}
90+
# Custom cell builders
91+
self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None
92+
self._custom_header_builder: Optional[Callable[[Any], str]] = None
4493

45-
def register_formatter(
46-
self, type_class: Type, formatter: Callable[[Any], str]
47-
) -> None:
94+
def register_formatter(self, type_class: Type, formatter: CellFormatter) -> None:
4895
"""Register a custom formatter for a specific data type.
4996
5097
Args:
@@ -54,29 +101,23 @@ def register_formatter(
54101
"""
55102
self._type_formatters[type_class] = formatter
56103

57-
def get_cell_style(self) -> str:
58-
"""Get the CSS style for regular table cells.
59-
60-
This method can be overridden by subclasses to customize cell styling.
104+
def set_custom_cell_builder(
105+
self, builder: Callable[[Any, int, int, str], str]
106+
) -> None:
107+
"""Set a custom cell builder function.
61108
62-
Returns:
63-
CSS style string
109+
Args:
110+
builder: Function that takes (value, row, col, table_id) and returns HTML
64111
"""
65-
return "border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;"
112+
self._custom_cell_builder = builder
66113

67-
def get_header_style(self) -> str:
68-
"""Get the CSS style for table header cells.
69-
70-
This method can be overridden by subclasses to customize header styling.
114+
def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None:
115+
"""Set a custom header builder function.
71116
72-
Returns:
73-
CSS style string
117+
Args:
118+
builder: Function that takes a field and returns HTML
74119
"""
75-
return (
76-
"border: 1px solid black; padding: 8px; text-align: left; "
77-
"background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; "
78-
"max-width: fit-content;"
79-
)
120+
self._custom_header_builder = builder
80121

81122
def format_html(
82123
self,
@@ -148,7 +189,12 @@ def _build_table_header(self, schema: Any) -> List[str]:
148189
html.append("<thead>")
149190
html.append("<tr>")
150191
for field in schema:
151-
html.append(f"<th style='{self.get_header_style()}'>{field.name}</th>")
192+
if self._custom_header_builder:
193+
html.append(self._custom_header_builder(field))
194+
else:
195+
html.append(
196+
f"<th style='{self.style_provider.get_header_style()}'>{field.name}</th>"
197+
)
152198
html.append("</tr>")
153199
html.append("</thead>")
154200
return html
@@ -188,9 +234,13 @@ def _build_expandable_cell(
188234
self, cell_value: Any, row_count: int, col_idx: int, table_uuid: str
189235
) -> str:
190236
"""Build an expandable cell for long content."""
237+
# If custom cell builder is provided, use it
238+
if self._custom_cell_builder:
239+
return self._custom_cell_builder(cell_value, row_count, col_idx, table_uuid)
240+
191241
short_value = str(cell_value)[: self.max_cell_length]
192242
return (
193-
f"<td style='{self.get_cell_style()}'>"
243+
f"<td style='{self.style_provider.get_cell_style()}'>"
194244
f"<div class='expandable-container'>"
195245
f"<span class='expandable' id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
196246
f"{short_value}</span>"
@@ -205,7 +255,7 @@ def _build_expandable_cell(
205255

206256
def _build_regular_cell(self, cell_value: Any) -> str:
207257
"""Build a regular table cell."""
208-
return f"<td style='{self.get_cell_style()}'>{cell_value}</td>"
258+
return f"<td style='{self.style_provider.get_cell_style()}'>{cell_value}</td>"
209259

210260
def _build_html_footer(self, has_more: bool) -> List[str]:
211261
"""Build the HTML footer with JavaScript and messages."""
@@ -224,8 +274,7 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
224274
def _format_cell_value(self, column: Any, row_idx: int) -> str:
225275
"""Format a cell value for display.
226276
227-
This method can be overridden by subclasses to customize cell formatting.
228-
It also checks for registered type formatters before falling back to str().
277+
Uses registered type formatters if available.
229278
230279
Args:
231280
column: Arrow array
@@ -327,3 +376,12 @@ def configure_formatter(**kwargs: Any) -> None:
327376
"""
328377
global _default_formatter
329378
_default_formatter = DataFrameHtmlFormatter(**kwargs)
379+
380+
381+
def set_style_provider(provider: StyleProvider) -> None:
382+
"""Set a custom style provider for the global formatter.
383+
384+
Args:
385+
provider: A StyleProvider implementation
386+
"""
387+
_default_formatter.style_provider = provider

0 commit comments

Comments
 (0)
0