refactor: streamline HTML formatter by removing extensive docstring e… · kosiew/datafusion-python@ecab831 · GitHub
[go: up one dir, main page]

Skip to content

Commit ecab831

Browse files
committed
refactor: streamline HTML formatter by removing extensive docstring examples and enhancing cell formatting methods
- Removed lengthy examples from the docstring of DataFrameHtmlFormatter to improve readability. - Added methods for extracting and formatting cell values, enhancing the clarity and maintainability of the code. - Updated cell building methods to utilize the new formatting logic, ensuring consistent application of styles and behaviors. - Introduced a reset fixture for tests to ensure the formatter is returned to default settings after each test case. - Added tests for HTML formatter configuration, custom style providers, type formatters, custom cell builders, and complex customizations to ensure robust functionality.
1 parent 34f337e commit ecab831

File tree

2 files changed

+241
-177
lines changed

2 files changed

+241
-177
lines changed

python/datafusion/html_formatter.py

Lines changed: 67 additions & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,4 @@
1-
"""HTML formatting utilities for DataFusion DataFrames.
2-
3-
This module provides a customizable HTML formatter for displaying DataFrames
4-
in rich environments like Jupyter notebooks.
5-
6-
Examples:
7-
Basic usage with the default formatter:
8-
9-
>>> import datafusion as df
10-
>>> # Create a DataFrame
11-
>>> ctx = df.SessionContext()
12-
>>> df_obj = ctx.sql("SELECT 1 as id, 'example' as name")
13-
>>> # The DataFrame will use the default formatter in Jupyter
14-
15-
Configuring the global formatter:
16-
17-
>>> from datafusion.html_formatter import configure_formatter
18-
>>> configure_formatter(
19-
... max_cell_length=50,
20-
... max_height=500,
21-
... enable_cell_expansion=True
22-
... )
23-
24-
Creating a custom formatter with specialized type handling:
25-
26-
>>> import datetime
27-
>>> from datafusion.html_formatter import (
28-
... DataFrameHtmlFormatter,
29-
... StyleProvider,
30-
... get_formatter
31-
... )
32-
>>>
33-
>>> # Create a custom date formatter
34-
>>> def format_date(date_value):
35-
... return date_value.strftime("%Y-%m-%d")
36-
>>>
37-
>>> # Create a custom style provider
38-
>>> class BlueHeaderStyleProvider(StyleProvider):
39-
... def get_cell_style(self) -> str:
40-
... return "border: 1px solid #ddd; padding: 8px; text-align: left;"
41-
...
42-
... def get_header_style(self) -> str:
43-
... return (
44-
... "border: 1px solid #ddd; padding: 8px; "
45-
... "background-color: #4285f4; color: white; "
46-
... "text-align: left; font-weight: bold;"
47-
... )
48-
>>>
49-
>>> # Use composition to create a custom formatter
50-
>>> formatter = DataFrameHtmlFormatter(
51-
... max_cell_length=100,
52-
... style_provider=BlueHeaderStyleProvider()
53-
... )
54-
>>>
55-
>>> # Register formatters for specific types
56-
>>> formatter.register_formatter(datetime.date, format_date)
57-
>>> formatter.register_formatter(float, lambda x: f"{x:.2f}")
58-
>>>
59-
>>> # Make it the global formatter
60-
>>> from datafusion.html_formatter import configure_formatter
61-
>>> configure_formatter(
62-
... max_cell_length=100,
63-
... style_provider=BlueHeaderStyleProvider()
64-
... )
65-
>>> # Now register the formatters with the global formatter
66-
>>> current_formatter = get_formatter()
67-
>>> current_formatter.register_formatter(datetime.date, format_date)
68-
>>> current_formatter.register_formatter(float, lambda x: f"{x:.2f}")
69-
70-
Creating custom cell builders for more complex formatting:
71-
72-
>>> # Custom cell builder for numeric values
73-
>>> def number_cell_builder(value, row, col, table_id):
74-
... if isinstance(value, (int, float)) and value < 0:
75-
... return f"<td style='background-color: #ffcccc'>{value}</td>"
76-
... elif isinstance(value, (int, float)) and value > 1000:
77-
... return f"<td style='background-color: #ccffcc; font-weight: bold'>{value}</td>"
78-
... else:
79-
... return f"<td>{value}</td>"
80-
>>>
81-
>>> formatter.set_custom_cell_builder(number_cell_builder)
82-
"""
1+
"""HTML formatting utilities for DataFusion DataFrames."""
832

843
from typing import Dict, Optional, Any, Union, List, Callable, Type, Protocol
854

@@ -147,46 +66,6 @@ class DataFrameHtmlFormatter:
14766
custom_css: Additional CSS to include in the HTML output
14867
show_truncation_message: Whether to display a message when data is truncated
14968
style_provider: Custom provider for cell and header styles
150-
151-
Example:
152-
Create a formatter that adds color-coding for numeric values and custom date formatting:
153-
154-
>>> # Create custom style provider
155-
>>> class CustomStyleProvider:
156-
... def get_cell_style(self) -> str:
157-
... return "border: 1px solid #ddd; padding: 8px;"
158-
...
159-
... def get_header_style(self) -> str:
160-
... return (
161-
... "border: 1px solid #ddd; padding: 8px; "
162-
... "background-color: #333; color: white;"
163-
... )
164-
>>>
165-
>>> # Create the formatter with custom styling
166-
>>> formatter = DataFrameHtmlFormatter(
167-
... max_cell_length=50,
168-
... style_provider=CustomStyleProvider()
169-
... )
170-
>>>
171-
>>> # Add custom formatters for specific data types
172-
>>> import datetime
173-
>>> formatter.register_formatter(
174-
... datetime.date,
175-
... lambda d: f'<span style="color: blue">{d.strftime("%b %d, %Y")}</span>'
176-
... )
177-
>>>
178-
>>> # Format large numbers with commas
179-
>>> formatter.register_formatter(
180-
... int,
181-
... lambda n: f'<span style="font-family: monospace">{n:,}</span>' if n > 1000 else str(n)
182-
... )
183-
>>>
184-
>>> # Replace the global formatter so all DataFrames use it
185-
>>> from datafusion.html_formatter import configure_formatter
186-
>>> configure_formatter(
187-
... max_cell_length=50,
188-
... style_provider=CustomStyleProvider()
189-
... )
19069
"""
19170

19271
def __init__(
@@ -288,7 +167,9 @@ def _build_html_header(self) -> List[str]:
288167
"""Build the HTML header with CSS styles."""
289168
html = []
290169
html.append("<style>")
291-
html.append(self._get_default_css())
170+
# Only include expandable CSS if cell expansion is enabled
171+
if self.enable_cell_expansion:
172+
html.append(self._get_default_css())
292173
if self.custom_css:
293174
html.append(self.custom_css)
294175
html.append("</style>")
@@ -332,57 +213,109 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
332213
html.append("<tr>")
333214

334215
for col_idx, column in enumerate(batch.columns):
335-
cell_value = self._format_cell_value(column, row_idx)
216+
raw_value = self._get_cell_value(column, row_idx)
217+
formatted_value = self._format_cell_value(raw_value)
336218

337219
if (
338-
len(str(cell_value)) > self.max_cell_length
220+
len(str(formatted_value)) > self.max_cell_length
339221
and self.enable_cell_expansion
340222
):
341223
html.append(
342224
self._build_expandable_cell(
343-
cell_value, row_count, col_idx, table_uuid
225+
raw_value,
226+
formatted_value,
227+
row_count,
228+
col_idx,
229+
table_uuid,
344230
)
345231
)
346232
else:
347-
html.append(self._build_regular_cell(cell_value))
233+
html.append(
234+
self._build_regular_cell(raw_value, formatted_value)
235+
)
348236

349237
html.append("</tr>")
350238

351239
html.append("</tbody>")
352240
return html
353241

242+
def _get_cell_value(self, column: Any, row_idx: int) -> Any:
243+
"""Extract a cell value from a column.
244+
245+
Args:
246+
column: Arrow array
247+
row_idx: Row index
248+
249+
Returns:
250+
The raw cell value
251+
"""
252+
try:
253+
return column[row_idx]
254+
except (IndexError, TypeError):
255+
return ""
256+
257+
def _format_cell_value(self, value: Any) -> str:
258+
"""Format a cell value for display.
259+
260+
Uses registered type formatters if available.
261+
262+
Args:
263+
value: The cell value to format
264+
265+
Returns:
266+
Formatted cell value as string
267+
"""
268+
# Check for custom type formatters
269+
for type_cls, formatter in self._type_formatters.items():
270+
if isinstance(value, type_cls):
271+
return formatter(value)
272+
273+
return str(value)
274+
354275
def _build_expandable_cell(
355-
self, cell_value: Any, row_count: int, col_idx: int, table_uuid: str
276+
self,
277+
raw_value: Any,
278+
formatted_value: str,
279+
row_count: int,
280+
col_idx: int,
281+
table_uuid: str,
356282
) -> str:
357283
"""Build an expandable cell for long content."""
358284
# If custom cell builder is provided, use it
359285
if self._custom_cell_builder:
360-
return self._custom_cell_builder(cell_value, row_count, col_idx, table_uuid)
286+
return self._custom_cell_builder(raw_value, row_count, col_idx, table_uuid)
361287

362-
short_value = str(cell_value)[: self.max_cell_length]
288+
short_value = formatted_value[: self.max_cell_length]
363289
return (
364290
f"<td style='{self.style_provider.get_cell_style()}'>"
365291
f"<div class='expandable-container'>"
366292
f"<span class='expandable' id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
367293
f"{short_value}</span>"
368294
f"<span class='full-text' id='{table_uuid}-full-text-{row_count}-{col_idx}'>"
369-
f"{cell_value}</span>"
295+
f"{formatted_value}</span>"
370296
f"<button class='expand-btn' "
371297
f"onclick=\"toggleDataFrameCellText('{table_uuid}',{row_count},{col_idx})\">"
372298
f"...</button>"
373299
f"</div>"
374300
f"</td>"
375301
)
376302

377-
def _build_regular_cell(self, cell_value: Any) -> str:
303+
def _build_regular_cell(self, raw_value: Any, formatted_value: str) -> str:
378304
"""Build a regular table cell."""
379-
return f"<td style='{self.style_provider.get_cell_style()}'>{cell_value}</td>"
305+
# If custom cell builder is provided, use it with dummy row/col values
306+
if self._custom_cell_builder:
307+
# Use 0, 0, "" as dummy values since this isn't an expandable cell
308+
return self._custom_cell_builder(raw_value, 0, 0, "")
309+
310+
return (
311+
f"<td style='{self.style_provider.get_cell_style()}'>{formatted_value}</td>"
312+
)
380313

381314
def _build_html_footer(self, has_more: bool) -> List[str]:
382315
"""Build the HTML footer with JavaScript and messages."""
383316
html = []
384317

385-
# Add JavaScript for interactivity
318+
# Add JavaScript for interactivity only if cell expansion is enabled
386319
if self.enable_cell_expansion:
387320
html.append(self._get_javascript())
388321

@@ -392,30 +325,6 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
392325

393326
return html
394327

395-
def _format_cell_value(self, column: Any, row_idx: int) -> str:
396-
"""Format a cell value for display.
397-
398-
Uses registered type formatters if available.
399-
400-
Args:
401-
column: Arrow array
402-
row_idx: Row index
403-
404-
Returns:
405-
Formatted cell value as string
406-
"""
407-
try:
408-
value = column[row_idx]
409-
410-
# Check for custom type formatters
411-
for type_cls, formatter in self._type_formatters.items():
412-
if isinstance(value, type_cls):
413-
return formatter(value)
414-
415-
return str(value)
416-
except (IndexError, TypeError):
417-
return ""
418-
419328
def _get_default_css(self) -> str:
420329
"""Get default CSS styles for the HTML table."""
421330
return """
@@ -502,26 +411,7 @@ def configure_formatter(**kwargs: Any) -> None:
502411
def set_style_provider(provider: StyleProvider) -> None:
    """Install ``provider`` as the style provider of the global formatter.

    Only the ``style_provider`` attribute of the module-level
    ``_default_formatter`` is reassigned; nothing else on it is touched.

    Args:
        provider: A StyleProvider implementation
    """
    global_formatter = _default_formatter
    global_formatter.style_provider = provider

0 commit comments

Comments
 (0)
0