1
- """HTML formatting utilities for DataFusion DataFrames.
2
-
3
- This module provides a customizable HTML formatter for displaying DataFrames
4
- in rich environments like Jupyter notebooks.
5
-
6
- Examples:
7
- Basic usage with the default formatter:
8
-
9
- >>> import datafusion as df
10
- >>> # Create a DataFrame
11
- >>> ctx = df.SessionContext()
12
- >>> df_obj = ctx.sql("SELECT 1 as id, 'example' as name")
13
- >>> # The DataFrame will use the default formatter in Jupyter
14
-
15
- Configuring the global formatter:
16
-
17
- >>> from datafusion.html_formatter import configure_formatter
18
- >>> configure_formatter(
19
- ... max_cell_length=50,
20
- ... max_height=500,
21
- ... enable_cell_expansion=True
22
- ... )
23
-
24
- Creating a custom formatter with specialized type handling:
25
-
26
- >>> import datetime
27
- >>> from datafusion.html_formatter import (
28
- ... DataFrameHtmlFormatter,
29
- ... StyleProvider,
30
- ... get_formatter
31
- ... )
32
- >>>
33
- >>> # Create a custom date formatter
34
- >>> def format_date(date_value):
35
- ... return date_value.strftime("%Y-%m-%d")
36
- >>>
37
- >>> # Create a custom style provider
38
- >>> class BlueHeaderStyleProvider(StyleProvider):
39
- ... def get_cell_style(self) -> str:
40
- ... return "border: 1px solid #ddd; padding: 8px; text-align: left;"
41
- ...
42
- ... def get_header_style(self) -> str:
43
- ... return (
44
- ... "border: 1px solid #ddd; padding: 8px; "
45
- ... "background-color: #4285f4; color: white; "
46
- ... "text-align: left; font-weight: bold;"
47
- ... )
48
- >>>
49
- >>> # Use composition to create a custom formatter
50
- >>> formatter = DataFrameHtmlFormatter(
51
- ... max_cell_length=100,
52
- ... style_provider=BlueHeaderStyleProvider()
53
- ... )
54
- >>>
55
- >>> # Register formatters for specific types
56
- >>> formatter.register_formatter(datetime.date, format_date)
57
- >>> formatter.register_formatter(float, lambda x: f"{x:.2f}")
58
- >>>
59
- >>> # Make it the global formatter
60
- >>> from datafusion.html_formatter import configure_formatter
61
- >>> configure_formatter(
62
- ... max_cell_length=100,
63
- ... style_provider=BlueHeaderStyleProvider()
64
- ... )
65
- >>> # Now register the formatters with the global formatter
66
- >>> current_formatter = get_formatter()
67
- >>> current_formatter.register_formatter(datetime.date, format_date)
68
- >>> current_formatter.register_formatter(float, lambda x: f"{x:.2f}")
69
-
70
- Creating custom cell builders for more complex formatting:
71
-
72
- >>> # Custom cell builder for numeric values
73
- >>> def number_cell_builder(value, row, col, table_id):
74
- ... if isinstance(value, (int, float)) and value < 0:
75
- ... return f"<td style='background-color: #ffcccc'>{value}</td>"
76
- ... elif isinstance(value, (int, float)) and value > 1000:
77
- ... return f"<td style='background-color: #ccffcc; font-weight: bold'>{value}</td>"
78
- ... else:
79
- ... return f"<td>{value}</td>"
80
- >>>
81
- >>> formatter.set_custom_cell_builder(number_cell_builder)
82
- """
1
+ """HTML formatting utilities for DataFusion DataFrames."""
83
2
84
3
from typing import Dict , Optional , Any , Union , List , Callable , Type , Protocol
85
4
@@ -147,46 +66,6 @@ class DataFrameHtmlFormatter:
147
66
custom_css: Additional CSS to include in the HTML output
148
67
show_truncation_message: Whether to display a message when data is truncated
149
68
style_provider: Custom provider for cell and header styles
150
-
151
- Example:
152
- Create a formatter that adds color-coding for numeric values and custom date formatting:
153
-
154
- >>> # Create custom style provider
155
- >>> class CustomStyleProvider:
156
- ... def get_cell_style(self) -> str:
157
- ... return "border: 1px solid #ddd; padding: 8px;"
158
- ...
159
- ... def get_header_style(self) -> str:
160
- ... return (
161
- ... "border: 1px solid #ddd; padding: 8px; "
162
- ... "background-color: #333; color: white;"
163
- ... )
164
- >>>
165
- >>> # Create the formatter with custom styling
166
- >>> formatter = DataFrameHtmlFormatter(
167
- ... max_cell_length=50,
168
- ... style_provider=CustomStyleProvider()
169
- ... )
170
- >>>
171
- >>> # Add custom formatters for specific data types
172
- >>> import datetime
173
- >>> formatter.register_formatter(
174
- ... datetime.date,
175
- ... lambda d: f'<span style="color: blue">{d.strftime("%b %d, %Y")}</span>'
176
- ... )
177
- >>>
178
- >>> # Format large numbers with commas
179
- >>> formatter.register_formatter(
180
- ... int,
181
- ... lambda n: f'<span style="font-family: monospace">{n:,}</span>' if n > 1000 else str(n)
182
- ... )
183
- >>>
184
- >>> # Replace the global formatter so all DataFrames use it
185
- >>> from datafusion.html_formatter import configure_formatter
186
- >>> configure_formatter(
187
- ... max_cell_length=50,
188
- ... style_provider=CustomStyleProvider()
189
- ... )
190
69
"""
191
70
192
71
def __init__ (
@@ -288,7 +167,9 @@ def _build_html_header(self) -> List[str]:
288
167
"""Build the HTML header with CSS styles."""
289
168
html = []
290
169
html .append ("<style>" )
291
- html .append (self ._get_default_css ())
170
+ # Only include expandable CSS if cell expansion is enabled
171
+ if self .enable_cell_expansion :
172
+ html .append (self ._get_default_css ())
292
173
if self .custom_css :
293
174
html .append (self .custom_css )
294
175
html .append ("</style>" )
@@ -332,57 +213,109 @@ def _build_table_body(self, batches: list, table_uuid: str) -> List[str]:
332
213
html .append ("<tr>" )
333
214
334
215
for col_idx , column in enumerate (batch .columns ):
335
- cell_value = self ._format_cell_value (column , row_idx )
216
+ raw_value = self ._get_cell_value (column , row_idx )
217
+ formatted_value = self ._format_cell_value (raw_value )
336
218
337
219
if (
338
- len (str (cell_value )) > self .max_cell_length
220
+ len (str (formatted_value )) > self .max_cell_length
339
221
and self .enable_cell_expansion
340
222
):
341
223
html .append (
342
224
self ._build_expandable_cell (
343
- cell_value , row_count , col_idx , table_uuid
225
+ raw_value ,
226
+ formatted_value ,
227
+ row_count ,
228
+ col_idx ,
229
+ table_uuid ,
344
230
)
345
231
)
346
232
else :
347
- html .append (self ._build_regular_cell (cell_value ))
233
+ html .append (
234
+ self ._build_regular_cell (raw_value , formatted_value )
235
+ )
348
236
349
237
html .append ("</tr>" )
350
238
351
239
html .append ("</tbody>" )
352
240
return html
353
241
242
+ def _get_cell_value (self , column : Any , row_idx : int ) -> Any :
243
+ """Extract a cell value from a column.
244
+
245
+ Args:
246
+ column: Arrow array
247
+ row_idx: Row index
248
+
249
+ Returns:
250
+ The raw cell value
251
+ """
252
+ try :
253
+ return column [row_idx ]
254
+ except (IndexError , TypeError ):
255
+ return ""
256
+
257
+ def _format_cell_value (self , value : Any ) -> str :
258
+ """Format a cell value for display.
259
+
260
+ Uses registered type formatters if available.
261
+
262
+ Args:
263
+ value: The cell value to format
264
+
265
+ Returns:
266
+ Formatted cell value as string
267
+ """
268
+ # Check for custom type formatters
269
+ for type_cls , formatter in self ._type_formatters .items ():
270
+ if isinstance (value , type_cls ):
271
+ return formatter (value )
272
+
273
+ return str (value )
274
+
354
275
def _build_expandable_cell (
355
- self , cell_value : Any , row_count : int , col_idx : int , table_uuid : str
276
+ self ,
277
+ raw_value : Any ,
278
+ formatted_value : str ,
279
+ row_count : int ,
280
+ col_idx : int ,
281
+ table_uuid : str ,
356
282
) -> str :
357
283
"""Build an expandable cell for long content."""
358
284
# If custom cell builder is provided, use it
359
285
if self ._custom_cell_builder :
360
- return self ._custom_cell_builder (cell_value , row_count , col_idx , table_uuid )
286
+ return self ._custom_cell_builder (raw_value , row_count , col_idx , table_uuid )
361
287
362
- short_value = str ( cell_value ) [: self .max_cell_length ]
288
+ short_value = formatted_value [: self .max_cell_length ]
363
289
return (
364
290
f"<td style='{ self .style_provider .get_cell_style ()} '>"
365
291
f"<div class='expandable-container'>"
366
292
f"<span class='expandable' id='{ table_uuid } -min-text-{ row_count } -{ col_idx } '>"
367
293
f"{ short_value } </span>"
368
294
f"<span class='full-text' id='{ table_uuid } -full-text-{ row_count } -{ col_idx } '>"
369
- f"{ cell_value } </span>"
295
+ f"{ formatted_value } </span>"
370
296
f"<button class='expand-btn' "
371
297
f"onclick=\" toggleDataFrameCellText('{ table_uuid } ',{ row_count } ,{ col_idx } )\" >"
372
298
f"...</button>"
373
299
f"</div>"
374
300
f"</td>"
375
301
)
376
302
377
- def _build_regular_cell (self , cell_value : Any ) -> str :
303
+ def _build_regular_cell (self , raw_value : Any , formatted_value : str ) -> str :
378
304
"""Build a regular table cell."""
379
- return f"<td style='{ self .style_provider .get_cell_style ()} '>{ cell_value } </td>"
305
+ # If custom cell builder is provided, use it with dummy row/col values
306
+ if self ._custom_cell_builder :
307
+ # Use 0, 0, "" as dummy values since this isn't an expandable cell
308
+ return self ._custom_cell_builder (raw_value , 0 , 0 , "" )
309
+
310
+ return (
311
+ f"<td style='{ self .style_provider .get_cell_style ()} '>{ formatted_value } </td>"
312
+ )
380
313
381
314
def _build_html_footer (self , has_more : bool ) -> List [str ]:
382
315
"""Build the HTML footer with JavaScript and messages."""
383
316
html = []
384
317
385
- # Add JavaScript for interactivity
318
+ # Add JavaScript for interactivity only if cell expansion is enabled
386
319
if self .enable_cell_expansion :
387
320
html .append (self ._get_javascript ())
388
321
@@ -392,30 +325,6 @@ def _build_html_footer(self, has_more: bool) -> List[str]:
392
325
393
326
return html
394
327
395
- def _format_cell_value (self , column : Any , row_idx : int ) -> str :
396
- """Format a cell value for display.
397
-
398
- Uses registered type formatters if available.
399
-
400
- Args:
401
- column: Arrow array
402
- row_idx: Row index
403
-
404
- Returns:
405
- Formatted cell value as string
406
- """
407
- try :
408
- value = column [row_idx ]
409
-
410
- # Check for custom type formatters
411
- for type_cls , formatter in self ._type_formatters .items ():
412
- if isinstance (value , type_cls ):
413
- return formatter (value )
414
-
415
- return str (value )
416
- except (IndexError , TypeError ):
417
- return ""
418
-
419
328
def _get_default_css (self ) -> str :
420
329
"""Get default CSS styles for the HTML table."""
421
330
return """
@@ -502,26 +411,7 @@ def configure_formatter(**kwargs: Any) -> None:
502
411
def set_style_provider(provider: StyleProvider) -> None:
    """Set a custom style provider for the global formatter.

    Assigns ``provider`` to the ``style_provider`` attribute of the
    module-level default formatter instance; nothing else on that
    formatter is modified by this function.

    Args:
        provider: A StyleProvider implementation
    """
    _default_formatter.style_provider = provider
0 commit comments