|
1 |
| -"""HTML formatting utilities for DataFusion DataFrames.""" |
| 1 | +"""HTML formatting utilities for DataFusion DataFrames. |
| 2 | +
|
| 3 | +This module provides a customizable HTML formatter for displaying DataFrames |
| 4 | +in rich environments like Jupyter notebooks. |
| 5 | +
|
| 6 | +Examples: |
| 7 | + Basic usage with the default formatter: |
| 8 | +
|
| 9 | + >>> import datafusion as df |
| 10 | + >>> # Create a DataFrame |
| 11 | + >>> ctx = df.SessionContext() |
| 12 | + >>> df_obj = ctx.sql("SELECT 1 as id, 'example' as name") |
| 13 | + >>> # The DataFrame will use the default formatter in Jupyter |
| 14 | +
|
| 15 | + Configuring the global formatter: |
| 16 | +
|
| 17 | + >>> from datafusion.html_formatter import configure_formatter |
| 18 | + >>> configure_formatter( |
| 19 | + ... max_cell_length=50, |
| 20 | + ... max_height=500, |
| 21 | + ... enable_cell_expansion=True |
| 22 | + ... ) |
| 23 | +
|
| 24 | + Creating a custom formatter with specialized type handling: |
| 25 | +
|
| 26 | + >>> import datetime |
| 27 | + >>> from datafusion.html_formatter import ( |
| 28 | + ... DataFrameHtmlFormatter, |
| 29 | + ... StyleProvider, |
| 30 | + ... get_formatter |
| 31 | + ... ) |
| 32 | + >>> |
| 33 | + >>> # Create a custom date formatter |
| 34 | + >>> def format_date(date_value): |
| 35 | + ... return date_value.strftime("%Y-%m-%d") |
| 36 | + >>> |
| 37 | + >>> # Create a custom style provider |
| 38 | + >>> class BlueHeaderStyleProvider(StyleProvider): |
| 39 | + ... def get_cell_style(self) -> str: |
| 40 | + ... return "border: 1px solid #ddd; padding: 8px; text-align: left;" |
| 41 | + ... |
| 42 | + ... def get_header_style(self) -> str: |
| 43 | + ... return ( |
| 44 | + ... "border: 1px solid #ddd; padding: 8px; " |
| 45 | + ... "background-color: #4285f4; color: white; " |
| 46 | + ... "text-align: left; font-weight: bold;" |
| 47 | + ... ) |
| 48 | + >>> |
| 49 | + >>> # Use composition to create a custom formatter |
| 50 | + >>> formatter = DataFrameHtmlFormatter( |
| 51 | + ... max_cell_length=100, |
| 52 | + ... style_provider=BlueHeaderStyleProvider() |
| 53 | + ... ) |
| 54 | + >>> |
| 55 | + >>> # Register formatters for specific types |
| 56 | + >>> formatter.register_formatter(datetime.date, format_date) |
| 57 | + >>> formatter.register_formatter(float, lambda x: f"{x:.2f}") |
| 58 | + >>> |
| 59 | + >>> # Apply the same settings to the global formatter |
| 60 | + >>> from datafusion.html_formatter import configure_formatter |
| 61 | + >>> configure_formatter( |
| 62 | + ... max_cell_length=100, |
| 63 | + ... style_provider=BlueHeaderStyleProvider() |
| 64 | + ... ) |
| 65 | + >>> # Now register the formatters with the global formatter |
| 66 | + >>> current_formatter = get_formatter() |
| 67 | + >>> current_formatter.register_formatter(datetime.date, format_date) |
| 68 | + >>> current_formatter.register_formatter(float, lambda x: f"{x:.2f}") |
| 69 | +
|
| 70 | + Creating custom cell builders for more complex formatting: |
| 71 | +
|
| 72 | + >>> # Custom cell builder for numeric values |
| 73 | + >>> def number_cell_builder(value, row, col, table_id): |
| 74 | + ... if isinstance(value, (int, float)) and value < 0: |
| 75 | + ... return f"<td style='background-color: #ffcccc'>{value}</td>" |
| 76 | + ... elif isinstance(value, (int, float)) and value > 1000: |
| 77 | + ... return f"<td style='background-color: #ccffcc; font-weight: bold'>{value}</td>" |
| 78 | + ... else: |
| 79 | + ... return f"<td>{value}</td>" |
| 80 | + >>> |
| 81 | + >>> formatter.set_custom_cell_builder(number_cell_builder) |
| 82 | +""" |
2 | 83 |
|
3 | 84 | from typing import Dict, Optional, Any, Union, List, Callable, Type, Protocol
|
4 | 85 |
|
@@ -66,6 +147,46 @@ class DataFrameHtmlFormatter:
|
66 | 147 | custom_css: Additional CSS to include in the HTML output
|
67 | 148 | show_truncation_message: Whether to display a message when data is truncated
|
68 | 149 | style_provider: Custom provider for cell and header styles
|
| 150 | +
|
| 151 | + Example: |
| 152 | + Create a formatter with a custom style provider, custom date formatting, and comma-grouped large integers: |
| 153 | +
|
| 154 | + >>> # Create custom style provider |
| 155 | + >>> class CustomStyleProvider: |
| 156 | + ... def get_cell_style(self) -> str: |
| 157 | + ... return "border: 1px solid #ddd; padding: 8px;" |
| 158 | + ... |
| 159 | + ... def get_header_style(self) -> str: |
| 160 | + ... return ( |
| 161 | + ... "border: 1px solid #ddd; padding: 8px; " |
| 162 | + ... "background-color: #333; color: white;" |
| 163 | + ... ) |
| 164 | + >>> |
| 165 | + >>> # Create the formatter with custom styling |
| 166 | + >>> formatter = DataFrameHtmlFormatter( |
| 167 | + ... max_cell_length=50, |
| 168 | + ... style_provider=CustomStyleProvider() |
| 169 | + ... ) |
| 170 | + >>> |
| 171 | + >>> # Add custom formatters for specific data types |
| 172 | + >>> import datetime |
| 173 | + >>> formatter.register_formatter( |
| 174 | + ... datetime.date, |
| 175 | + ... lambda d: f'<span style="color: blue">{d.strftime("%b %d, %Y")}</span>' |
| 176 | + ... ) |
| 177 | + >>> |
| 178 | + >>> # Format large numbers with commas |
| 179 | + >>> formatter.register_formatter( |
| 180 | + ... int, |
| 181 | + ... lambda n: f'<span style="font-family: monospace">{n:,}</span>' if n > 1000 else str(n) |
| 182 | + ... ) |
| 183 | + >>> |
| 184 | + >>> # Configure the global formatter with the same settings so all DataFrames use them |
| 185 | + >>> from datafusion.html_formatter import configure_formatter |
| 186 | + >>> configure_formatter( |
| 187 | + ... max_cell_length=50, |
| 188 | + ... style_provider=CustomStyleProvider() |
| 189 | + ... ) |
69 | 190 | """
|
70 | 191 |
|
71 | 192 | def __init__(
|
@@ -381,7 +502,26 @@ def configure_formatter(**kwargs: Any) -> None:
|
381 | 502 | def set_style_provider(provider: StyleProvider) -> None:
|
382 | 503 | """Set a custom style provider for the global formatter.
|
383 | 504 |
|
| 505 | + This is a convenience function to replace just the style provider |
| 506 | + of the global formatter instance without changing other settings. |
| 507 | +
|
384 | 508 | Args:
|
385 | 509 | provider: A StyleProvider implementation
|
| 510 | +
|
| 511 | + Example: |
| 512 | + >>> from datafusion.html_formatter import set_style_provider |
| 513 | + >>> |
| 514 | + >>> class DarkModeStyleProvider: |
| 515 | + ... def get_cell_style(self) -> str: |
| 516 | + ... return "border: 1px solid #555; padding: 8px; color: #eee; background-color: #222;" |
| 517 | + ... |
| 518 | + ... def get_header_style(self) -> str: |
| 519 | + ... return ( |
| 520 | + ... "border: 1px solid #555; padding: 8px; " |
| 521 | + ... "color: white; background-color: #111; font-weight: bold;" |
| 522 | + ... ) |
| 523 | + >>> |
| 524 | + >>> # Apply dark mode styling to all DataFrames |
| 525 | + >>> set_style_provider(DarkModeStyleProvider()) |
386 | 526 | """
|
387 | 527 | _default_formatter.style_provider = provider
|
0 commit comments