test: Add display configuration tests for DataFrame representation an… · kosiew/datafusion-python@815690b · GitHub
[go: up one dir, main page]

Skip to content

Commit 815690b

Browse files
committed
test: Add display configuration tests for DataFrame representation and HTML output
1 parent eef0a36 commit 815690b

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

python/tests/test_dataframe.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,3 +1358,151 @@ def test_dataframe_repr_html(df) -> None:
13581358
body_lines = [f"<td(.*?)>{v}</td>" for inner in body_data for v in inner]
13591359
body_pattern = "(.*?)".join(body_lines)
13601360
assert len(re.findall(body_pattern, output, re.DOTALL)) == 1
1361+
1362+
1363+
def test_display_config_affects_repr() -> None:
    """Check that ``max_table_rows_in_repr`` drives truncation of ``repr(df)``.

    A 10-row DataFrame is rendered through two contexts: one limited to 3
    rows, whose output must stay short and carry the "Data truncated" notice,
    and one limited to 15 rows, whose output must not carry that notice.
    """
    max_table_rows_in_repr = 3
    # Create a context with custom display config
    ctx = SessionContext().with_display_config(
        max_table_rows_in_repr=max_table_rows_in_repr
    )

    # Create a DataFrame with more rows than the display limit
    data = [{"a": i, "b": f"value_{i}", "c": i * 10} for i in range(10)]
    df = ctx.from_pylist(data)

    # Get the string representation; the expected shape is:
    # +---+---------+----+
    # | a | b       | c  |
    # +---+---------+----+
    # | 0 | value_0 | 0  |
    # | 1 | value_1 | 10 |
    # | 2 | value_2 | 20 |
    # +---+---------+----+
    # Data truncated.
    repr_str = repr(df)

    # The representation should show truncated data (3 rows as specified).
    # Newline budget: header row + separator lines + data rows + possibly the
    # truncation message.
    assert repr_str.count("\n") <= max_table_rows_in_repr + 5
    assert "Data truncated" in repr_str

    # Create a context with larger display limit
    ctx2 = SessionContext().with_display_config(max_table_rows_in_repr=15)

    df2 = ctx2.from_pylist(data)
    repr_str2 = repr(df2)

    # Should show all data without truncation message
    assert repr_str2.count("\n") >= 10  # All rows should be shown
    assert "Data truncated" not in repr_str2
1401+
1402+
1403+
def test_display_config_affects_html_repr():
    """Check that ``max_cell_length`` shortens long cells in ``_repr_html_``.

    The same single-row frame is rendered with a 5-character and a
    50-character cell limit, and the resulting HTML is inspected for the
    visible text prefix and the expansion markup.
    """
    long_text = "This is a very long string that should be truncated"
    rows = [{"a": 1, "b": long_text, "c": 100}]

    # Tight limit: only the first 5 characters stay visible and the cell
    # gains the expansion markup.
    narrow_html = (
        SessionContext()
        .with_display_config(max_cell_length=5)
        .from_pylist(rows)
        ._repr_html_()
    )
    for marker in (">This ", "expandable", "expand-btn"):
        assert marker in narrow_html

    # Generous limit: the cell may still be expandable, but then it must
    # expose a longer prefix; otherwise the full text must be present.
    wide_html = (
        SessionContext()
        .with_display_config(max_cell_length=50)
        .from_pylist(rows)
        ._repr_html_()
    )
    expected_fragment = (
        ">This is a very long string that"
        if "expandable" in wide_html
        else long_text
    )
    assert expected_fragment in wide_html
1434+
1435+
1436+
def test_display_config_rows_limit_in_html() -> None:
    """Check that ``max_table_rows_in_repr`` caps the rows in ``_repr_html_``.

    A 10-row DataFrame is rendered with a limit of 5 rows (expecting at most
    5 body rows plus the "Data truncated" notice) and with a limit of 20 rows
    (expecting all 10 body rows and no notice).
    """
    total_rows = 10
    max_table_rows = 5
    # Create a context with custom display config to limit rows
    ctx = SessionContext().with_display_config(
        max_table_rows_in_repr=max_table_rows,
    )

    # Create a DataFrame with 10 rows
    data = [{"a": i, "b": f"value_{i}", "c": i * 10} for i in range(total_rows)]
    df = ctx.from_pylist(data)

    # Get the HTML representation
    html_str = df._repr_html_()

    # Only a few rows should be shown and there should be a truncation message
    row_count = html_str.count("<tr>") - 1  # Subtract 1 for header row
    assert row_count <= max_table_rows
    assert "Data truncated" in html_str

    # Create a context with a row limit larger than the data
    ctx2 = SessionContext().with_display_config(max_table_rows_in_repr=20)

    df2 = ctx2.from_pylist(data)
    html_str2 = df2._repr_html_()

    # Should show all rows
    row_count2 = html_str2.count("<tr>") - 1  # Subtract 1 for header row
    assert row_count2 == total_rows  # Should show all 10 rows
    assert "Data truncated" not in html_str2
1469+
1470+
1471+
def test_display_config_max_bytes_limit() -> None:
    """Check that ``max_table_bytes`` limits the rows in ``_repr_html_``.

    A 20-row DataFrame with 50-character string cells is rendered with a
    100-byte budget (expecting no more than ``min_table_rows`` body rows and
    the "Data truncated" notice) and with a 10 MB budget (expecting at least
    ``min_table_rows`` body rows).
    """
    min_table_rows = 10
    max_table_rows = 20
    # Create a context with custom display config with very small byte limit
    ctx = SessionContext().with_display_config(
        min_table_rows=min_table_rows,
        max_table_rows_in_repr=max_table_rows,
        max_table_bytes=100,
    )  # Very small limit

    # Create a DataFrame with large content
    # Generate some data with long strings to hit the byte limit quickly
    large_string = "x" * 50
    data = [
        {"a": i, "b": large_string, "c": large_string}
        for i in range(20)  # 20 rows with long strings
    ]
    df = ctx.from_pylist(data)

    # Get the HTML representation
    html_str = df._repr_html_()

    # Due to small byte limit, we should see truncation
    row_count = html_str.count("<tr>") - 1  # Subtract 1 for header row
    # At most min_table_rows of the 20 rows should be rendered
    assert row_count <= min_table_rows
    assert "Data truncated" in html_str

    # With a larger byte limit
    ctx2 = SessionContext().with_display_config(
        max_table_bytes=10 * 1024 * 1024  # 10 MB, much more than needed
    )

    df2 = ctx2.from_pylist(data)
    html_str2 = df2._repr_html_()

    # The byte budget no longer constrains output; only a lower bound on the
    # rendered row count is checked here.
    row_count2 = html_str2.count("<tr>") - 1  # Subtract 1 for header row
    assert row_count2 >= min_table_rows  # At least min_table_rows are shown

0 commit comments

Comments
 (0)
0