-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
feat: make plotly-express dataframe agnostic via narwhals #4790
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9873e97
0389591
ba93236
421fc1d
ca5c820
a6aab24
7665f10
ec4f250
7e0d4c2
5543638
cd0dab7
f334b32
e5eb949
7747e30
ac00b36
da80c5b
8a72ba1
aeff203
2041bef
71473f1
9f74c38
28587c9
1bb2448
f45addf
5341759
dfc957c
90f2667
fb58d1b
ddb3b35
37ce302
4da8768
210e01a
c00525e
3486a3e
c0ce093
0eb6951
844a6a9
0c27789
0e6ff78
c2337c9
2cc5d7b
1b27487
23a23be
7968cff
cf76721
91db84b
5c6772e
9ec3f9e
400a624
1aa5163
594ded0
6676061
d7d2884
d6ee676
82c114d
0ceabc1
c9b626e
87841d1
ffa7b3b
3ba19ae
a70146b
1fa9fe4
0103aa6
673d141
b858ed8
bbcf438
49efae2
a36bc24
c119153
7416407
1867f6f
d3a28c0
e6e9994
64b8c70
3f6b383
755aea8
6f18021
4d62e73
a770fd8
7d6f7d6
b8c10ec
490b64a
f7fd4c9
8753acb
1429e6f
878d4db
192e0a8
de6761c
bcfef68
519cc68
e5520a7
b855352
51e2b23
7ef9f28
e9a367d
12fed31
27b2996
f27f959
126a79d
7735366
b6516b4
6f1389f
db22268
b514c01
ce8fb9a
e47827e
9a9283a
2630a5a
fef6dbe
48c7f62
18cc11c
d94cbf7
c320c46
afdb31f
68ab52a
b8ccec4
f102998
2df0427
55a0178
bb327d5
7d611fb
a22a7be
44a52e5
fc74b2e
499e2fa
d2e1008
742b2ec
269dea6
b1dc48d
17fb96f
9f2c55b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,22 +2,10 @@ | |
import plotly.io as pio | ||
import narwhals.stable.v1 as nw | ||
import numpy as np | ||
import pandas as pd | ||
import polars as pl | ||
import pyarrow as pa | ||
import pytest | ||
from itertools import permutations | ||
|
||
constructors = ( | ||
pd.DataFrame, | ||
pl.DataFrame, | ||
pa.table, | ||
lambda d: pd.DataFrame(d).convert_dtypes("pyarrow"), | ||
lambda d: pd.DataFrame(d).convert_dtypes("numpy_nullable"), | ||
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_scatter(constructor): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @FBruzzesi I'm not seeing the benefit to calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's so that in L14 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it, I see it now, thanks. I see a few tests further down, though, where it doesn't seem necessary -- what about the following?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am addressing those, thanks for spotting them! Replacing all with regex was definitly a mistake 🙈 Edit: |
||
data = px.data.iris().to_dict(orient="list") | ||
iris = nw.from_native(constructor(data)) | ||
|
@@ -29,7 +17,6 @@ def test_scatter(constructor): | |
assert fig.data[0].mode == "markers" | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_custom_data_scatter(constructor): | ||
data = px.data.iris().to_dict(orient="list") | ||
iris = nw.from_native(constructor(data)) | ||
|
@@ -80,7 +67,6 @@ def test_custom_data_scatter(constructor): | |
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_labels(constructor): | ||
data = px.data.tips().to_dict(orient="list") | ||
tips = nw.from_native(constructor(data)) | ||
|
@@ -106,7 +92,6 @@ def test_labels(constructor): | |
assert fig.layout.annotations[4].text.startswith("TIME") | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
@pytest.mark.parametrize( | ||
["extra_kwargs", "expected_mode"], | < 10000 /tr>||
[ | ||
|
@@ -129,7 +114,6 @@ def test_line_mode(constructor, extra_kwargs, expected_mode): | |
assert fig.data[0].mode == expected_mode | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_px_templates(constructor): | ||
try: | ||
import plotly.graph_objects as go | ||
|
@@ -300,7 +284,6 @@ def assert_orderings(constructor, days_order, days_check, times_order, times_che | |
assert trace.marker.color == color_sequence[i] | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
@pytest.mark.parametrize("days", permutations(["Sun", "Sat", "Fri", "x"])) | ||
@pytest.mark.parametrize("times", permutations(["Lunch", "x"])) | ||
def test_orthogonal_and_missing_orderings(constructor, days, times): | ||
|
@@ -309,7 +292,6 @@ def test_orthogonal_and_missing_orderings(constructor, days, times): | |
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
@pytest.mark.parametrize("days", permutations(["Sun", "Sat", "Fri", "Thur"])) | ||
@pytest.mark.parametrize("times", permutations(["Lunch", "Dinner"])) | ||
def test_orthogonal_orderings(constructor, days, times): | ||
|
@@ -322,7 +304,6 @@ def test_permissive_defaults(): | |
px.defaults.should_not_work = "test" | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_marginal_ranges(constructor): | ||
data = px.data.tips().to_dict(orient="list") | ||
df = nw.from_native(constructor(data)) | ||
|
@@ -339,7 +320,6 @@ def test_marginal_ranges(constructor): | |
assert fig.layout.yaxis3.range is None | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_render_mode(constructor): | ||
data = px.data.gapminder().to_dict(orient="list") | ||
df = nw.from_native(constructor(data)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,22 +2,10 @@ | |
import narwhals.stable.v1 as nw | ||
import numpy as np | ||
import pandas as pd | ||
import polars as pl | ||
import pyarrow as pa | ||
import pytest | ||
from collections import OrderedDict # an OrderedDict is needed for Python 2 | ||
|
||
|
||
constructors = ( | ||
pd.DataFrame, | ||
pl.DataFrame, | ||
pa.table, | ||
lambda d: pd.DataFrame(d).convert_dtypes("pyarrow"), | ||
lambda d: pd.DataFrame(d).convert_dtypes("numpy_nullable"), | ||
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_skip_hover(constructor): | ||
data = px.data.iris().to_dict(orient="list") | ||
df = constructor(data) | ||
|
@@ -31,7 +19,6 @@ def test_skip_hover(constructor): | |
assert fig.data[0].hovertemplate == "species_id=%{marker.size}<extra></extra>" | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_hover_data_string_column(constructor): | ||
data = px.data.tips().to_dict(orient="list") | ||
df = constructor(data) | ||
|
@@ -44,7 +31,6 @@ def test_hover_data_string_column(constructor): | |
assert "sex" in fig.data[0].hovertemplate | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_composite_hover(constructor): | ||
data = px.data.tips().to_dict(orient="list") | ||
df = constructor(data) | ||
|
@@ -105,7 +91,6 @@ def test_newdatain_hover_data(): | |
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
104E8 def test_formatted_hover_and_labels(constructor): | ||
data = px.data.tips().to_dict(orient="list") | ||
df = constructor(data) | ||
|
@@ -191,7 +176,6 @@ def test_fail_wrong_column(): | |
) | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_sunburst_hoverdict_color(constructor): | ||
data = px.data.gapminder().query("year == 2007").to_dict(orient="list") | ||
df = constructor(data) | ||
|
@@ -205,10 +189,10 @@ def test_sunburst_hoverdict_color(constructor): | |
assert "color" in fig.data[0].hovertemplate | ||
|
||
|
||
@pytest.mark.parametrize("constructor", constructors) | ||
def test_date_in_hover(request, constructor): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @FBruzzesi What does this mean in practice, in terms of how we should document this limitation in the docs? Something like "Datetimes converted to strings for non-Pandas input dataframes will lose their timezone information"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ooh this is a really good point, thanks! I just tried this out, and indeed, there is a difference:
@FBruzzesi here's an example to test this with: df = pl.DataFrame({'price': [40, 50], 'datetime': [datetime(2020,1,1), datetime(2020,1,2)]})
df = df.with_columns(pl.col('datetime').dt.replace_time_zone('Asia/Kathmandu'))
px.scatter(df, x='datetime', y='price') then, for Polars, we see: whereas for pandas: I'll have to check the logic, but the way to get the local datetime in Narwhals would be |
||
if constructor in {pl.DataFrame, pa.table}: | ||
if "pyarrow_table" in str(constructor) or "polars_eager" in str(constructor): | ||
request.applymarker(pytest.mark.xfail) | ||
|
||
df = nw.from_native( | ||
constructor({"date": ["2015-04-04 19:31:30+01:00"], "value": [3]}) | ||
).with_columns(date=nw.col("date").str.to_datetime(format="%Y-%m-%d %H:%M:%S%z")) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if the constructor functions were modified to accept either a Pandas df or a dict? Then if a Pandas df is passed it could call
.to_dict()
on it. Would save a bit of boilerplate in the tests.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it makes sense. However let me mention that after our meeting today I started working to allow data(set) module to be dataframe agnostic. Different PR should be ready tomorrow and it may be a quick win to simplify data loading in tests as well. I will ping you there