Merge branch 'master' into update-groupby · pandas-dev/pandas@8d26f5a · GitHub
[go: up one dir, main page]

Skip to content

Commit 8d26f5a

Browse files
authored
Merge branch 'master' into update-groupby
2 parents fa2b56f + ef77b57 commit 8d26f5a

File tree

390 files changed

+16173
-18548
lines changed
  • ops
  • reshape
  • sparse
  • tools
  • util
  • window
  • io
  • plotting
  • tests
  • tseries
  • util
  • scripts
  • web
  • Some content is hidden

    Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

    390 files changed

    +16173
    -18548
    lines changed

    .pre-commit-config.yaml

    Lines changed: 1 addition & 0 deletions
    Original file line number | Diff line number | Diff line change
    @@ -15,3 +15,4 @@ repos:
    1515
    hooks:
    1616
    - id: isort
    1717
    language: python_venv
    18+
    exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$

    .travis.yml

    Lines changed: 11 additions & 2 deletions
    Original file line number | Diff line number | Diff line change
    @@ -30,6 +30,12 @@ matrix:
    3030
    - python: 3.5
    3131

    3232
    include:
    33+
    - dist: bionic
    34+
    # 18.04
    35+
    python: 3.8-dev
    36+
    env:
    37+
    - JOB="3.8-dev" PATTERN="(not slow and not network)"
    38+
    3339
    - dist: trusty
    3440
    env:
    3541
    - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
    @@ -71,24 +77,27 @@ before_install:
    7177
    # This overrides travis and tells it to look nowhere.
    7278
    - export BOTO_CONFIG=/dev/null
    7379

    80+
    7481
    install:
    7582
    - echo "install start"
    7683
    - ci/prep_cython_cache.sh
    7784
    - ci/setup_env.sh
    7885
    - ci/submit_cython_cache.sh
    7986
    - echo "install done"
    8087

    88+
    8189
    before_script:
    8290
    # display server (for clipboard functionality) needs to be started here,
    8391
    # does not work if done in install:setup_env.sh (GH-26103)
    8492
    - export DISPLAY=":99.0"
    8593
    - echo "sh -e /etc/init.d/xvfb start"
    86-
    - sh -e /etc/init.d/xvfb start
    94+
    - if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
    8795
    - sleep 3
    8896

    8997
    script:
    9098
    - echo "script start"
    91-
    - source activate pandas-dev
    99+
    - echo "$JOB"
    100+
    - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
    92101
    - ci/run_tests.sh
    93102

    94103
    after_script:

    README.md

    Lines changed: 1 addition & 1 deletion
    Original file line number | Diff line number | Diff line change
    @@ -225,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
    225225

    226226
    All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
    227227

    228-
    A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
    228+
    A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
    229229

    230230
    If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
    231231

    asv_bench/benchmarks/categoricals.py

    Lines changed: 14 additions & 0 deletions
    Original file line number | Diff line number | Diff line change
    @@ -282,4 +282,18 @@ def time_sort_values(self):
    282282
    self.index.sort_values(ascending=False)
    283283

    284284

    285+
    class SearchSorted:
    286+
    def setup(self):
    287+
    N = 10 ** 5
    288+
    self.ci = tm.makeCategoricalIndex(N).sort_values()
    289+
    self.c = self.ci.values
    290+
    self.key = self.ci.categories[1]
    291+
    292+
    def time_categorical_index_contains(self):
    293+
    self.ci.searchsorted(self.key)
    294+
    295+
    def time_categorical_contains(self):
    296+
    self.c.searchsorted(self.key)
    297+
    298+
    285299
    from .pandas_vb_common import setup # noqa: F401 isort:skip

    asv_bench/benchmarks/ctors.py

    Lines changed: 1 addition & 1 deletion
    Original file line number | Diff line number | Diff line change
    @@ -67,7 +67,7 @@ class SeriesConstructors:
    6767
    def setup(self, data_fmt, with_index, dtype):
    6868
    if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
    6969
    raise NotImplementedError(
    70-
    "Series constructors do not support " "using generators with indexes"
    70+
    "Series constructors do not support using generators with indexes"
    7171
    )
    7272
    N = 10 ** 4
    7373
    if dtype == "float":

    asv_bench/benchmarks/eval.py

    Lines changed: 1 addition & 1 deletion
    Original file line number | Diff line number | Diff line change
    @@ -27,7 +27,7 @@ def time_add(self, engine, threads):
    2727

    2828
    def time_and(self, engine, threads):
    2929
    pd.eval(
    30-
    "(self.df > 0) & (self.df2 > 0) & " "(self.df3 > 0) & (self.df4 > 0)",
    30+
    "(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
    3131
    engine=engine,
    3232
    )
    3333

    asv_bench/benchmarks/frame_methods.py

    Lines changed: 11 additions & 0 deletions
    Original file line number | Diff line number | Diff line change
    @@ -609,4 +609,15 @@ def time_dataframe_describe(self):
    609609
    self.df.describe()
    610610

    611611

    612+
    class SelectDtypes:
    613+
    params = [100, 1000]
    614+
    param_names = ["n"]
    615+
    616+
    def setup(self, n):
    617+
    self.df = DataFrame(np.random.randn(10, n))
    618+
    619+
    def time_select_dtypes(self, n):
    620+
    self.df.select_dtypes(include="int")
    621+
    622+
    612623
    from .pandas_vb_common import setup # noqa: F401 isort:skip

    asv_bench/benchmarks/io/hdf.py

    Lines changed: 2 additions & 2 deletions
    Original file line number | Diff line number | Diff line change
    @@ -88,11 +88,11 @@ def time_write_store_table_dc(self):
    8888

    8989
    def time_query_store_table_wide(self):
    9090
    self.store.select(
    91-
    "table_wide", where="index > self.start_wide and " "index < self.stop_wide"
    91+
    "table_wide", where="index > self.start_wide and index < self.stop_wide"
    9292
    )
    9393

    9494
    def time_query_store_table(self):
    95-
    self.store.select("table", where="index > self.start and " "index < self.stop")
    95+
    self.store.select("table", where="index > self.start and index < self.stop")
    9696

    9797
    def time_store_repr(self):
    9898
    repr(self.store)

    asv_bench/benchmarks/join_merge.py

    Lines changed: 46 additions & 13 deletions
    Original file line number | Diff line number | Diff line change
    @@ -273,10 +273,10 @@ def time_merge_ordered(self):
    273273

    274274

    275275
    class MergeAsof:
    276-
    params = [["backward", "forward", "nearest"]]
    277-
    param_names = ["direction"]
    276+
    params = [["backward", "forward", "nearest"], [None, 5]]
    277+
    param_names = ["direction", "tolerance"]
    278278

    279-
    def setup(self, direction):
    279+
    def setup(self, direction, tolerance):
    280280
    one_count = 200000
    281281
    two_count = 1000000
    282282

    @@ -303,6 +303,9 @@ def setup(self, direction):
    303303
    df1["time32"] = np.int32(df1.time)
    304304
    df2["time32"] = np.int32(df2.time)
    305305

    306+
    df1["timeu64"] = np.uint64(df1.time)
    307+
    df2["timeu64"] = np.uint64(df2.time)
    308+
    306309
    self.df1a = df1[["time", "value1"]]
    307310
    self.df2a = df2[["time", "value2"]]
    308311
    self.df1b = df1[["time", "key", "value1"]]
    @@ -313,22 +316,52 @@ def setup(self, direction):
    313316
    self.df2d = df2[["time32", "value2"]]
    314317
    self.df1e = df1[["time", "key", "key2", "value1"]]
    315318
    self.df2e = df2[["time", "key", "key2", "value2"]]
    319+
    self.df1f = df1[["timeu64", "value1"]]
    320+
    self.df2f = df2[["timeu64", "value2"]]
    321+
    322+
    def time_on_int(self, direction, tolerance):
    323+
    merge_asof(
    324+
    self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
    325+
    )
    316326

    317-
    def time_on_int(self, direction):
    318-
    merge_asof(self.df1a, self.df2a, on="time", direction=direction)
    327+
    def time_on_int32(self, direction, tolerance):
    328+
    merge_asof(
    329+
    self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
    330+
    )
    319331

    320-
    def time_on_int32(self, direction):
    321-
    merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
    332+
    def time_on_uint64(self, direction, tolerance):
    333+
    merge_asof(
    334+
    self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
    335+
    )
    322336

    323-
    def time_by_object(self, direction):
    324-
    merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
    337+
    def time_by_object(self, direction, tolerance):
    338+
    merge_asof(
    339+
    self.df1b,
    340+
    self.df2b,
    341+
    on="time",
    342+
    by="key",
    343+
    direction=direction,
    344+
    tolerance=tolerance,
    345+
    )
    325346

    326-
    def time_by_int(self, direction):
    327-
    merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
    347+
    def time_by_int(self, direction, tolerance):
    348+
    merge_asof(
    349+
    self.df1c,
    350+
    self.df2c,
    351+
    on="time",
    352+
    by="key2",
    353+
    direction=direction,
    354+
    tolerance=tolerance,
    355+
    )
    328356

    329-
    def time_multiby(self, direction):
    357+
    def time_multiby(self, direction, tolerance):
    330358
    merge_asof(
    331-
    self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
    359+
    self.df1e,
    360+
    self.df2e,
    361+
    on="time",
    362+
    by=["key", "key2"],
    363+
    direction=direction,
    364+
    tolerance=tolerance,
    332365
    )
    333366

    334367

    asv_bench/benchmarks/rolling.py

    Lines changed: 19 additions & 0 deletions
    Original file line number | Diff line number | Diff line change
    @@ -25,6 +25,25 @@ def peakmem_rolling(self, constructor, window, dtype, method):
    2525
    getattr(self.roll, method)()
    2626

    2727

    28+
    class Apply:
    29+
    params = (
    30+
    ["DataFrame", "Series"],
    31+
    [10, 1000],
    32+
    ["int", "float"],
    33+
    [sum, np.sum, lambda x: np.sum(x) + 5],
    34+
    [True, False],
    35+
    )
    36+
    param_names = ["contructor", "window", "dtype", "function", "raw"]
    37+
    38+
    def setup(self, constructor, window, dtype, function, raw):
    39+
    N = 10 ** 5
    40+
    arr = (100 * np.random.random(N)).astype(dtype)
    41+
    self.roll = getattr(pd, constructor)(arr).rolling(window)
    42+
    43+
    def time_rolling(self, constructor, window, dtype, function, raw):
    44+
    self.roll.apply(function, raw=raw)
    45+
    46+
    2847
    class ExpandingMethods:
    2948

    3049
    params = (

    0 commit comments

    Comments
     (0)
    0