pandas-dev
diff --git a/‎.github/actions/build_pandas/action.yml
Lines changed: 17 additions & 0 deletions b/‎.github/actions/build_pandas/action.yml
Lines changed: 17 additions & 0 deletions
diff --git a/‎.github/actions/setup/action.yml
Lines changed: 12 additions & 0 deletions b/‎.github/actions/setup/action.yml
Lines changed: 12 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml
Lines changed: 14 additions & 18 deletions b/‎.github/workflows/ci.yml
Lines changed: 14 additions & 18 deletions
diff --git a/‎.github/workflows/database.yml
Lines changed: 2 additions & 16 deletions b/‎.github/workflows/database.yml
Lines changed: 2 additions & 16 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 8 additions & 2 deletions b/‎.pre-commit-config.yaml
Lines changed: 8 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/categoricals.py
Lines changed: 18 additions & 1 deletion b/‎asv_bench/benchmarks/categoricals.py
Lines changed: 18 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/dtypes.py
Lines changed: 14 additions & 1 deletion b/‎asv_bench/benchmarks/dtypes.py
Lines changed: 14 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/gil.py
Lines changed: 2 additions & 2 deletions b/‎asv_bench/benchmarks/gil.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/hash_functions.py
Lines changed: 9 additions & 0 deletions b/‎asv_bench/benchmarks/hash_functions.py
Lines changed: 9 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/period.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/period.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/rolling.py
Lines changed: 13 additions & 0 deletions b/‎asv_bench/benchmarks/rolling.py
Lines changed: 13 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/timedelta.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/timedelta.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet.pdf
39.9 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet.pdf
39.9 KB
diff --git a/‎doc/cheatsheet/Pandas_Cheat_Sheet.pptx
2.18 KB b/‎doc/cheatsheet/Pandas_Cheat_Sheet.pptx
2.18 KB
diff --git a/‎doc/source/development/code_style.rst
Lines changed: 40 additions & 0 deletions b/‎doc/source/development/code_style.rst
Lines changed: 40 additions & 0 deletions
diff --git a/‎doc/source/development/contributing.rst
Lines changed: 11 additions & 0 deletions b/‎doc/source/development/contributing.rst
Lines changed: 11 additions & 0 deletions
diff --git a/‎doc/source/development/extending.rst
Lines changed: 4 additions & 14 deletions b/‎doc/source/development/extending.rst
Lines changed: 4 additions & 14 deletions
diff --git a/‎doc/source/user_guide/dsintro.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/user_guide/dsintro.rst
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,17 @@
+name: Build pandas
+description: Rebuilds the C extensions and installs pandas
+runs:
+  using: composite
+  steps:
+
+    - name: Environment Detail
+      run: |
+        conda info
+        conda list
+      shell: bash -l {0}
+
+    - name: Build Pandas
+      run: |
+        python setup.py build_ext -j 2
+        python -m pip install -e . --no-build-isolation --no-use-pep517
+      shell: bash -l {0}
@@ -0,0 +1,12 @@
+name: Set up pandas
+description: Runs all the setup steps required to have a built pandas ready to use
+runs:
+  using: composite
+  steps:
+    - name: Setting conda path
+      run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
+      shell: bash -l {0}
+
+    - name: Setup environment and build pandas
+      run: ci/setup_env.sh
+      shell: bash -l {0}
@@ -41,15 +41,8 @@ jobs:
         environment-file: ${{ env.ENV_FILE }}
         use-only-tar-bz2: true
 
-    - name: Environment Detail
-      run: |
-        conda info
-        conda list
-
     - name: Build Pandas
-      run: |
-        python setup.py build_ext -j 2
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+      uses: ./.github/actions/build_pandas
 
     - name: Linting
       run: ci/code_checks.sh lint
@@ -100,14 +93,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
 
-    - name: Setting conda path
-      run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
-
     - name: Checkout
       uses: actions/checkout@v1
 
-    - name: Setup environment and build pandas
-      run: ci/setup_env.sh
+    - name: Set up pandas
+      uses: ./.github/actions/setup
 
     - name: Build website
       run: |
@@ -139,19 +129,25 @@ jobs:
       run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev
       if: github.event_name == 'push'
 
+    - name: Move docs into site directory
+      run: mv doc/build/html web/build/docs
+    - name: Save website as an artifact
+      uses: actions/upload-artifact@v2
+      with:
+        name: website
+        path: web/build
+        retention-days: 14
+
   data_manager:
     name: Test experimental data manager
     runs-on: ubuntu-latest
     steps:
 
-    - name: Setting conda path
-      run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
-
     - name: Checkout
       uses: actions/checkout@v1
 
-    - name: Setup environment and build pandas
-      run: ci/setup_env.sh
+    - name: Set up pandas
+      uses: ./.github/actions/setup
 
     - name: Run tests
       run: |
 
@@ -72,15 +72,8 @@ jobs:
         environment-file: ${{ env.ENV_FILE }}
         use-only-tar-bz2: true
 
-    - name: Environment Detail
-      run: |
-        conda info
-        conda list
-
     - name: Build Pandas
-      run: |
-        python setup.py build_ext -j 2
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+      uses: ./.github/actions/build_pandas
 
     - name: Test
       run: ci/run_tests.sh
@@ -158,15 +151,8 @@ jobs:
         environment-file: ${{ env.ENV_FILE }}
         use-only-tar-bz2: true
 
-    - name: Environment Detail
-      run: |
-        conda info
-        conda list
-
     - name: Build Pandas
-      run: |
-        python setup.py build_ext -j 2
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+      uses: ./.github/actions/build_pandas
 
     - name: Test
       run: ci/run_tests.sh
 
@@ -24,12 +24,12 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.9.0
+    rev: v2.10.0
     hooks:
     -   id: pyupgrade
         args: [--py37-plus, --keep-runtime-typing]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.7.0
+    rev: v1.7.1
     hooks:
       - id: rst-backticks
       - id: rst-directive-colons
@@ -127,6 +127,12 @@ repos:
         types: [python]
         files: ^pandas/tests/
         exclude: ^pandas/tests/extension/
+    -   id: unwanted-patters-pytest-xfail
+        name: Check for use of pytest.xfail
+        entry: pytest\.xfail
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
     -   id: inconsistent-namespace-usage
         name: 'Check for inconsistent use of pandas namespace in tests'
         entry: python scripts/check_for_inconsistent_pandas_namespace.py
 
@@ -118,12 +118,29 @@ def setup(self):
         self.a = pd.Categorical(list("aabbcd") * N)
         self.b = pd.Categorical(list("bbcdjk") * N)
 
+        self.idx_a = pd.CategoricalIndex(range(N), range(N))
+        self.idx_b = pd.CategoricalIndex(range(N + 1), range(N + 1))
+        self.df_a = pd.DataFrame(range(N), columns=["a"], index=self.idx_a)
+        self.df_b = pd.DataFrame(range(N + 1), columns=["a"], index=self.idx_b)
+
     def time_concat(self):
         pd.concat([self.s, self.s])
 
     def time_union(self):
         union_categoricals([self.a, self.b])
 
+    def time_append_overlapping_index(self):
+        self.idx_a.append(self.idx_a)
+
+    def time_append_non_overlapping_index(self):
+        self.idx_a.append(self.idx_b)
+
+    def time_concat_overlapping_index(self):
+        pd.concat([self.df_a, self.df_a])
+
+    def time_concat_non_overlapping_index(self):
+        pd.concat([self.df_a, self.df_b])
+
 
 class ValueCounts:
 
@@ -306,7 +323,7 @@ def time_get_loc(self):
         self.index.get_loc(self.category)
 
     def time_shallow_copy(self):
-        self.index._shallow_copy()
+        self.index._view()
 
     def time_align(self):
         pd.DataFrame({"a": self.series, "b": self.series[:500]})
 
@@ -2,9 +2,10 @@
 
 import numpy as np
 
+import pandas as pd
 from pandas import DataFrame
 import pandas._testing as tm
-from pandas.api.types import pandas_dtype
+from pandas.api.types import is_extension_array_dtype, pandas_dtype
 
 from .pandas_vb_common import (
     datetime_dtypes,
@@ -119,4 +120,16 @@ def time_select_dtype_string_exclude(self, dtype):
         self.df_string.select_dtypes(exclude=dtype)
 
 
+class CheckDtypes:
+    def setup(self):
+        self.ext_dtype = pd.Int64Dtype()
+        self.np_dtype = np.dtype("int64")
+
+    def time_is_extension_array_dtype_true(self):
+        is_extension_array_dtype(self.ext_dtype)
+
+    def time_is_extension_array_dtype_false(self):
+        is_extension_array_dtype(self.np_dtype)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -1,7 +1,7 @@
 import numpy as np
 
 from pandas import DataFrame, Series, date_range, factorize, read_csv
-from pandas.core.algorithms import take_1d
+from pandas.core.algorithms import take_nd
 
 from .pandas_vb_common import tm
 
@@ -110,7 +110,7 @@ def setup(self, dtype):
 
         @test_parallel(num_threads=2)
         def parallel_take1d():
-            take_1d(df["col"].values, indexer)
+            take_nd(df["col"].values, indexer)
 
         self.parallel_take1d = parallel_take1d
 
 
@@ -25,6 +25,15 @@ def time_isin_outside(self, dtype, exponent):
         self.s.isin(self.values_outside)
 
 
+class UniqueForLargePyObjectInts:
+    def setup(self):
+        lst = [x << 32 for x in range(5000)]
+        self.arr = np.array(lst, dtype=np.object_)
+
+    def time_unique(self):
+        pd.unique(self.arr)
+
+
 class IsinWithRandomFloat:
     params = [
         [np.float64, np.object],
 
@@ -86,7 +86,7 @@ def time_get_loc(self):
         self.index.get_loc(self.period)
 
     def time_shallow_copy(self):
-        self.index._shallow_copy()
+        self.index._view()
 
     def time_series_loc(self):
         self.series.loc[self.period]
 
@@ -255,6 +255,19 @@ def time_rolling_multiindex_creation(self):
 
 class GroupbyEWM:
 
+    params = ["var", "std", "cov", "corr"]
+    param_names = ["method"]
 
+    def setup(self, method):
+        df = pd.DataFrame({"A": range(50), "B": range(50)})
+        self.gb_ewm = df.groupby("A").ewm(com=1.0)
+
+    def time_groupby_method(self, method):
+        getattr(self.gb_ewm, method)()
+
+
+class GroupbyEWMEngine:
+
     params = ["cython", "numba"]
     param_names = ["engine"]
 
 
@@ -74,7 +74,7 @@ def time_get_loc(self):
         self.index.get_loc(self.timedelta)
 
     def time_shallow_copy(self):
-        self.index._shallow_copy()
+        self.index._view()
 
     def time_series_loc(self):
         self.series.loc[self.timedelta]
 
@@ -161,6 +161,46 @@ For example:
     # wrong
     from common import test_base
 
+Testing
+=======
+
+Failing tests
+--------------
+
+See https://docs.pytest.org/en/latest/skipping.html for background.
+
+Do not use ``pytest.xfail``
+---------------------------
+
+Do not use this method. It has the same behavior as ``pytest.skip``, namely
+it immediately stops the test and does not check if the test will fail. If
+this is the behavior you desire, use ``pytest.skip`` instead.
+
+Using ``pytest.mark.xfail``
+---------------------------
+
+Use this method if a test is known to fail but the manner in which it fails
+is not meant to be captured. It is common to use this method for a test that
+exhibits buggy behavior or a non-implemented feature. If
+the failing test has flaky behavior, use the argument ``strict=False``. This
+will make it so pytest does not fail if the test happens to pass.
+
+Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
+over usage within a test so that the test is appropriately marked during the
+collection phase of pytest. For xfailing a test that involves multiple
+parameters, a fixture, or a combination of these, it is only possible to
+xfail during the testing phase. To do so, use the ``request`` fixture:
+
+.. code-block:: python
+
+    import pytest
+
+    def test_xfail(request):
+        request.node.add_marker(pytest.mark.xfail(reason="Indicate why here"))
+
+xfail is not to be used for tests involving failure due to invalid user arguments.
+For these tests, use ``pytest.raises`` instead to verify that the correct
+exception type and error message are raised.
 
 Miscellaneous
 =============
 
@@ -629,6 +629,17 @@ the documentation are also built by Travis-CI. These docs are then hosted `here
 <https://pandas.pydata.org/docs/dev/>`__, see also
 the :ref:`Continuous Integration <contributing.ci>` section.
 
+Previewing changes
+------------------
+
+Once the pull request is submitted, GitHub Actions will automatically build the
+documentation. To view the built site:
+
+#. Wait for the ``CI / Web and docs`` check to complete.
+#. Click ``Details`` next to it.
+#. From the ``Artifacts`` drop-down, click ``docs`` or ``website`` to download
+   the site as a ZIP file.
+
 .. _contributing.code:
 
 Contributing to the code base
 
@@ -329,21 +329,11 @@ Each data structure has several *constructor properties* for returning a new
 data structure as the result of an operation. By overriding these properties,
 you can retain subclasses through ``pandas`` data manipulations.
 
-There are 3 constructor properties to be defined:
+There are 3 possible constructor properties to be defined on a subclass:
 
-* ``_constructor``: Used when a manipulation result has the same dimensions as the original.
-* ``_constructor_sliced``: Used when a manipulation result has one lower dimension(s) as the original, such as ``DataFrame`` single columns slicing.
-* ``_constructor_expanddim``: Used when a manipulation result has one higher dimension as the original, such as ``Series.to_frame()``.
-
-Following table shows how ``pandas`` data structures define constructor properties by default.
-
-===========================  ======================= =============
-Property Attributes          ``Series``              ``DataFrame``
-===========================  ======================= =============
-``_constructor``             ``Series``              ``DataFrame``
-``_constructor_sliced``      ``NotImplementedError`` ``Series``
-``_constructor_expanddim``   ``DataFrame``           ``NotImplementedError``
-===========================  ======================= =============
+* ``DataFrame/Series._constructor``: Used when a manipulation result has the same dimension as the original.
+* ``DataFrame._constructor_sliced``: Used when a ``DataFrame`` (sub-)class manipulation result should be a ``Series`` (sub-)class.
+* ``Series._constructor_expanddim``: Used when a ``Series`` (sub-)class manipulation result should be a ``DataFrame`` (sub-)class, e.g. ``Series.to_frame()``.
 
 Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties.
 
 
@@ -126,7 +126,7 @@ However, operations such as slicing will also slice the index.
 .. note::
 
    We will address array-based indexing like ``s[[4, 3, 1]]``
-   in :ref:`section <indexing>`.
+   in :ref:`section on indexing <indexing>`.
 
 Like a NumPy array, a pandas Series has a :attr:`~Series.dtype`.