diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index 2a7601f196ec4..fd7c3587f2254 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -7,7 +7,7 @@ runs: shell: bash -el {0} - name: Publish test results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Test results path: test-data.xml @@ -19,7 +19,7 @@ runs: if: failure() - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: flags: unittests name: codecov-pandas diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index b667075e87144..8aa417c1d8fd4 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -13,7 +13,7 @@ runs: using: composite steps: - name: Install ${{ inputs.environment-file }} - uses: mamba-org/provision-with-micromamba@v12 + uses: mamba-org/provision-with-micromamba@v15 with: environment-file: ${{ inputs.environment-file }} environment-name: ${{ inputs.environment-name }} diff --git a/.github/workflows/32-bit-linux.yml b/.github/workflows/32-bit-linux.yml deleted file mode 100644 index 08026a5fd637f..0000000000000 --- a/.github/workflows/32-bit-linux.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: 32 Bit Linux - -on: - push: - branches: - - main - - 2.0.x - pull_request: - branches: - - main - - 2.0.x - paths-ignore: - - "doc/**" - -permissions: - contents: read - -jobs: - pytest: - runs-on: ubuntu-22.04 - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Run 32-bit manylinux2014 Docker Build / Tests - run: | - # Without this (line 34), versioneer will not be able to determine the pandas version. - # This is because of a security update to git that blocks it from reading the config folder if - # it is not owned by the current user. We hit this since the "mounted" folder is not hit by the - # Docker container. - # xref https://github.com/pypa/manylinux/issues/1309 - docker pull quay.io/pypa/manylinux2014_i686 - docker run --platform linux/386 -v $(pwd):/pandas quay.io/pypa/manylinux2014_i686 \ - /bin/bash -xc "cd pandas && \ - git config --global --add safe.directory /pandas && \ - /opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \ - . ~/virtualenvs/pandas-dev/bin/activate && \ - python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \ - python -m pip install versioneer[toml] && \ - python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \ - python setup.py build_ext -q -j1 && \ - python -m pip install --no-build-isolation --no-use-pep517 -e . 
&& \ - python -m pip list && \ - export PANDAS_CI=1 && \ - pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml" - - - name: Publish test results for Python 3.8-32 bit full Linux - uses: actions/upload-artifact@v3 - with: - name: Test results - path: test-data.xml - if: failure() - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit - cancel-in-progress: true diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml deleted file mode 100644 index fd6560d61b160..0000000000000 --- a/.github/workflows/macos-windows.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Windows-macOS - -on: - push: - branches: - - main - - 2.0.x - pull_request: - branches: - - main - - 2.0.x - paths-ignore: - - "doc/**" - -env: - PANDAS_CI: 1 - PYTEST_TARGET: pandas - PATTERN: "not slow and not db and not network and not single_cpu" - -permissions: - contents: read - -jobs: - pytest: - defaults: - run: - shell: bash -el {0} - timeout-minutes: 180 - strategy: - matrix: - os: [macos-latest, windows-latest] - env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] - fail-fast: false - runs-on: ${{ matrix.os }} - name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }} - cancel-in-progress: true - env: - # GH 47443: PYTEST_WORKERS > 1 crashes Windows builds with memory related errors - PYTEST_WORKERS: ${{ matrix.os == 'macos-latest' && 'auto' || '1' }} - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: ci/deps/${{ matrix.env_file }} - - - name: Build Pandas - uses: ./.github/actions/build_pandas - - - name: Test - uses: ./.github/actions/run-tests diff --git a/.github/workflows/package-checks.yml b/.github/workflows/package-checks.yml index fa1b5e5d4fba3..6f1fa771a7854 100644 --- a/.github/workflows/package-checks.yml +++ b/.github/workflows/package-checks.yml @@ -14,6 +14,10 @@ on: permissions: contents: read +defaults: + run: + shell: bash -el {0} + jobs: pip: if: ${{ github.event.label.name == 'Build' || contains(github.event.pull_request.labels.*.name, 'Build') || github.event_name == 'push'}} @@ -44,9 +48,39 @@ jobs: run: | python -m pip install --upgrade pip setuptools wheel python-dateutil pytz numpy cython python -m pip install versioneer[toml] - shell: bash -el {0} - name: Pip install with extra - run: | - python -m pip install -e .[${{ matrix.extra }}] --no-build-isolation - shell: bash -el {0} + run: python -m pip install -e .[${{ matrix.extra }}] --no-build-isolation + conda_forge_recipe: + if: ${{ github.event.label.name == 'Build' || contains(github.event.pull_request.labels.*.name, 'Build') || github.event_name == 'push'}} + runs-on: ubuntu-22.04 + strategy: + matrix: + python-version: ['3.9', '3.10', '3.11'] + fail-fast: false + name: Test Conda Forge Recipe - Python ${{ matrix.python-version }} + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-conda-forge-recipe-${{ matrix.python-version }} + cancel-in-progress: true + steps: + - name: Checkout + uses: 
actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python + uses: mamba-org/provision-with-micromamba@v15 + with: + environment-file: false + environment-name: recipe-test + extra-specs: | + python=${{ matrix.python-version }} + boa + conda-verify + channels: conda-forge + cache-downloads: true + cache-env: true + + - name: Build conda package + run: conda mambabuild ci --no-anaconda-upload --verify --strict-verify --output --output-folder . diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml deleted file mode 100644 index 39b0439e2d6f4..0000000000000 --- a/.github/workflows/python-dev.yml +++ /dev/null @@ -1,95 +0,0 @@ -# This workflow may or may not run depending on the state of the next -# unreleased Python version. DO NOT DELETE IT. -# -# In general, this file will remain frozen(present, but not running) until: -# - The next unreleased Python version has released beta 1 -# - This version should be available on GitHub Actions. -# - Our required build/runtime dependencies(numpy, pytz, Cython, python-dateutil) -# support that unreleased Python version. -# To unfreeze, comment out the ``if: false`` condition, and make sure you update -# the name of the workflow and Python version in actions/setup-python to: '3.12-dev' -# -# After it has been unfrozen, this file should remain unfrozen(present, and running) until: -# - The next Python version has been officially released. -# OR -# - Most/All of our optional dependencies support Python 3.11 AND -# - The next Python version has released a rc(we are guaranteed a stable ABI). -# To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs -# to the corresponding posix/windows-macos/sdist etc. workflows. -# Feel free to modify this comment as necessary. - -name: Python Dev - -on: - push: - branches: - - main - - 2.0.x - - None - pull_request: - branches: - - main - - 2.0.x - - None - paths-ignore: - - "doc/**" - -env: - PYTEST_WORKERS: "auto" - PANDAS_CI: 1 - PATTERN: "not slow and not network and not clipboard and not single_cpu" - COVERAGE: true - PYTEST_TARGET: pandas - -permissions: - contents: read - -jobs: - build: - if: false # Uncomment this to freeze the workflow, comment it to unfreeze - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-22.04, macOS-latest, windows-latest] - - name: actions-311-dev - timeout-minutes: 120 - - concurrency: - #https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev - cancel-in-progress: true - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Python Dev Version - uses: actions/setup-python@v4 - with: - python-version: '3.11-dev' - - - name: Install dependencies - run: | - python --version - python -m pip install --upgrade pip setuptools wheel - python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy - python -m pip install git+https://github.com/nedbat/coveragepy.git - python -m pip install versioneer[toml] - python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 - python -m pip list - - # GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs - - name: Build Pandas - run: | - python setup.py build_ext -q -j1 - python -m pip install -e . 
--no-build-isolation --no-use-pep517 --no-index - - - name: Build Version - run: | - python -c "import pandas; pandas.show_versions();" - - - name: Test - uses: ./.github/actions/run-tests diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml deleted file mode 100644 index 284935d7051e1..0000000000000 --- a/.github/workflows/sdist.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: sdist - -on: - push: - branches: - - main - - 2.0.x - pull_request: - branches: - - main - - 2.0.x - types: [labeled, opened, synchronize, reopened] - paths-ignore: - - "doc/**" - -permissions: - contents: read - -jobs: - build: - if: ${{ github.event.label.name == 'Build' || contains(github.event.pull_request.labels.*.name, 'Build') || github.event_name == 'push'}} - runs-on: ubuntu-22.04 - timeout-minutes: 60 - defaults: - run: - shell: bash -el {0} - - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{matrix.python-version}}-sdist - cancel-in-progress: true - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel - python -m pip install versioneer[toml] - - # GH 39416 - pip install numpy - - - name: Build pandas sdist - run: | - pip list - python setup.py sdist --formats=gztar - - - name: Upload sdist artifact - uses: actions/upload-artifact@v3 - with: - name: ${{matrix.python-version}}-sdist.gz - path: dist/*.gz - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: false - environment-name: pandas-sdist - extra-specs: | - python =${{ matrix.python-version }} - - - name: Install pandas from sdist - run: | - pip list - python -m pip install dist/*.gz - - - name: Force oldest supported NumPy - run: | - case "${{matrix.python-version}}" in - 3.8) - pip install numpy==1.20.3 ;; - 3.9) - pip install numpy==1.20.3 ;; - 3.10) - pip install numpy==1.21.2 ;; - 3.11) - pip install numpy==1.23.2 ;; - esac - - - name: Import pandas - run: | - cd .. 
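A side note on the "Force oldest supported NumPy" step in the deleted sdist workflow above: the case statement encodes the minimum NumPy pin for each Python version. A minimal Python restatement of that mapping, useful for sanity checks; the helper name is illustrative only and is not part of the workflow or pandas:

```python
import sys

# Minimum NumPy pins copied from the case statement in the deleted
# sdist workflow above (pandas 2.0.x era).
OLDEST_NUMPY = {
    (3, 8): "1.20.3",
    (3, 9): "1.20.3",
    (3, 10): "1.21.2",
    (3, 11): "1.23.2",
}

def oldest_supported_numpy() -> str:
    """Return the minimum NumPy pin for the running interpreter."""
    return OLDEST_NUMPY[sys.version_info[:2]]
```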
- python -c "import pandas; pandas.show_versions();" diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml deleted file mode 100644 index 7bffd066b7e64..0000000000000 --- a/.github/workflows/ubuntu.yml +++ /dev/null @@ -1,167 +0,0 @@ -name: Ubuntu - -on: - push: - branches: - - main - - 2.0.x - pull_request: - branches: - - main - - 2.0.x - paths-ignore: - - "doc/**" - -permissions: - contents: read - -jobs: - pytest: - runs-on: ubuntu-22.04 - defaults: - run: - shell: bash -el {0} - timeout-minutes: 180 - strategy: - matrix: - env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] - # Prevent the include jobs from overriding other jobs - pattern: [""] - include: - - name: "Downstream Compat" - env_file: actions-38-downstream_compat.yaml - pattern: "not slow and not network and not single_cpu" - pytest_target: "pandas/tests/test_downstream.py" - - name: "Minimum Versions" - env_file: actions-38-minimum_versions.yaml - pattern: "not slow and not network and not single_cpu" - - name: "Locale: it_IT" - env_file: actions-38.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-it" - # Use the utf8 version as the default, it has no bad side-effect. - lang: "it_IT.utf8" - lc_all: "it_IT.utf8" - # Also install it_IT (its encoding is ISO8859-1) but do not activate it. - # It will be temporarily activated during tests with locale.setlocale - extra_loc: "it_IT" - - name: "Locale: zh_CN" - env_file: actions-38.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-zh-hans" - # Use the utf8 version as the default, it has no bad side-effect. - lang: "zh_CN.utf8" - lc_all: "zh_CN.utf8" - # Also install zh_CN (its encoding is gb2312) but do not activate it. 
- # It will be temporarily activated during tests with locale.setlocale - extra_loc: "zh_CN" - - name: "Copy-on-Write" - env_file: actions-310.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "1" - - name: "Pypy" - env_file: actions-pypy-38.yaml - pattern: "not slow and not network and not single_cpu" - test_args: "--max-worker-restart 0" - - name: "Numpy Dev" - env_file: actions-310-numpydev.yaml - pattern: "not slow and not network and not single_cpu" - test_args: "-W error::DeprecationWarning -W error::FutureWarning" - # TODO(cython3): Re-enable once next-beta(after beta 1) comes out - # There are some warnings failing the build with -werror - pandas_ci: "0" - - name: "Pyarrow Nightly" - env_file: actions-311-pyarrownightly.yaml - pattern: "not slow and not network and not single_cpu" - fail-fast: false - name: ${{ matrix.name || matrix.env_file }} - env: - ENV_FILE: ci/deps/${{ matrix.env_file }} - PATTERN: ${{ matrix.pattern }} - EXTRA_APT: ${{ matrix.extra_apt || '' }} - LANG: ${{ matrix.lang || '' }} - LC_ALL: ${{ matrix.lc_all || '' }} - PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} - PANDAS_CI: ${{ matrix.pandas_ci || '1' }} - TEST_ARGS: ${{ matrix.test_args || '' }} - PYTEST_WORKERS: 'auto' - PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} - concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }} - cancel-in-progress: true - - services: - mysql: - image: mysql - env: - MYSQL_ALLOW_EMPTY_PASSWORD: yes - MYSQL_DATABASE: pandas - options: >- - --health-cmd "mysqladmin ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 3306:3306 - - postgres: - image: postgres - env: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: pandas - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - - moto: - image: motoserver/moto:4.1.4 - env: - AWS_ACCESS_KEY_ID: foobar_key - AWS_SECRET_ACCESS_KEY: foobar_secret - ports: - - 5000:5000 - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Extra installs - # xsel for clipboard tests - run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }} - - - name: Generate extra locales - # These extra locales will be available for locale.setlocale() calls in tests - run: | - sudo locale-gen ${{ matrix.extra_loc }} - if: ${{ matrix.extra_loc }} - - - name: Set up Conda - uses: ./.github/actions/setup-conda - with: - environment-file: ${{ env.ENV_FILE }} - - - name: Build Pandas - id: build - uses: ./.github/actions/build_pandas - - - name: Test (not single_cpu) - uses: ./.github/actions/run-tests - if: ${{ matrix.name != 'Pypy' }} - env: - # Set pattern to not single_cpu if not already set - PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} - - - name: Test (single_cpu) - uses: ./.github/actions/run-tests - env: - PATTERN: 'single_cpu' - PYTEST_WORKERS: 1 - if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 0000000000000..31e2095624347 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,315 @@ +name: Unit Tests + +on: + push: + branches: + - main + - 2.0.x + 
pull_request: + branches: + - main + - 2.0.x + paths-ignore: + - "doc/**" + - "web/**" + +permissions: + contents: read + +defaults: + run: + shell: bash -el {0} + +jobs: + ubuntu: + runs-on: ubuntu-22.04 + timeout-minutes: 180 + strategy: + matrix: + env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] + # Prevent the include jobs from overriding other jobs + pattern: [""] + include: + - name: "Downstream Compat" + env_file: actions-38-downstream_compat.yaml + pattern: "not slow and not network and not single_cpu" + pytest_target: "pandas/tests/test_downstream.py" + - name: "Minimum Versions" + env_file: actions-38-minimum_versions.yaml + pattern: "not slow and not network and not single_cpu" + - name: "Locale: it_IT" + env_file: actions-38.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-it" + # Use the utf8 version as the default, it has no bad side-effect. + lang: "it_IT.utf8" + lc_all: "it_IT.utf8" + # Also install it_IT (its encoding is ISO8859-1) but do not activate it. + # It will be temporarily activated during tests with locale.setlocale + extra_loc: "it_IT" + - name: "Locale: zh_CN" + env_file: actions-38.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-zh-hans" + # Use the utf8 version as the default, it has no bad side-effect. + lang: "zh_CN.utf8" + lc_all: "zh_CN.utf8" + # Also install zh_CN (its encoding is gb2312) but do not activate it. + # It will be temporarily activated during tests with locale.setlocale + extra_loc: "zh_CN" + - name: "Copy-on-Write" + env_file: actions-310.yaml + pattern: "not slow and not network and not single_cpu" + pandas_copy_on_write: "1" + - name: "Pypy" + env_file: actions-pypy-38.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "--max-worker-restart 0" + - name: "Numpy Dev" + env_file: actions-310-numpydev.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "-W error::DeprecationWarning -W error::FutureWarning" + # TODO(cython3): Re-enable once next-beta(after beta 1) comes out + # There are some warnings failing the build with -werror + pandas_ci: "0" + - name: "Pyarrow Nightly" + env_file: actions-311-pyarrownightly.yaml + pattern: "not slow and not network and not single_cpu" + fail-fast: false + name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }} + env: + ENV_FILE: ci/deps/${{ matrix.env_file }} + PATTERN: ${{ matrix.pattern }} + EXTRA_APT: ${{ matrix.extra_apt || '' }} + LANG: ${{ matrix.lang || '' }} + LC_ALL: ${{ matrix.lc_all || '' }} + PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} + PANDAS_CI: ${{ matrix.pandas_ci || '1' }} + TEST_ARGS: ${{ matrix.test_args || '' }} + PYTEST_WORKERS: 'auto' + PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }} + cancel-in-progress: true + + services: + mysql: + image: mysql + env: + MYSQL_ALLOW_EMPTY_PASSWORD: yes + MYSQL_DATABASE: pandas + options: >- + --health-cmd "mysqladmin ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 3306:3306 + + postgres: + image: postgres + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: pandas + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 
5s + --health-retries 5 + ports: + - 5432:5432 + + moto: + image: motoserver/moto:4.1.4 + env: + AWS_ACCESS_KEY_ID: foobar_key + AWS_SECRET_ACCESS_KEY: foobar_secret + ports: + - 5000:5000 + + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Extra installs + # xsel for clipboard tests + run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }} + + - name: Generate extra locales + # These extra locales will be available for locale.setlocale() calls in tests + run: | + sudo locale-gen ${{ matrix.extra_loc }} + if: ${{ matrix.extra_loc }} + + - name: Set up Conda + uses: ./.github/actions/setup-conda + with: + environment-file: ${{ env.ENV_FILE }} + + - name: Build Pandas + id: build + uses: ./.github/actions/build_pandas + + - name: Test (not single_cpu) + uses: ./.github/actions/run-tests + if: ${{ matrix.name != 'Pypy' }} + env: + # Set pattern to not single_cpu if not already set + PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }} + + - name: Test (single_cpu) + uses: ./.github/actions/run-tests + env: + PATTERN: 'single_cpu' + PYTEST_WORKERS: 1 + if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}} + + macos-windows: + timeout-minutes: 180 + strategy: + matrix: + os: [macos-latest, windows-latest] + env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] + fail-fast: false + runs-on: ${{ matrix.os }} + name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }} + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }} + cancel-in-progress: true + env: + PANDAS_CI: 1 + PYTEST_TARGET: pandas + PATTERN: "not slow and not db and not network and not single_cpu" + # GH 47443: PYTEST_WORKERS > 1 crashes Windows builds with memory related errors + PYTEST_WORKERS: ${{ matrix.os == 'macos-latest' && 'auto' || '1' }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Conda + uses: ./.github/actions/setup-conda + with: + environment-file: ci/deps/${{ matrix.env_file }} + + - name: Build Pandas + uses: ./.github/actions/build_pandas + + - name: Test + uses: ./.github/actions/run-tests + + Linux-32-bit: + runs-on: ubuntu-22.04 + container: + image: quay.io/pypa/manylinux2014_i686 + options: --platform linux/386 + steps: + - name: Checkout pandas Repo + # actions/checkout does not work since it requires node + run: | + git config --global --add safe.directory $PWD + + if [ $GITHUB_EVENT_NAME != pull_request ]; then + git clone --recursive --branch=$GITHUB_REF_NAME https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git reset --hard $GITHUB_SHA + else + git clone --recursive https://github.com/${GITHUB_REPOSITORY}.git $GITHUB_WORKSPACE + git fetch origin $GITHUB_REF:my_ref_name + git checkout $GITHUB_BASE_REF + git -c user.email="you@example.com" merge --no-commit my_ref_name + fi + - name: Build environment and Run Tests + run: | + /opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev + . 
~/virtualenvs/pandas-dev/bin/activate + python -m pip install --no-cache-dir --no-deps -U pip wheel setuptools + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1 + python setup.py build_ext -q -j$(nproc) + python -m pip install --no-cache-dir --no-build-isolation --no-use-pep517 -e . + python -m pip list + export PANDAS_CI=1 + python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit + cancel-in-progress: true + + python-dev: + # This job may or may not run depending on the state of the next + # unreleased Python version. DO NOT DELETE IT. + # + # In general, this will remain frozen(present, but not running) until: + # - The next unreleased Python version has released beta 1 + # - This version should be available on GitHub Actions. + # - Our required build/runtime dependencies(numpy, pytz, Cython, python-dateutil) + # support that unreleased Python version. + # To unfreeze, comment out the ``if: false`` condition, and make sure you update + # the name of the workflow and Python version in actions/setup-python ``python-version:`` + # + # After it has been unfrozen, this file should remain unfrozen(present, and running) until: + # - The next Python version has been officially released. + # OR + # - Most/All of our optional dependencies support the next Python version AND + # - The next Python version has released a rc(we are guaranteed a stable ABI). + # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs + # to the corresponding posix/windows-macos/sdist etc. workflows. + # Feel free to modify this comment as necessary. + if: false # Uncomment this to freeze the workflow, comment it to unfreeze + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, macOS-latest, windows-latest] + + timeout-minutes: 180 + + concurrency: + #https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev + cancel-in-progress: true + + env: + PYTEST_WORKERS: "auto" + PANDAS_CI: 1 + PATTERN: "not slow and not network and not clipboard and not single_cpu" + COVERAGE: true + PYTEST_TARGET: pandas + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python Dev Version + uses: actions/setup-python@v4 + with: + python-version: '3.11-dev' + + - name: Install dependencies + run: | + python --version + python -m pip install --upgrade pip setuptools wheel + python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy + python -m pip install git+https://github.com/nedbat/coveragepy.git + python -m pip install versioneer[toml] + python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 + python -m pip list + + - name: Build Pandas + run: | + python setup.py build_ext -q -j4 + python -m pip install -e . 
--no-build-isolation --no-use-pep517 --no-index + + - name: Build Version + run: | + python -c "import pandas; pandas.show_versions();" + + - name: Test + uses: ./.github/actions/run-tests diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index f0d422b01e0c8..c9e44fae43669 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -23,7 +23,10 @@ on: - cron: "27 3 */1 * *" push: pull_request: - types: [labeled, opened, synchronize, reopened] + types: [labeled, opened, synchronize, reopened] + paths-ignore: + - "doc/**" + - "web/**" workflow_dispatch: concurrency: @@ -71,7 +74,7 @@ jobs: fetch-depth: 0 - name: Build wheels - uses: pypa/cibuildwheel@v2.12.1 + uses: pypa/cibuildwheel@v2.12.3 env: CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index 7bc71483be34a..8f309b0781457 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -33,7 +33,8 @@ dependencies: - jinja2 - lxml - matplotlib>=3.6.1, <3.7.0 - - numba + # test_numba_vs_cython segfaults with numba 0.57 + - numba>=0.55.2, <0.57.0 - numexpr - openpyxl<3.1.1 - odfpy diff --git a/ci/meta.yaml b/ci/meta.yaml new file mode 100644 index 0000000000000..f02c7eec001fc --- /dev/null +++ b/ci/meta.yaml @@ -0,0 +1,93 @@ +{% set version = "2.0.1" %} + +package: + name: pandas + version: {{ version }} + +source: + git_url: ../.. + +build: + number: 1 + script: + - export PYTHONUNBUFFERED=1 # [ppc64le] + - {{ PYTHON }} -m pip install -vv --no-deps --ignore-installed . # [not unix] + - {{ PYTHON }} -m pip install -vv --no-deps --ignore-installed . --global-option="build_ext" --global-option="-j4" --no-use-pep517 # [unix] + skip: true # [py<39] + +requirements: + build: + - python # [build_platform != target_platform] + - cross-python_{{ target_platform }} # [build_platform != target_platform] + - cython # [build_platform != target_platform] + - numpy # [build_platform != target_platform] + - {{ compiler('c') }} + - {{ compiler('cxx') }} + host: + - python + - pip + - setuptools >=61.0.0 + - cython >=0.29.33,<3 + - numpy >=1.21.6 # [py<311] + - numpy >=1.23.2 # [py>=311] + - versioneer + - tomli # [py<311] + run: + - python + - {{ pin_compatible('numpy') }} + - python-dateutil >=2.8.2 + - pytz >=2020.1 + - python-tzdata >=2022.1 + +test: + imports: + - pandas + commands: + - pip check + # Skip test suite on PyPy as it segfaults there + # xref: https://github.com/conda-forge/pandas-feedstock/issues/148 + # + # Also skip `test_rolling_var_numerical_issues` on `ppc64le` as it is a known test failure. 
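For readers unfamiliar with the recipe's test block: the Jinja templating here builds a pytest marker expression and a skip list, then hands them to `pandas.test`, which forwards them to pytest. A rough Python equivalent, with the per-platform skips elided:

```python
import pandas as pd

# Marker list copied from the recipe; "-n=2" runs two pytest-xdist workers.
markers = ["not clipboard", "not single_cpu", "not db", "not network", "not slow"]
extra_args = ["-n=2", "-m", " and ".join(markers)]

pd.test(extra_args=extra_args)  # runs pytest against the installed pandas
```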
+ # xref: https://github.com/conda-forge/pandas-feedstock/issues/149
+ {% set markers = ["not clipboard", "not single_cpu", "not db", "not network", "not slow"] %}
+ {% set markers = markers + ["not arm_slow"] %} # [aarch64 or ppc64le]
+ {% set extra_args = ["-n=2 -m " + " and ".join(markers)] %}
+ {% set tests_to_skip = "_not_a_real_test" %}
+ {% set tests_to_skip = tests_to_skip + " or test_rolling_var_numerical_issues" %} # [ppc64le]
+ {% set tests_to_skip = tests_to_skip + " or test_std_timedelta64_skipna_false" %} # [ppc64le]
+ {% set tests_to_skip = tests_to_skip + " or test_value_counts_normalized[M8[ns]]" %} # [ppc64le]
+ {% set tests_to_skip = tests_to_skip + " or test_to_datetime_format_YYYYMMDD_with_nat" %} # [ppc64le]
+ {% set tests_to_skip = tests_to_skip + " or (TestReductions and test_median_2d)" %} # [ppc64le]
+ {% set extra_args = extra_args + ["-k", "not (" + tests_to_skip + ")"] %}
+ - python -c "import pandas; pandas.test(extra_args={{ extra_args }})" # [python_impl == "cpython"]
+ requires:
+ - pip
+ - pytest >=7.0.0
+ - pytest-asyncio >=0.17.0
+ - pytest-xdist >=2.2.0
+ - pytest-cov
+ - hypothesis >=6.46.1
+ - tomli # [py<311]
+
+about:
+ home: http://pandas.pydata.org
+ license: BSD-3-Clause
+ license_file: LICENSE
+ summary: Powerful data structures for data analysis, time series, and statistics
+ doc_url: https://pandas.pydata.org/docs/
+ dev_url: https://github.com/pandas-dev/pandas
+
+extra:
+ recipe-maintainers:
+ - jreback
+ - jorisvandenbossche
+ - msarahan
+ - ocefpaf
+ - TomAugspurger
+ - WillAyd
+ - simonjayhawkins
+ - mroeschke
+ - datapythonista
+ - phofl
+ - lithomas1
+ - marcogorelli
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
index 79596c946c068..15ea0066156b5 100644
--- a/doc/source/user_guide/style.ipynb
+++ b/doc/source/user_guide/style.ipynb
@@ -520,7 +520,7 @@
 "\n",
 "*New in version 1.2.0*\n",
 "\n",
- "The [.set_td_classes()][tdclass] method accepts a DataFrame with matching indices and columns to the underlying [Styler][styler]'s DataFrame. That DataFrame will contain strings as css-classes to add to individual data cells: the `<td>` elements of the `<table>`. Rather than use external CSS we will create our classes internally and add them to table style. We will save adding the borders until the [section on tooltips](#Tooltips).\n",
+ "The [.set_td_classes()][tdclass] method accepts a DataFrame with matching indices and columns to the underlying [Styler][styler]'s DataFrame. That DataFrame will contain strings as css-classes to add to individual data cells: the `<td>` elements of the `<table>`. Rather than use external CSS we will create our classes internally and add them to table style. We will save adding the borders until the [section on tooltips](#Tooltips-and-Captions).\n",
 "\n",
 "[tdclass]: ../reference/api/pandas.io.formats.style.Styler.set_td_classes.rst\n",
 "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst"
diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst
index e08fa81c5fa09..4e7733e25fa88 100644
--- a/doc/source/user_guide/window.rst
+++ b/doc/source/user_guide/window.rst
@@ -89,6 +89,7 @@ For example, a `weighted mean str | None:
 output_format.append(tokens[i])
+ # if am/pm token present, replace 24-hour %H, with 12-hour %I
+ if "%p" in output_format and "%H" in output_format:
+ i = output_format.index("%H")
+ output_format[i] = "%I"
+
 guessed_format = "".join(output_format)
 try:
diff --git a/pandas/core/_numba/kernels/mean_.py b/pandas/core/_numba/kernels/mean_.py
index 725989e093441..35a6e688d01d1 100644
--- a/pandas/core/_numba/kernels/mean_.py
+++ b/pandas/core/_numba/kernels/mean_.py
@@ -100,7 +100,7 @@ def sliding_mean(
 neg_ct,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 else:
 for j in range(start[i - 1], s):
@@ -125,7 +125,7 @@ def sliding_mean(
 neg_ct,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 if nobs >= min_periods and nobs > 0:
diff --git a/pandas/core/_numba/kernels/sum_.py b/pandas/core/_numba/kernels/sum_.py
index 056897189fe67..eb8846b1fa50a 100644
--- a/pandas/core/_numba/kernels/sum_.py
+++ b/pandas/core/_numba/kernels/sum_.py
@@ -92,7 +92,7 @@ def sliding_sum(
 sum_x,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 else:
 for j in range(start[i - 1], s):
@@ -115,7 +115,7 @@ def sliding_sum(
 sum_x,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 if nobs == 0 == min_periods:
diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py
index d3243f4928dca..2c4559ddc2121 100644
--- a/pandas/core/_numba/kernels/var_.py
+++ b/pandas/core/_numba/kernels/var_.py
@@ -110,7 +110,7 @@ def sliding_var(
 ssqdm_x,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 else:
 for j in range(start[i - 1], s):
@@ -135,7 +135,7 @@ def sliding_var(
 ssqdm_x,
 compensation_add,
 num_consecutive_same_value,
- prev_value,
+ prev_value, # pyright: ignore[reportGeneralTypeIssues]
 )
 if nobs >= min_periods and nobs > ddof:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 29009c1627cfb..d312612cdc680 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -35,6 +35,7 @@
 from pandas.core.dtypes.cast import (
 construct_1d_object_array_from_listlike,
 infer_dtype_from_array,
+ np_find_common_type,
 )
 from pandas.core.dtypes.common import (
 ensure_float64,
@@ -522,7 +523,7 @@ def f(c, v):
 f = np.in1d
 else:
- common = np.find_common_type([values.dtype, comps_array.dtype], [])
+ common = np_find_common_type(values.dtype, comps_array.dtype)
 values = values.astype(common, copy=False)
 comps_array = comps_array.astype(common, copy=False)
 f = htable.ismember
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index dd8484050ef89..445ec36135d5f 100644
---
a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -53,6 +53,7 @@ is_object_dtype, is_scalar, ) +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna from pandas.core import roperator @@ -168,6 +169,8 @@ def to_pyarrow_type( return dtype.pyarrow_dtype elif isinstance(dtype, pa.DataType): return dtype + elif isinstance(dtype, DatetimeTZDtype): + return pa.timestamp(dtype.unit, dtype.tz) elif dtype: try: # Accepts python types too @@ -247,6 +250,16 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal Construct a new ExtensionArray from a sequence of scalars. """ pa_dtype = to_pyarrow_type(dtype) + if ( + isinstance(scalars, np.ndarray) + and isinstance(dtype, ArrowDtype) + and ( + pa.types.is_large_binary(pa_dtype) or pa.types.is_large_string(pa_dtype) + ) + ): + # See https://github.com/apache/arrow/issues/35289 + scalars = scalars.tolist() + if isinstance(scalars, cls): scalars = scalars._data elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)): @@ -259,7 +272,10 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal # GH50430: let pyarrow infer type, then cast scalars = pa.array(scalars, from_pandas=True) if pa_dtype: - scalars = scalars.cast(pa_dtype) + if pa.types.is_dictionary(pa_dtype): + scalars = scalars.dictionary_encode() + else: + scalars = scalars.cast(pa_dtype) arr = cls(scalars) if pa.types.is_duration(scalars.type) and scalars.null_count > 0: # GH52843: upstream bug for duration types when originally @@ -858,7 +874,10 @@ def factorize( else: data = self._data - encoded = data.dictionary_encode(null_encoding=null_encoding) + if pa.types.is_dictionary(data.type): + encoded = data + else: + encoded = data.dictionary_encode(null_encoding=null_encoding) if encoded.length() == 0: indices = np.array([], dtype=np.intp) uniques = type(self)(pa.chunked_array([], type=encoded.type.value_type)) @@ -2151,6 +2170,11 @@ def _dt_round( return self._round_temporally("round", freq, ambiguous, nonexistent) def _dt_to_pydatetime(self): + if pa.types.is_date(self.dtype.pyarrow_dtype): + raise ValueError( + f"to_pydatetime cannot be called with {self.dtype.pyarrow_dtype} type. " + "Convert to pyarrow timestamp type." + ) data = self._data.to_pylist() if self._dtype.pyarrow_dtype.unit == "ns": data = [None if ts is None else ts.to_pydatetime(warn=False) for ts in data] diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 20a9902f3bc90..7d4fbb788cc9c 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -140,6 +140,8 @@ def type(self): elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): return list elif pa.types.is_map(pa_type): + return list + elif pa.types.is_struct(pa_type): return dict elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? 
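The `ArrowDtype.type` hunk above corrects the Python type reported for map types: pyarrow converts a map scalar to a list of key/value tuples, while a struct scalar converts to a dict. A quick check, assuming pyarrow is installed:

```python
import pyarrow as pa

m = pa.scalar([("a", 1)], type=pa.map_(pa.string(), pa.int64()))
s = pa.scalar({"a": 1}, type=pa.struct([("a", pa.int64())]))
print(m.as_py())  # [('a', 1)] -> a list, hence list for map types
print(s.as_py())  # {'a': 1}   -> a dict, hence dict for struct types
```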
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 843e9be6de14a..8545cd1499b5e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2010,7 +2010,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): nanos = delta_to_nanoseconds(offset, self._creso) if nanos == 0: # GH 52761 - return self + return self.copy() result_i8 = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result_i8, fill_value=iNaT) result = result.view(self._ndarray.dtype) diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index c7a44d3606fa6..185ed2911c11e 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -400,6 +400,8 @@ def _subtype_with_str(self): def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # TODO for now only handle SparseDtypes and numpy dtypes => extend # with other compatible extension dtypes + from pandas.core.dtypes.cast import np_find_common_type + if any( isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype) for x in dtypes @@ -420,5 +422,5 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: stacklevel=find_stack_level(), ) - np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes] - return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value) + np_dtypes = (x.subtype if isinstance(x, SparseDtype) else x for x in dtypes) + return SparseDtype(np_find_common_type(*np_dtypes), fill_value=fill_value) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1e2192f4c7691..2dbd9465be3c6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1122,20 +1122,31 @@ def convert_dtypes( from pandas.core.arrays.arrow.dtype import ArrowDtype from pandas.core.arrays.string_ import StringDtype - if isinstance(inferred_dtype, PandasExtensionDtype): - base_dtype = inferred_dtype.base - elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)): - base_dtype = inferred_dtype.numpy_dtype - elif isinstance(inferred_dtype, StringDtype): - base_dtype = np.dtype(str) - else: - # error: Incompatible types in assignment (expression has type - # "Union[str, Any, dtype[Any], ExtensionDtype]", - # variable has type "Union[dtype[Any], ExtensionDtype, None]") - base_dtype = inferred_dtype # type: ignore[assignment] - pa_type = to_pyarrow_type(base_dtype) - if pa_type is not None: - inferred_dtype = ArrowDtype(pa_type) + assert not isinstance(inferred_dtype, str) + + if ( + (convert_integer and inferred_dtype.kind in "iu") + or (convert_floating and inferred_dtype.kind in "fc") + or (convert_boolean and inferred_dtype.kind == "b") + or (convert_string and isinstance(inferred_dtype, StringDtype)) + or ( + inferred_dtype.kind not in "iufcb" + and not isinstance(inferred_dtype, StringDtype) + ) + ): + if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance( + inferred_dtype, DatetimeTZDtype + ): + base_dtype = inferred_dtype.base + elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)): + base_dtype = inferred_dtype.numpy_dtype + elif isinstance(inferred_dtype, StringDtype): + base_dtype = np.dtype(str) + else: + base_dtype = inferred_dtype + pa_type = to_pyarrow_type(base_dtype) + if pa_type is not None: + inferred_dtype = ArrowDtype(pa_type) # error: Incompatible return value type (got "Union[str, Union[dtype[Any], # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]") @@ -1359,6 +1370,32 @@ 
def common_dtype_categorical_compat( return dtype +def np_find_common_type(*dtypes: np.dtype) -> np.dtype: + """ + np.find_common_type implementation pre-1.25 deprecation using np.result_type + https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065 + + Parameters + ---------- + dtypes : np.dtypes + + Returns + ------- + np.dtype + """ + try: + common_dtype = np.result_type(*dtypes) + if common_dtype.kind in "mMSU": + # NumPy promotion currently (1.25) misbehaves for for times and strings, + # so fall back to object (find_common_dtype did unless there + # was only one dtype) + common_dtype = np.dtype("O") + + except TypeError: + common_dtype = np.dtype("O") + return common_dtype + + @overload def find_common_type(types: list[np.dtype]) -> np.dtype: ... @@ -1426,7 +1463,7 @@ def find_common_type(types): if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): return np.dtype("object") - return np.find_common_type(types, []) + return np_find_common_type(*types) def construct_2d_arraylike_from_scalar( diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 28bc849088d5f..709563020778e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.cast import ( common_dtype_categorical_compat, find_common_type, + np_find_common_type, ) from pandas.core.dtypes.common import is_dtype_equal from pandas.core.dtypes.dtypes import ( @@ -110,6 +111,8 @@ def is_nonempty(x) -> bool: # coerce to object to_concat = [x.astype("object") for x in to_concat] kinds = {"o"} + else: + target_dtype = np_find_common_type(*dtypes) result = np.concatenate(to_concat, axis=axis) if "b" in kinds and result.dtype.kind in ["i", "u", "f"]: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da61d5e88a882..7e1d8711aee86 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3816,18 +3816,8 @@ def _getitem_multilevel(self, key): if isinstance(loc, (slice, np.ndarray)): new_columns = self.columns[loc] result_columns = maybe_droplevels(new_columns, key) - if self._is_mixed_type: - result = self.reindex(columns=new_columns) - result.columns = result_columns - else: - new_values = self._values[:, loc] - result = self._constructor( - new_values, index=self.index, columns=result_columns, copy=False - ) - if using_copy_on_write() and isinstance(loc, slice): - result._mgr.add_references(self._mgr) # type: ignore[arg-type] - - result = result.__finalize__(self) + result = self.iloc[:, loc] + result.columns = result_columns # If there is only one column being returned, and its name is # either an empty string, or a tuple with an empty string as its @@ -9538,7 +9528,12 @@ def _append( "or if the Series has a name" ) - index = Index([other.name], name=self.index.name) + index = Index( + [other.name], + name=self.index.names + if isinstance(self.index, MultiIndex) + else self.index.name, + ) row_df = other.to_frame().T # infer_objects is needed for # test_append_empty_frame_to_series_with_dateutil_tz diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dc7dff74369bb..42b7fd9b635df 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3074,12 +3074,12 @@ def _nth( # error: No overload variant of "where" matches argument types # "Any", "NAType", "Any" values = np.where(nulls, NA, grouper) # type: ignore[call-overload] - grouper = Index(values, dtype="Int64") + grouper = Index(values, dtype="Int64") # type: ignore[assignment] else: # create a grouper 
with the original parameters, but on dropped # object - grouper, _, _ = get_grouper( + grouper, _, _ = get_grouper( # type: ignore[assignment] dropped, key=self.keys, axis=self.axis, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8969e90e6318..e2b8400188136 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2521,7 +2521,7 @@ def is_categorical(self) -> bool: Check if the Index holds categorical data. .. deprecated:: 2.0.0 - Use :meth:`pandas.api.types.is_categorical_dtype` instead. + Use `isinstance(index.dtype, pd.CategoricalDtype)` instead. Returns ------- @@ -2574,7 +2574,7 @@ def is_interval(self) -> bool: Check if the Index holds Interval objects. .. deprecated:: 2.0.0 - Use `pandas.api.types.is_interval_dtype` instead. + Use `isinstance(index.dtype, pd.IntervalDtype)` instead. Returns ------- @@ -4877,7 +4877,7 @@ def _wrap_joined_index( mask = lidx == -1 join_idx = self.take(lidx) right = other.take(ridx) - join_index = join_idx.putmask(mask, right) + join_index = join_idx.putmask(mask, right)._sort_levels_monotonic() return join_index.set_names(name) # type: ignore[return-value] else: name = get_op_result_name(self, other) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index afb5ab036b5b5..f8d78d21f74df 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1768,6 +1768,13 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"): if not isinstance(value, ABCSeries): # if not Series (in which case we need to align), # we can short-circuit + if ( + isinstance(arr, np.ndarray) + and arr.ndim == 1 + and len(arr) == 1 + ): + # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 + arr = arr[0, ...] empty_value[indexer[0]] = arr self.obj[key] = empty_value return diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 998f3bc374942..78e530f915117 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -6,6 +6,8 @@ import numpy as np +from pandas.compat._optional import import_optional_dependency + import pandas as pd from pandas.core.interchange.dataframe_protocol import ( Buffer, @@ -23,7 +25,7 @@ DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}, DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}, DtypeKind.FLOAT: {32: np.float32, 64: np.float64}, - DtypeKind.BOOL: {8: bool}, + DtypeKind.BOOL: {1: bool, 8: bool}, } @@ -154,7 +156,9 @@ def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: buffers = col.get_buffers() data_buff, data_dtype = buffers["data"] - data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size()) + data = buffer_to_ndarray( + data_buff, data_dtype, offset=col.offset, length=col.size() + ) data = set_nulls(data, col, buffers["validity"]) return data, buffers @@ -192,11 +196,16 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]: buffers = col.get_buffers() codes_buff, codes_dtype = buffers["data"] - codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size()) + codes = buffer_to_ndarray( + codes_buff, codes_dtype, offset=col.offset, length=col.size() + ) # Doing module in order to not get ``IndexError`` for # out-of-bounds sentinel values in `codes` - values = categories[codes % len(categories)] + if len(categories) > 0: + values = categories[codes % len(categories)] + else: + values = codes cat = pd.Categorical( values, categories=categories, 
ordered=categorical["is_ordered"] @@ -252,7 +261,7 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: Endianness.NATIVE, ) # Specify zero offset as we don't want to chunk the string data - data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size()) + data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize) # Retrieve the offsets buffer containing the index offsets demarcating # the beginning and the ending of each string @@ -261,14 +270,16 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: # meaning that it has more elements than in the data buffer, do `col.size() + 1` # here to pass a proper offsets buffer size offsets = buffer_to_ndarray( - offset_buff, offset_dtype, col.offset, length=col.size() + 1 + offset_buff, offset_dtype, offset=col.offset, length=col.size() + 1 ) null_pos = None if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): assert buffers["validity"], "Validity buffers cannot be empty for masks" valid_buff, valid_dtype = buffers["validity"] - null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size()) + null_pos = buffer_to_ndarray( + valid_buff, valid_dtype, offset=col.offset, length=col.size() + ) if sentinel_val == 0: null_pos = ~null_pos @@ -356,8 +367,8 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: getattr(ArrowCTypes, f"UINT{dtype[1]}"), Endianness.NATIVE, ), - col.offset, - col.size(), + offset=col.offset, + length=col.size(), ) data = parse_datetime_format_str(format_str, data) @@ -368,8 +379,9 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: def buffer_to_ndarray( buffer: Buffer, dtype: tuple[DtypeKind, int, str, str], + *, + length: int, offset: int = 0, - length: int | None = None, ) -> np.ndarray: """ Build a NumPy array from the passed buffer. @@ -406,74 +418,27 @@ def buffer_to_ndarray( # and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports # it since https://github.com/numpy/numpy/pull/19083 ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype) - data_pointer = ctypes.cast( - buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type) - ) if bit_width == 1: assert length is not None, "`length` must be specified for a bit-mask buffer." - arr = np.ctypeslib.as_array(data_pointer, shape=(buffer.bufsize,)) - return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8) + pa = import_optional_dependency("pyarrow") + arr = pa.BooleanArray.from_buffers( + pa.bool_(), + length, + [None, pa.foreign_buffer(buffer.ptr, length)], + offset=offset, + ) + return np.asarray(arr) else: + data_pointer = ctypes.cast( + buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type) + ) return np.ctypeslib.as_array( - data_pointer, shape=(buffer.bufsize // (bit_width // 8),) + data_pointer, + shape=(length,), ) -def bitmask_to_bool_ndarray( - bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0 -) -> np.ndarray: - """ - Convert bit-mask to a boolean NumPy array. - - Parameters - ---------- - bitmask : np.ndarray[uint8] - NumPy array of uint8 dtype representing the bitmask. - mask_length : int - Number of elements in the mask to interpret. - first_byte_offset : int, default: 0 - Number of elements to offset from the start of the first byte. 
- - Returns - ------- - np.ndarray[bool] - """ - bytes_to_skip = first_byte_offset // 8 - bitmask = bitmask[bytes_to_skip:] - first_byte_offset %= 8 - - bool_mask = np.zeros(mask_length, dtype=bool) - - # Processing the first byte separately as it has its own offset - val = bitmask[0] - mask_idx = 0 - bits_in_first_byte = min(8 - first_byte_offset, mask_length) - for j in range(bits_in_first_byte): - if val & (1 << (j + first_byte_offset)): - bool_mask[mask_idx] = True - mask_idx += 1 - - # `mask_length // 8` describes how many full bytes to process - for i in range((mask_length - bits_in_first_byte) // 8): - # doing `+ 1` as we already processed the first byte - val = bitmask[i + 1] - for j in range(8): - if val & (1 << j): - bool_mask[mask_idx] = True - mask_idx += 1 - - if len(bitmask) > 1: - # Processing reminder of last byte - val = bitmask[-1] - for j in range(len(bool_mask) - mask_idx): - if val & (1 << j): - bool_mask[mask_idx] = True - mask_idx += 1 - - return bool_mask - - def set_nulls( data: np.ndarray | pd.Series, col: Column, @@ -509,7 +474,9 @@ def set_nulls( elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): assert validity, "Expected to have a validity buffer for the mask" valid_buff, valid_dtype = validity - null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size()) + null_pos = buffer_to_ndarray( + valid_buff, valid_dtype, offset=col.offset, length=col.size() + ) if sentinel_val == 0: null_pos = ~null_pos elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN): diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index bb5d7e839a98c..bf48e1ff0a653 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -186,6 +186,10 @@ def setitem_inplace(self, indexer, value) -> None: # dt64/td64, which do their own validation. value = np_can_hold_element(arr.dtype, value) + if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1: + # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 + value = value[0, ...] + arr[indexer] = value def grouped_reduce(self, func): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cb336d2f718a6..b2a6b1fa39219 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1061,7 +1061,9 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: self = self.make_block_same_class( values.T if values.ndim == 2 else values ) - + if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1: + # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 + casted = casted[0, ...] 
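The `casted[0, ...]` unwrapping just above, like the matching changes in `internals/base.py` and `indexing.py`, works around a NumPy 1.25 deprecation: implicitly converting a one-element array to a scalar during assignment now warns (numpy/numpy#10615). Indexing with `[0, ...]` yields a zero-dimensional view that assigns cleanly:

```python
import numpy as np

arr = np.zeros(3)
value = np.array([7.0])   # 1-D array holding a single element
arr[0] = value[0, ...]    # 0-d view, no deprecation warning on NumPy 1.25
print(arr)                # [7. 0. 0.]
```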
values[indexer] = casted return self diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index a33ce8fd5c459..e9cf7a151b627 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -28,6 +28,7 @@ from pandas.core.dtypes.cast import ( ensure_dtype_can_hold_na, find_common_type, + np_find_common_type, ) from pandas.core.dtypes.common import ( is_1d_only_ea_dtype, @@ -144,7 +145,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: target_dtype = to_concat_no_proxy[0].dtype elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes): # GH#42092 - target_dtype = np.find_common_type(list(dtypes), []) + target_dtype = np_find_common_type(*dtypes) else: target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index ef2cf8e96782d..ccd9ccfff808b 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -31,11 +31,10 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_complex_dtype, - is_datetime64_any_dtype, is_extension_array_dtype, is_numeric_dtype, - is_timedelta64_dtype, ) +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.arrays.arrow.dtype import ArrowDtype from pandas.core.arrays.floating import Float64Dtype @@ -232,9 +231,13 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series: dtype: DtypeObj | None if is_extension_array_dtype(series): if isinstance(series.dtype, ArrowDtype): - import pyarrow as pa + if series.dtype.kind == "m": + # GH53001: describe timedeltas with object dtype + dtype = None + else: + import pyarrow as pa - dtype = ArrowDtype(pa.float64()) + dtype = ArrowDtype(pa.float64()) else: dtype = Float64Dtype() elif is_numeric_dtype(series) and not is_complex_dtype(series): @@ -362,9 +365,9 @@ def select_describe_func( return describe_categorical_1d elif is_numeric_dtype(data): return describe_numeric_1d - elif is_datetime64_any_dtype(data.dtype): + elif data.dtype.kind == "M" or isinstance(data.dtype, DatetimeTZDtype): return describe_timestamp_1d - elif is_timedelta64_dtype(data.dtype): + elif data.dtype.kind == "m": return describe_numeric_1d else: return describe_categorical_1d diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index bc8f4b97d539a..79f130451a986 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -446,7 +446,7 @@ def __init__( keys = type(keys).from_tuples(clean_keys, names=keys.names) else: name = getattr(keys, "name", None) - keys = Index(clean_keys, name=name) + keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None)) if len(objs) == 0: raise ValueError("All objects passed were None") @@ -743,15 +743,19 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde for hlevel, level in zip(zipped, levels): to_concat = [] - for key, index in zip(hlevel, indexes): - # Find matching codes, include matching nan values as equal. 
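The `concat_arrays` hunk above is another call site of the `np_find_common_type` shim introduced in `pandas/core/dtypes/cast.py`: it wraps `np.result_type` but falls back to object for datetime, timedelta, and string kinds, and for promotions NumPy refuses outright. After this patch, something like the following holds:

```python
import numpy as np
from pandas.core.dtypes.cast import np_find_common_type  # added in this diff

print(np_find_common_type(np.dtype("i8"), np.dtype("f4")))         # float64
print(np_find_common_type(np.dtype("M8[ns]"), np.dtype("M8[s]")))  # object
```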
- mask = (isna(level) & isna(key)) | (level == key) - if not mask.any(): - raise ValueError(f"Key {key} not in level {level}") - i = np.nonzero(mask)[0][0] - - to_concat.append(np.repeat(i, len(index))) - codes_list.append(np.concatenate(to_concat)) + if isinstance(hlevel, Index) and hlevel.equals(level): + lens = [len(idx) for idx in indexes] + codes_list.append(np.repeat(np.arange(len(hlevel)), lens)) + else: + for key, index in zip(hlevel, indexes): + # Find matching codes, include matching nan values as equal. + mask = (isna(level) & isna(key)) | (level == key) + if not mask.any(): + raise ValueError(f"Key {key} not in level {level}") + i = np.nonzero(mask)[0][0] + + to_concat.append(np.repeat(i, len(index))) + codes_list.append(np.concatenate(to_concat)) concat_index = _concat_indexes(indexes) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index bb01d551628d3..4c9cfb476586e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -123,6 +123,10 @@ np.object_: libhashtable.ObjectFactorizer, } +# See https://github.com/pandas-dev/pandas/issues/52451 +if np.intc is not np.int32: + _factorizers[np.intc] = libhashtable.Int64Factorizer + @Substitution("\nleft : DataFrame or named Series") @Appender(_merge_doc, indents=0) @@ -1000,6 +1004,14 @@ def _maybe_add_join_keys( else: key_col = Index(lvals).where(~mask_left, rvals) result_dtype = find_common_type([lvals.dtype, rvals.dtype]) + if ( + lvals.dtype.kind == "M" + and rvals.dtype.kind == "M" + and result_dtype.kind == "O" + ): + # TODO(non-nano) Workaround for common_type not dealing + # with different resolutions + result_dtype = key_col.dtype if result._is_label_reference(name): result[name] = Series( @@ -1401,6 +1413,12 @@ def _maybe_coerce_merge_keys(self) -> None: rk.dtype, DatetimeTZDtype ): raise ValueError(msg) + elif ( + isinstance(lk.dtype, DatetimeTZDtype) + and isinstance(rk.dtype, DatetimeTZDtype) + ) or (lk.dtype.kind == "M" and rk.dtype.kind == "M"): + # allows datetime with different resolutions + continue elif lk_is_object and rk_is_object: continue @@ -2355,7 +2373,7 @@ def _factorize_keys( if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype): # Extract the ndarray (UTC-localized) values # Note: we dont need the dtypes to match, as these can still be compared - # TODO(non-nano): need to make sure resolutions match + lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk) lk = cast("DatetimeArray", lk)._ndarray rk = cast("DatetimeArray", rk)._ndarray diff --git a/pandas/core/series.py b/pandas/core/series.py index 29e02fdc7695d..78f4da4e65196 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1940,7 +1940,9 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: df = self._constructor_expanddim(mgr) return df.__finalize__(self, method="to_frame") - def _set_name(self, name, inplace: bool = False) -> Series: + def _set_name( + self, name, inplace: bool = False, deep: bool | None = None + ) -> Series: """ Set the Series name. @@ -1949,9 +1951,11 @@ def _set_name(self, name, inplace: bool = False) -> Series: name : str inplace : bool Whether to modify `self` directly or return a copy. 
+ deep : bool or None, default None + Whether to make a deep copy (True), a shallow copy (False), or use Copy-on-Write (None). """ inplace = validate_bool_kwarg(inplace, "inplace") - ser = self if inplace else self.copy() + ser = self if inplace else self.copy(deep and not using_copy_on_write()) ser.name = name return ser @@ -4770,7 +4774,7 @@ def rename( index: Renamer | Hashable | None = None, *, axis: Axis | None = None, - copy: bool = True, + copy: bool | None = None, inplace: bool = False, level: Level | None = None, errors: IgnoreRaise = "ignore", @@ -4857,7 +4861,7 @@ def rename( errors=errors, ) else: - return self._set_name(index, inplace=inplace) + return self._set_name(index, inplace=inplace, deep=copy) @Appender( """ diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py index 071afc059b166..c143988bdc885 100644 --- a/pandas/io/formats/string.py +++ b/pandas/io/formats/string.py @@ -135,12 +135,6 @@ def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str: col_bins = _binify(col_widths, lwidth) nbins = len(col_bins) - if self.fmt.is_truncated_vertically: - assert self.fmt.max_rows_fitted is not None - nrows = self.fmt.max_rows_fitted + 1 - else: - nrows = len(self.frame) - str_lst = [] start = 0 for i, end in enumerate(col_bins): @@ -148,6 +142,7 @@ def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str: if self.fmt.index: row.insert(0, idx) if nbins > 1: + nrows = len(row[-1]) if end <= len(strcols) and i < nbins - 1: row.append([" \\"] + [" "] * (nrows - 1)) else: diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 3acb1073bac93..2db759719fcb4 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1113,7 +1113,16 @@ def _make_date_converter( if date_parser is not lib.no_default and date_format is not None: raise TypeError("Cannot use both 'date_parser' and 'date_format'") + def unpack_if_single_element(arg): + # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 + if isinstance(arg, np.ndarray) and arg.ndim == 1 and len(arg) == 1: + return arg[0] + return arg + def converter(*date_cols, col: Hashable): + if len(date_cols) == 1 and date_cols[0].dtype.kind in "Mm": + return date_cols[0] + if date_parser is lib.no_default: strs = parsing.concat_date_cols(date_cols) date_fmt = ( @@ -1136,7 +1145,9 @@ def converter(*date_cols, col: Hashable): else: try: result = tools.to_datetime( - date_parser(*date_cols), errors="ignore", cache=cache_dates + date_parser(*(unpack_if_single_element(arg) for arg in date_cols)), + errors="ignore", + cache=cache_dates, ) if isinstance(result, datetime.datetime): raise Exception("scalar parser") diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c2393fc7ada06..a627a60ef0691 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -158,7 +158,9 @@ def _convert_arrays_to_dataframe( ArrowExtensionArray(pa.array(arr, from_pandas=True)) for arr in arrays ] if arrays: - return DataFrame(dict(zip(columns, arrays))) + df = DataFrame(dict(zip(list(range(len(columns))), arrays))) + df.columns = columns + return df else: return DataFrame(columns=columns) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 1d8f1dea7d478..c220c46bdc8f8 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -135,6 +135,7 @@ def test_methods_copy_keyword( "method", [ lambda ser, copy: ser.rename(index={0: 100}, copy=copy), + lambda ser, copy: ser.rename(None, copy=copy), lambda ser, copy:
ser.reindex(index=ser.index, copy=copy), lambda ser, copy: ser.reindex_like(ser, copy=copy), lambda ser, copy: ser.align(ser, copy=copy)[0], @@ -152,6 +153,7 @@ def test_methods_copy_keyword( lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy), ], ids=[ + "rename (dict)", "rename", "reindex", "reindex_like", diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 650eb033dcd9e..a65fa240bf7f3 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1012,9 +1012,7 @@ def test_maybe_convert_objects_itemsize(self, data0, data1): data = [data0, data1] arr = np.array(data, dtype="object") - common_kind = np.find_common_type( - [type(data0), type(data1)], scalar_types=[] - ).kind + common_kind = np.result_type(type(data0), type(data1)).kind kind0 = "python" if not hasattr(data0, "dtype") else data0.dtype.kind kind1 = "python" if not hasattr(data1, "dtype") else data1.dtype.kind if kind0 != "python" and kind1 != "python": diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d557f5efc0a8a..8907137c71844 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -131,7 +131,7 @@ def data(dtype): @pytest.fixture def data_missing(data): """Length-2 array with [NA, Valid]""" - return type(data)._from_sequence([None, data[0]]) + return type(data)._from_sequence([None, data[0]], dtype=data.dtype) @pytest.fixture(params=["data", "data_missing"]) @@ -214,7 +214,8 @@ def data_for_sorting(data_for_grouping): A < B < C """ return type(data_for_grouping)._from_sequence( - [data_for_grouping[0], data_for_grouping[7], data_for_grouping[4]] + [data_for_grouping[0], data_for_grouping[7], data_for_grouping[4]], + dtype=data_for_grouping.dtype, ) @@ -227,7 +228,8 @@ def data_missing_for_sorting(data_for_grouping): A < B and NA missing. 
""" return type(data_for_grouping)._from_sequence( - [data_for_grouping[0], data_for_grouping[2], data_for_grouping[4]] + [data_for_grouping[0], data_for_grouping[2], data_for_grouping[4]], + dtype=data_for_grouping.dtype, ) @@ -1578,7 +1580,8 @@ def test_mode_dropna_false_mode_na(data): [pa.large_string(), str], [pa.list_(pa.int64()), list], [pa.large_list(pa.int64()), list], - [pa.map_(pa.string(), pa.int64()), dict], + [pa.map_(pa.string(), pa.int64()), list], + [pa.struct([("f1", pa.int8()), ("f2", pa.string())]), dict], [pa.dictionary(pa.int64(), pa.int64()), CategoricalDtypeType], ], ) @@ -1829,6 +1832,20 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series): arr.searchsorted(b) +def test_sort_values_dictionary(): + df = pd.DataFrame( + { + "a": pd.Series( + ["x", "y"], dtype=ArrowDtype(pa.dictionary(pa.int32(), pa.string())) + ), + "b": [1, 2], + }, + ) + expected = df.copy() + result = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("pat", ["abc", "a[a-z]{2}"]) def test_str_count(pat): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) @@ -2510,6 +2527,17 @@ def test_dt_to_pydatetime(): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize("date_type", [32, 64]) +def test_dt_to_pydatetime_date_error(date_type): + # GH 52812 + ser = pd.Series( + [date(2022, 12, 31)], + dtype=ArrowDtype(getattr(pa, f"date{date_type}")()), + ) + with pytest.raises(ValueError, match="to_pydatetime cannot be called with"): + ser.dt.to_pydatetime() + + def test_dt_tz_localize_unsupported_tz_options(): ser = pd.Series( [datetime(year=2023, month=1, day=2, hour=3), None], @@ -2618,6 +2646,20 @@ def test_setitem_boolean_replace_with_mask_segfault(): assert arr._data == expected._data +@pytest.mark.parametrize( + "data, arrow_dtype", + [ + ([b"a", b"b"], pa.large_binary()), + (["a", "b"], pa.large_string()), + ], +) +def test_conversion_large_dtypes_from_numpy_array(data, arrow_dtype): + dtype = ArrowDtype(arrow_dtype) + result = pd.array(np.array(data), dtype=dtype) + expected = pd.array(data, dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES) def test_describe_numeric_data(pa_type): # GH 52470 @@ -2631,6 +2673,36 @@ def test_describe_numeric_data(pa_type): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES) +def test_describe_timedelta_data(pa_type): + # GH53001 + data = pd.Series(range(1, 10), dtype=ArrowDtype(pa_type)) + result = data.describe() + expected = pd.Series( + [9] + pd.to_timedelta([5, 2, 1, 3, 5, 7, 9], unit=pa_type.unit).tolist(), + dtype=object, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("pa_type", tm.DATETIME_PYARROW_DTYPES) +def test_describe_datetime_data(pa_type): + # GH53001 + data = pd.Series(range(1, 10), dtype=ArrowDtype(pa_type)) + result = data.describe() + expected = pd.Series( + [9] + + [ + pd.Timestamp(v, tz=pa_type.tz, unit=pa_type.unit) + for v in [5, 1, 3, 5, 7, 9] + ], + dtype=object, + index=["count", "mean", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.xfail( pa_version_under8p0, reason="Function 'add_checked' has no kernel matching input types", diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py 
index 6076933eecec4..2adee158379bb 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -53,7 +53,8 @@ def test_pyarrow_dtype_backend(self): "c": pd.Series([True, False, None], dtype=np.dtype("O")), "d": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), "e": pd.Series(pd.date_range("2022", periods=3)), - "f": pd.Series(pd.timedelta_range("1D", periods=3)), + "f": pd.Series(pd.date_range("2022", periods=3, tz="UTC").as_unit("s")), + "g": pd.Series(pd.timedelta_range("1D", periods=3)), } ) result = df.convert_dtypes(dtype_backend="pyarrow") @@ -76,6 +77,16 @@ def test_pyarrow_dtype_backend(self): ) ), "f": pd.arrays.ArrowExtensionArray( + pa.array( + [ + datetime.datetime(2022, 1, 1), + datetime.datetime(2022, 1, 2), + datetime.datetime(2022, 1, 3), + ], + type=pa.timestamp(unit="s", tz="UTC"), + ) + ), + "g": pd.arrays.ArrowExtensionArray( pa.array( [ datetime.timedelta(1), @@ -134,3 +145,17 @@ def test_pyarrow_engine_lines_false(self): ) with pytest.raises(ValueError, match=msg): df.convert_dtypes(dtype_backend="numpy") + + def test_pyarrow_backend_no_conversion(self): + # GH#52872 + pytest.importorskip("pyarrow") + df = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"}) + expected = df.copy() + result = df.convert_dtypes( + convert_floating=False, + convert_integer=False, + convert_boolean=False, + convert_string=False, + dtype_backend="pyarrow", + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 31a8e7a7d36ac..1ef5bc9925f95 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -620,7 +620,7 @@ def test_categorical_transformers( result = getattr(gb_keepna, transformation_func)(*args) expected = getattr(gb_dropna, transformation_func)(*args) for iloc, value in zip( - df[df["x"].isnull()].index.tolist(), null_group_result.values + df[df["x"].isnull()].index.tolist(), null_group_result.values.ravel() ): if expected.ndim == 1: expected.iloc[iloc] = value diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 4fff862961920..c5a3512113655 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -257,3 +257,15 @@ def test_join_dtypes_all_nan(any_numeric_ea_dtype): ] ) tm.assert_index_equal(result, expected) + + +def test_join_index_levels(): + # GH#53093 + midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")]) + midx2 = MultiIndex.from_tuples([("a", "2019-01-31")]) + result = midx.join(midx2, how="outer") + expected = MultiIndex.from_tuples( + [("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")] + ) + tm.assert_index_equal(result.levels[1], expected.levels[1]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 8e507212976ec..22a6f62f53392 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -6,12 +6,14 @@ import pandas as pd from pandas import ( + CategoricalDtype, DataFrame, Index, MultiIndex, Series, ) import pandas._testing as tm +from pandas.core.arrays.boolean import BooleanDtype class TestMultiIndexBasic: @@ -206,3 +208,21 @@ def test_multiindex_with_na_missing_key(self): ) with pytest.raises(KeyError, match="missing_key"):
df[[("missing_key",)]] + + def test_multiindex_dtype_preservation(self): + # GH51261 + columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"]) + df = DataFrame(["value"], columns=columns).astype("category") + df_no_multiindex = df["A"] + assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype) + + # geopandas 1763 analogue + df = DataFrame( + [[1, 0], [0, 1]], + columns=[ + ["foo", "foo"], + ["location", "location"], + ["x", "y"], + ], + ).assign(bools=Series([True, False], dtype="boolean")) + assert isinstance(df["bools"].dtype, BooleanDtype) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 27b4a4be73032..e6f44359a1a62 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -479,6 +479,21 @@ def test_setitem_new_column_all_na(self): df["new"] = s assert df["new"].isna().all() + def test_setitem_enlargement_keep_index_names(self): + # GH#53053 + mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"]) + df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"]) + df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)] + mi_expected = MultiIndex.from_tuples( + [(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"] + ) + expected = DataFrame( + data=[[10, 20, 30], [10, 20, 30]], + index=mi_expected, + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(df, expected) + @td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values # is not a view diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index a9835b8641e7d..301cccec9e0ed 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -89,6 +89,18 @@ def test_categorical_pyarrow(): tm.assert_frame_equal(result, expected) +def test_empty_categorical_pyarrow(): + # https://github.com/pandas-dev/pandas/issues/53077 + pa = pytest.importorskip("pyarrow", "11.0.0") + + arr = [None] + table = pa.table({"arr": pa.array(arr, "float64").dictionary_encode()}) + exchange_df = table.__dataframe__() + result = pd.api.interchange.from_dataframe(exchange_df) + expected = pd.DataFrame({"arr": pd.Categorical([np.nan])}) + tm.assert_frame_equal(result, expected) + + def test_large_string_pyarrow(): # GH 52795 pa = pytest.importorskip("pyarrow", "11.0.0") @@ -104,6 +116,32 @@ def test_large_string_pyarrow(): assert pa.Table.equals(pa.interchange.from_dataframe(result), table) +@pytest.mark.parametrize( + ("offset", "length", "expected_values"), + [ + (0, None, [3.3, float("nan"), 2.1]), + (1, None, [float("nan"), 2.1]), + (2, None, [2.1]), + (0, 2, [3.3, float("nan")]), + (0, 1, [3.3]), + (1, 1, [float("nan")]), + ], +) +def test_bitmasks_pyarrow(offset, length, expected_values): + # GH 52795 + pa = pytest.importorskip("pyarrow", "11.0.0") + + arr = [3.3, None, 2.1] + table = pa.table({"arr": arr}).slice(offset, length) + exchange_df = table.__dataframe__() + result = from_dataframe(exchange_df) + expected = pd.DataFrame({"arr": expected_values}) + tm.assert_frame_equal(result, expected) + + # check round-trip + assert pa.Table.equals(pa.interchange.from_dataframe(result), table) + + @pytest.mark.parametrize( "data", [int_data, uint_data, float_data, bool_data, datetime_data] ) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0d84ecf955700..fcb7f6657beac 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ 
-1397,25 +1397,100 @@ def test_to_string_no_index(self): assert df_s == expected def test_to_string_line_width_no_index(self): - # GH 13998, GH 22505, # GH 49230 + # GH 13998, GH 22505 df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \n 1 \\\n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \n11 \\\n22 \n33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \n 11 \\\n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " + expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_to_string_line_width_no_header(self): + # GH 53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 1 \\\n1 2 \n2 3 \n\n0 4 \n1 5 \n2 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 11 \\\n1 22 \n2 33 \n\n0 4 \n1 5 \n2 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 11 \\\n1 22 \n2 -33 \n\n0 4 \n1 5 \n2 -6 " + + assert df_s == expected + + def test_to_string_line_width_no_index_no_header(self): + # GH 53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = "1 \\\n2 \n3 \n\n4 \n5 \n6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = "11 \\\n22 \n33 \n\n4 \n5 \n6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = " 11 \\\n 22 \n-33 \n\n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_to_string_line_width_with_both_index_and_header(self): + # GH 53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 1 \n1 2 \n2 3 \n\n y \n0 4 \n1 5 \n2 6 " + ) + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 11 \n1 22 \n2 33 \n\n y \n0 4 \n1 5 \n2 6 " + ) + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 11 \n1 22 \n2 -33 \n\n y \n0 4 \n1 5 \n2 -6 " + ) assert df_s == expected diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 8c3474220cde8..94f4066ea1cb2 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2218,3 +2218,23 @@ def test_parse_dates_dict_format_index(all_parsers): index=Index([Timestamp("2019-12-31"), Timestamp("2020-12-31")], name="a"), ) tm.assert_frame_equal(result, expected) + + +def test_parse_dates_arrow_engine(all_parsers): + # GH#53295 + parser = all_parsers + data = """a,b +2000-01-01 00:00:00,1 +2000-01-01 00:00:01,1""" + + result = parser.read_csv(StringIO(data), parse_dates=["a"]) + expected = DataFrame( + { + 
"a": [ + Timestamp("2000-01-01 00:00:00"), + Timestamp("2000-01-01 00:00:01"), + ], + "b": 1, + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d2fb4a8426cf8..b749a863a3937 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1496,6 +1496,18 @@ def test_escaped_table_name(self): tm.assert_frame_equal(res, df) + def test_read_sql_duplicate_columns(self): + # GH#53117 + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) + df.to_sql("test_table", self.conn, index=False) + + result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) + expected = DataFrame( + [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]], + columns=["a", "b", "a", "c"], + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed") class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a7b007f043da9..9d1346b4ad073 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1463,7 +1463,9 @@ def test_different(self, right_vals): result = merge(left, right, on="A") assert is_object_dtype(result.A.dtype) - @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize( + "d1", [np.int64, np.int32, np.intc, np.int16, np.int8, np.uint8] + ) @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16]) def test_join_multi_dtypes(self, d1, d2): dtype1 = np.dtype(d1) @@ -2750,3 +2752,30 @@ def test_merge_arrow_and_numpy_dtypes(dtype): result = df2.merge(df) expected = df2.copy() tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"]) +@pytest.mark.parametrize("tz", [None, "America/Chicago"]) +def test_merge_datetime_different_resolution(tz, how): + # https://github.com/pandas-dev/pandas/issues/53200 + vals = [ + pd.Timestamp(2023, 5, 12, tz=tz), + pd.Timestamp(2023, 5, 13, tz=tz), + pd.Timestamp(2023, 5, 14, tz=tz), + ] + df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]}) + df1["t"] = df1["t"].dt.as_unit("ns") + df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]}) + df2["t"] = df2["t"].dt.as_unit("s") + + expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]}) + expected["t"] = expected["t"].dt.as_unit("ns") + if how == "inner": + expected = expected.iloc[[1]].reset_index(drop=True) + elif how == "left": + expected = expected.iloc[[0, 1]] + elif how == "right": + expected = expected.iloc[[1, 2]].reset_index(drop=True) + + result = df1.merge(df2, on="t", how=how) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 74371830f3a19..e211bc233b9cf 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -392,6 +392,8 @@ def test_dt_round_nonnano_higher_resolution_no_op(self, freq): result = ser.dt.round(freq) tm.assert_series_equal(result, expected) + assert not np.shares_memory(ser.array._ndarray, result.array._ndarray) + def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 82c2375ffd628..e741fd310eb41 100644 --- 
a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2953,6 +2953,9 @@ class TestDatetimeParsingWrappers: ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)), ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)), ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)), + ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)), ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)), ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)), ("Thu Sep 25 2003", datetime(2003, 9, 25)), diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 499bcae5e90f0..1d969e648b752 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -510,6 +510,8 @@ def test_ignore_downcast_neg_to_unsigned(): tm.assert_numpy_array_equal(res, expected) +# Warning in 32 bit platforms +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"]) @pytest.mark.parametrize( "data,expected", diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 6500afdf87beb..e33c6e37ac0e7 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -201,8 +201,10 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), - ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), - ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ("Tue 24 Aug 2021 01:30:48", "%a %d %b %Y %H:%M:%S"), + ("Tuesday 24 Aug 2021 01:30:48", "%A %d %b %Y %H:%M:%S"), + ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %I:%M:%S %p"), + ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %I:%M:%S %p"), ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 ], ) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 968da7cf60105..d0e393e41c623 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -195,7 +195,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: else: new_arg_value = old_arg_value msg = ( - f"the {repr(old_arg_name)}' keyword is deprecated, " + f"the {repr(old_arg_name)} keyword is deprecated, " f"use {repr(new_arg_name)} instead." )
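A minimal sketch of the NumPy behaviour that the "NumPy 1.25 deprecation" hunks above (in pandas/core/internals/base.py, pandas/core/internals/blocks.py and pandas/io/parsers/base_parser.py) all work around: NumPy 1.25 deprecates implicit conversion of an array with ndim > 0 to a scalar, which single-element assignments previously relied on. Illustrative only, not part of the patch; the variable names are invented.

import numpy as np

arr = np.zeros(3)
value = np.array([1.5])  # a length-1, 1-D result, e.g. from an upstream lookup

# On NumPy >= 1.25, `arr[0] = value` warns (roughly): "Conversion of an array
# with ndim > 0 to a scalar is deprecated". Unwrapping first sidesteps it:
if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1:
    value = value[0, ...]  # 0-d ndarray: dtype preserved, no deprecated cast

arr[0] = value  # assigns cleanly on all NumPy versions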